1//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// AArch64 Instruction definitions.
10//
11//===----------------------------------------------------------------------===//
12
13//===----------------------------------------------------------------------===//
14// ARM Instruction Predicate Definitions.
15//
16def HasV8_0a         : Predicate<"Subtarget->hasV8_0aOps()">,
17                                 AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">;
18def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
19                                 AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
20def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
21                                 AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">;
22def HasV8_3a         : Predicate<"Subtarget->hasV8_3aOps()">,
23                                 AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">;
24def HasV8_4a         : Predicate<"Subtarget->hasV8_4aOps()">,
25                                 AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">;
26def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
27                                 AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">;
28def HasV8_6a         : Predicate<"Subtarget->hasV8_6aOps()">,
29                                 AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">;
30def HasV8_7a         : Predicate<"Subtarget->hasV8_7aOps()">,
31                                 AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">;
32def HasV8_8a         : Predicate<"Subtarget->hasV8_8aOps()">,
33                                 AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">;
34def HasV8_9a         : Predicate<"Subtarget->hasV8_9aOps()">,
35                                 AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">;
36def HasV9_0a         : Predicate<"Subtarget->hasV9_0aOps()">,
37                                 AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
38def HasV9_1a         : Predicate<"Subtarget->hasV9_1aOps()">,
39                                 AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
40def HasV9_2a         : Predicate<"Subtarget->hasV9_2aOps()">,
41                                 AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">;
42def HasV9_3a         : Predicate<"Subtarget->hasV9_3aOps()">,
43                                 AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">;
44def HasV9_4a         : Predicate<"Subtarget->hasV9_4aOps()">,
45                                 AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
46def HasV8_0r         : Predicate<"Subtarget->hasV8_0rOps()">,
47                                 AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;
48
49def HasEL2VMSA       : Predicate<"Subtarget->hasEL2VMSA()">,
50                       AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;
51
52def HasEL3           : Predicate<"Subtarget->hasEL3()">,
53                       AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;
54
55def HasVH            : Predicate<"Subtarget->hasVH()">,
56                       AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;
57
58def HasLOR           : Predicate<"Subtarget->hasLOR()">,
59                       AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;
60
61def HasPAuth         : Predicate<"Subtarget->hasPAuth()">,
62                       AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;
63
64def HasJS            : Predicate<"Subtarget->hasJS()">,
65                       AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;
66
67def HasCCIDX         : Predicate<"Subtarget->hasCCIDX()">,
68                       AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;
69
70def HasComplxNum      : Predicate<"Subtarget->hasComplxNum()">,
71                       AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;
72
73def HasNV            : Predicate<"Subtarget->hasNV()">,
74                       AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;
75
76def HasMPAM          : Predicate<"Subtarget->hasMPAM()">,
77                       AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;
78
79def HasDIT           : Predicate<"Subtarget->hasDIT()">,
80                       AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;
81
82def HasTRACEV8_4         : Predicate<"Subtarget->hasTRACEV8_4()">,
83                       AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;
84
85def HasAM            : Predicate<"Subtarget->hasAM()">,
86                       AssemblerPredicateWithAll<(all_of FeatureAM), "am">;
87
88def HasSEL2          : Predicate<"Subtarget->hasSEL2()">,
89                       AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;
90
91def HasTLB_RMI          : Predicate<"Subtarget->hasTLB_RMI()">,
92                       AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;
93
94def HasFlagM         : Predicate<"Subtarget->hasFlagM()">,
95                       AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;
96
97def HasRCPC_IMMO      : Predicate<"Subtarget->hasRCPCImm()">,
98                       AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
99
100def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
101                               AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
102def HasNEON          : Predicate<"Subtarget->hasNEON()">,
103                                 AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
104def HasSM4           : Predicate<"Subtarget->hasSM4()">,
105                                 AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
106def HasSHA3          : Predicate<"Subtarget->hasSHA3()">,
107                                 AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">;
108def HasSHA2          : Predicate<"Subtarget->hasSHA2()">,
109                                 AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">;
110def HasAES           : Predicate<"Subtarget->hasAES()">,
111                                 AssemblerPredicateWithAll<(all_of FeatureAES), "aes">;
112def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
113                                 AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">;
114def HasCRC           : Predicate<"Subtarget->hasCRC()">,
115                                 AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">;
116def HasCSSC          : Predicate<"Subtarget->hasCSSC()">,
117                                 AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">;
118def HasNoCSSC        : Predicate<"!Subtarget->hasCSSC()">;
119def HasLSE           : Predicate<"Subtarget->hasLSE()">,
120                                 AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">;
121def HasNoLSE         : Predicate<"!Subtarget->hasLSE()">;
122def HasRAS           : Predicate<"Subtarget->hasRAS()">,
123                                 AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
124def HasRDM           : Predicate<"Subtarget->hasRDM()">,
125                                 AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">;
126def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
127                                 AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">;
128def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
129                                 AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">;
130def HasSPE           : Predicate<"Subtarget->hasSPE()">,
131                                 AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">;
132def HasFuseAES       : Predicate<"Subtarget->hasFuseAES()">,
133                                 AssemblerPredicateWithAll<(all_of FeatureFuseAES),
134                                 "fuse-aes">;
135def HasSVE           : Predicate<"Subtarget->hasSVE()">,
136                                 AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
137def HasSVE2          : Predicate<"Subtarget->hasSVE2()">,
138                                 AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
139def HasSVE2p1        : Predicate<"Subtarget->hasSVE2p1()">,
140                                 AssemblerPredicate<(any_of FeatureSVE2p1), "sve2p1">;
141def HasSVE2AES       : Predicate<"Subtarget->hasSVE2AES()">,
142                                 AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
143def HasSVE2SM4       : Predicate<"Subtarget->hasSVE2SM4()">,
144                                 AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
145def HasSVE2SHA3      : Predicate<"Subtarget->hasSVE2SHA3()">,
146                                 AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
147def HasSVE2BitPerm   : Predicate<"Subtarget->hasSVE2BitPerm()">,
148                                 AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
149def HasB16B16        : Predicate<"Subtarget->hasB16B16()">,
150                                 AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">;
151def HasSME           : Predicate<"Subtarget->hasSME()">,
152                                 AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
153def HasSMEF64F64     : Predicate<"Subtarget->hasSMEF64F64()">,
154                                 AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
155def HasSMEF16F16     : Predicate<"Subtarget->hasSMEF16F16()">,
156                                 AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
157def HasSMEI16I64     : Predicate<"Subtarget->hasSMEI16I64()">,
158                                 AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
159def HasSME2          : Predicate<"Subtarget->hasSME2()">,
160                                 AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
161def HasSME2p1        : Predicate<"Subtarget->hasSME2p1()">,
162                                 AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
163
164// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
165// they should be enabled if either has been specified.
166def HasSVEorSME
167    : Predicate<"Subtarget->hasSVEorSME()">,
168                AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
169                "sve or sme">;
170def HasSVE2orSME
171    : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
172                AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
173                "sve2 or sme">;
174def HasSVE2p1_or_HasSME
175    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
176                 AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
177def HasSVE2p1_or_HasSME2
178    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">,
179                 AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">;
180def HasSVE2p1_or_HasSME2p1
181    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">,
182                 AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">;
183// A subset of NEON instructions are legal in Streaming SVE execution mode,
184// they should be enabled if either has been specified.
185def HasNEONorSME
186    : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
187                AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
188                "neon or sme">;
189def HasRCPC          : Predicate<"Subtarget->hasRCPC()">,
190                                 AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
191def HasAltNZCV       : Predicate<"Subtarget->hasAlternativeNZCV()">,
192                       AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
193def HasFRInt3264     : Predicate<"Subtarget->hasFRInt3264()">,
194                       AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
195def HasSB            : Predicate<"Subtarget->hasSB()">,
196                       AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
197def HasPredRes      : Predicate<"Subtarget->hasPredRes()">,
198                       AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
199def HasCCDP          : Predicate<"Subtarget->hasCCDP()">,
200                       AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
201def HasBTI           : Predicate<"Subtarget->hasBTI()">,
202                       AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
203def HasMTE           : Predicate<"Subtarget->hasMTE()">,
204                       AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
205def HasTME           : Predicate<"Subtarget->hasTME()">,
206                       AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
207def HasETE           : Predicate<"Subtarget->hasETE()">,
208                       AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
209def HasTRBE          : Predicate<"Subtarget->hasTRBE()">,
210                       AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
211def HasBF16          : Predicate<"Subtarget->hasBF16()">,
212                       AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
213def HasMatMulInt8    : Predicate<"Subtarget->hasMatMulInt8()">,
214                       AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
215def HasMatMulFP32    : Predicate<"Subtarget->hasMatMulFP32()">,
216                       AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
217def HasMatMulFP64    : Predicate<"Subtarget->hasMatMulFP64()">,
218                       AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
219def HasXS            : Predicate<"Subtarget->hasXS()">,
220                       AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
221def HasWFxT          : Predicate<"Subtarget->hasWFxT()">,
222                       AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
223def HasLS64          : Predicate<"Subtarget->hasLS64()">,
224                       AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
225def HasBRBE          : Predicate<"Subtarget->hasBRBE()">,
226                       AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
227def HasSPE_EEF       : Predicate<"Subtarget->hasSPE_EEF()">,
228                       AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
229def HasHBC           : Predicate<"Subtarget->hasHBC()">,
230                       AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
231def HasMOPS          : Predicate<"Subtarget->hasMOPS()">,
232                       AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
233def HasCLRBHB        : Predicate<"Subtarget->hasCLRBHB()">,
234                       AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
235def HasSPECRES2      : Predicate<"Subtarget->hasSPECRES2()">,
236                       AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
237def HasITE           : Predicate<"Subtarget->hasITE()">,
238                       AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
239def HasTHE           : Predicate<"Subtarget->hasTHE()">,
240                       AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
241def HasRCPC3         : Predicate<"Subtarget->hasRCPC3()">,
242                       AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
243def HasLSE128        : Predicate<"Subtarget->hasLSE128()">,
244                       AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
245def HasD128          : Predicate<"Subtarget->hasD128()">,
246                       AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
247def HasCHK           : Predicate<"Subtarget->hasCHK()">,
248                       AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
249def HasGCS           : Predicate<"Subtarget->hasGCS()">,
250                       AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
251def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
252def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
253def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
254def UseExperimentalZeroingPseudos
255    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
256def UseAlternateSExtLoadCVTF32
257    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
258
259def UseNegativeImmediates
260    : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
261                                             "NegativeImmediates">;
262
263def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
264
265def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;
266
267def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
268                                  SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
269                                                       SDTCisInt<1>]>>;
270
271
272//===----------------------------------------------------------------------===//
273// AArch64-specific DAG Nodes.
274//
275
276// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
277def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
278                                              [SDTCisSameAs<0, 2>,
279                                               SDTCisSameAs<0, 3>,
280                                               SDTCisInt<0>, SDTCisVT<1, i32>]>;
281
282// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
283def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
284                                            [SDTCisSameAs<0, 1>,
285                                             SDTCisSameAs<0, 2>,
286                                             SDTCisInt<0>,
287                                             SDTCisVT<3, i32>]>;
288
289// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
290def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
291                                            [SDTCisSameAs<0, 2>,
292                                             SDTCisSameAs<0, 3>,
293                                             SDTCisInt<0>,
294                                             SDTCisVT<1, i32>,
295                                             SDTCisVT<4, i32>]>;
296
297def SDT_AArch64Brcond  : SDTypeProfile<0, 3,
298                                     [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
299                                      SDTCisVT<2, i32>]>;
300def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
301def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
302                                        SDTCisVT<2, OtherVT>]>;
303
304
305def SDT_AArch64CSel  : SDTypeProfile<1, 4,
306                                   [SDTCisSameAs<0, 1>,
307                                    SDTCisSameAs<0, 2>,
308                                    SDTCisInt<3>,
309                                    SDTCisVT<4, i32>]>;
310def SDT_AArch64CCMP : SDTypeProfile<1, 5,
311                                    [SDTCisVT<0, i32>,
312                                     SDTCisInt<1>,
313                                     SDTCisSameAs<1, 2>,
314                                     SDTCisInt<3>,
315                                     SDTCisInt<4>,
316                                     SDTCisVT<5, i32>]>;
317def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
318                                     [SDTCisVT<0, i32>,
319                                      SDTCisFP<1>,
320                                      SDTCisSameAs<1, 2>,
321                                      SDTCisInt<3>,
322                                      SDTCisInt<4>,
323                                      SDTCisVT<5, i32>]>;
324def SDT_AArch64FCmp   : SDTypeProfile<0, 2,
325                                   [SDTCisFP<0>,
326                                    SDTCisSameAs<0, 1>]>;
327def SDT_AArch64Dup   : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
328def SDT_AArch64DupLane   : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
329def SDT_AArch64Insr  : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
330def SDT_AArch64Zip   : SDTypeProfile<1, 2, [SDTCisVec<0>,
331                                          SDTCisSameAs<0, 1>,
332                                          SDTCisSameAs<0, 2>]>;
333def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
334def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
335def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
336                                           SDTCisInt<2>, SDTCisInt<3>]>;
337def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
338def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
339                                          SDTCisSameAs<0,2>, SDTCisInt<3>]>;
340def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
341def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
342                                         SDTCisVec<2>, SDTCisSameAs<2,3>]>;
343
344def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
345                                                 SDTCisSameAs<0,1>,
346                                                 SDTCisSameAs<0,2>]>;
347
348def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
349def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
350def SDT_AArch64fcmp  : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
351def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
352                                           SDTCisSameAs<0,2>]>;
353def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
354                                           SDTCisSameAs<0,2>,
355                                           SDTCisSameAs<0,3>]>;
356def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
357def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;
358
359def SDT_AArch64ITOF  : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
360
361def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
362                                                 SDTCisPtrTy<1>]>;
363
364def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
365
366def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
367def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
368def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
369def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
370def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
371def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
372
373// Generates the general dynamic sequences, i.e.
374//  adrp  x0, :tlsdesc:var
375//  ldr   x1, [x0, #:tlsdesc_lo12:var]
376//  add   x0, x0, #:tlsdesc_lo12:var
377//  .tlsdesccall var
378//  blr   x1
379
380// (the TPIDR_EL0 offset is put directly in X0, hence no "result" here)
381// number of operands (the variable)
382def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
383                                          [SDTCisPtrTy<0>]>;
384
385def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
386                                        [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
387                                         SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
388                                         SDTCisSameAs<1, 4>]>;
389
390def SDT_AArch64TBL : SDTypeProfile<1, 2, [
391  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
392]>;
393
394// non-extending masked load fragment.
395def nonext_masked_load :
396  PatFrag<(ops node:$ptr, node:$pred, node:$def),
397          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
398  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
399         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
400         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
401}]>;
402// Any/Zero extending masked load fragments.
403def azext_masked_load :
404  PatFrag<(ops node:$ptr, node:$pred, node:$def),
405          (masked_ld node:$ptr, undef, node:$pred, node:$def),[{
406  return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
407          cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
408         cast<MaskedLoadSDNode>(N)->isUnindexed();
409}]>;
410def azext_masked_load_i8 :
411  PatFrag<(ops node:$ptr, node:$pred, node:$def),
412          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
413  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
414}]>;
415def azext_masked_load_i16 :
416  PatFrag<(ops node:$ptr, node:$pred, node:$def),
417          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
418  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
419}]>;
420def azext_masked_load_i32 :
421  PatFrag<(ops node:$ptr, node:$pred, node:$def),
422          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
423  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
424}]>;
425// Sign extending masked load fragments.
426def sext_masked_load :
427  PatFrag<(ops node:$ptr, node:$pred, node:$def),
428          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
429  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
430         cast<MaskedLoadSDNode>(N)->isUnindexed();
431}]>;
432def sext_masked_load_i8 :
433  PatFrag<(ops node:$ptr, node:$pred, node:$def),
434          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
435  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
436}]>;
437def sext_masked_load_i16 :
438  PatFrag<(ops node:$ptr, node:$pred, node:$def),
439          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
440  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
441}]>;
442def sext_masked_load_i32 :
443  PatFrag<(ops node:$ptr, node:$pred, node:$def),
444          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
445  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
446}]>;
447
448def non_temporal_load :
449   PatFrag<(ops node:$ptr, node:$pred, node:$def),
450           (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
451   return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
452          cast<MaskedLoadSDNode>(N)->isUnindexed() &&
453          cast<MaskedLoadSDNode>(N)->isNonTemporal();
454}]>;
455
456// non-truncating masked store fragment.
457def nontrunc_masked_store :
458  PatFrag<(ops node:$val, node:$ptr, node:$pred),
459          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
460  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
461         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
462         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
463}]>;
464// truncating masked store fragments.
465def trunc_masked_store :
466  PatFrag<(ops node:$val, node:$ptr, node:$pred),
467          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
468  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
469         cast<MaskedStoreSDNode>(N)->isUnindexed();
470}]>;
471def trunc_masked_store_i8 :
472  PatFrag<(ops node:$val, node:$ptr, node:$pred),
473          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
474  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
475}]>;
476def trunc_masked_store_i16 :
477  PatFrag<(ops node:$val, node:$ptr, node:$pred),
478          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
479  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
480}]>;
481def trunc_masked_store_i32 :
482  PatFrag<(ops node:$val, node:$ptr, node:$pred),
483          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
484  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
485}]>;
486
487def non_temporal_store :
488  PatFrag<(ops node:$val, node:$ptr, node:$pred),
489          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
490  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
491         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
492         cast<MaskedStoreSDNode>(N)->isNonTemporal();
493}]>;
494
495multiclass masked_gather_scatter<PatFrags GatherScatterOp> {
496  // offsets = (signed)Index << sizeof(elt)
497  def NAME#_signed_scaled :
498    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
499            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
500    auto MGS = cast<MaskedGatherScatterSDNode>(N);
501    bool Signed = MGS->isIndexSigned() ||
502        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
503    return Signed && MGS->isIndexScaled();
504  }]>;
505  // offsets = (signed)Index
506  def NAME#_signed_unscaled :
507    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
508            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
509    auto MGS = cast<MaskedGatherScatterSDNode>(N);
510    bool Signed = MGS->isIndexSigned() ||
511        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
512    return Signed && !MGS->isIndexScaled();
513  }]>;
514  // offsets = (unsigned)Index << sizeof(elt)
515  def NAME#_unsigned_scaled :
516    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
517            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
518    auto MGS = cast<MaskedGatherScatterSDNode>(N);
519    bool Signed = MGS->isIndexSigned() ||
520        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
521    return !Signed && MGS->isIndexScaled();
522  }]>;
523  // offsets = (unsigned)Index
524  def NAME#_unsigned_unscaled :
525    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
526            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
527    auto MGS = cast<MaskedGatherScatterSDNode>(N);
528    bool Signed = MGS->isIndexSigned() ||
529        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
530    return !Signed && !MGS->isIndexScaled();
531  }]>;
532}
533
534defm nonext_masked_gather    : masked_gather_scatter<nonext_masked_gather>;
535defm azext_masked_gather_i8  : masked_gather_scatter<azext_masked_gather_i8>;
536defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>;
537defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>;
538defm sext_masked_gather_i8   : masked_gather_scatter<sext_masked_gather_i8>;
539defm sext_masked_gather_i16  : masked_gather_scatter<sext_masked_gather_i16>;
540defm sext_masked_gather_i32  : masked_gather_scatter<sext_masked_gather_i32>;
541
542defm nontrunc_masked_scatter  : masked_gather_scatter<nontrunc_masked_scatter>;
543defm trunc_masked_scatter_i8  : masked_gather_scatter<trunc_masked_scatter_i8>;
544defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>;
545defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>;
546
547// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
548def top16Zero: PatLeaf<(i32 GPR32:$src), [{
549  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
550         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
551  }]>;
552
553// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
554def top32Zero: PatLeaf<(i64 GPR64:$src), [{
555  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
556         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
557  }]>;
558
559// topbitsallzero - Return true if all bits except the lowest bit are known zero
560def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{
561  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
562         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
563  }]>;
564def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
565  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
566         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
567  }]>;
568
569// Node definitions.
570def AArch64adrp          : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
571def AArch64adr           : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
572def AArch64addlow        : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
573def AArch64LOADgot       : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
574def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
575                                SDCallSeqStart<[ SDTCisVT<0, i32>,
576                                                 SDTCisVT<1, i32> ]>,
577                                [SDNPHasChain, SDNPOutGlue]>;
578def AArch64callseq_end   : SDNode<"ISD::CALLSEQ_END",
579                                SDCallSeqEnd<[ SDTCisVT<0, i32>,
580                                               SDTCisVT<1, i32> ]>,
581                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
582def AArch64call          : SDNode<"AArch64ISD::CALL",
583                                SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
584                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
585                                 SDNPVariadic]>;
586
587def AArch64call_bti      : SDNode<"AArch64ISD::CALL_BTI",
588                                SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
589                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
590                                 SDNPVariadic]>;
591
592def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
593                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
594                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
595                              SDNPVariadic]>;
596
597def AArch64brcond        : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
598                                [SDNPHasChain]>;
599def AArch64cbz           : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
600                                [SDNPHasChain]>;
601def AArch64cbnz           : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
602                                [SDNPHasChain]>;
603def AArch64tbz           : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
604                                [SDNPHasChain]>;
605def AArch64tbnz           : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
606                                [SDNPHasChain]>;
607
608
609def AArch64csel          : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
610def AArch64csinv         : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
611def AArch64csneg         : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
612def AArch64csinc         : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
613def AArch64retglue       : SDNode<"AArch64ISD::RET_GLUE", SDTNone,
614                                [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
615def AArch64adc       : SDNode<"AArch64ISD::ADC",  SDTBinaryArithWithFlagsIn >;
616def AArch64sbc       : SDNode<"AArch64ISD::SBC",  SDTBinaryArithWithFlagsIn>;
617def AArch64add_flag  : SDNode<"AArch64ISD::ADDS",  SDTBinaryArithWithFlagsOut,
618                            [SDNPCommutative]>;
619def AArch64sub_flag  : SDNode<"AArch64ISD::SUBS",  SDTBinaryArithWithFlagsOut>;
620def AArch64and_flag  : SDNode<"AArch64ISD::ANDS",  SDTBinaryArithWithFlagsOut,
621                            [SDNPCommutative]>;
622def AArch64adc_flag  : SDNode<"AArch64ISD::ADCS",  SDTBinaryArithWithFlagsInOut>;
623def AArch64sbc_flag  : SDNode<"AArch64ISD::SBCS",  SDTBinaryArithWithFlagsInOut>;
624
625def AArch64ccmp      : SDNode<"AArch64ISD::CCMP",  SDT_AArch64CCMP>;
626def AArch64ccmn      : SDNode<"AArch64ISD::CCMN",  SDT_AArch64CCMP>;
627def AArch64fccmp     : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;
628
629def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
630
631def AArch64fcmp         : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
632def AArch64strict_fcmp  : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
633                                 [SDNPHasChain]>;
634def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
635                                 [SDNPHasChain]>;
636def AArch64any_fcmp     : PatFrags<(ops node:$lhs, node:$rhs),
637                                   [(AArch64strict_fcmp node:$lhs, node:$rhs),
638                                    (AArch64fcmp node:$lhs, node:$rhs)]>;
639
640def AArch64dup       : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
641def AArch64duplane8  : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
642def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
643def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
644def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
645def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>;
646
647def AArch64insr      : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;
648
649def AArch64zip1      : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
650def AArch64zip2      : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
651def AArch64uzp1      : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
652def AArch64uzp2      : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
653def AArch64trn1      : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
654def AArch64trn2      : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;
655
656def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
657def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
658def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
659def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
660def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
661def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
662def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;
663
664def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
665def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
666def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
667def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;
668
669def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
670def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
671def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
672def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
673def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
674def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
675def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
676def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
677def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
678def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
679
680def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
681def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;
682
683def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
684def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
685def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
686def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
687def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;
688
689def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
690def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
691def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;
692
693def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
694def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
695def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
696def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
697def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
698def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
699                        (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
700
701def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
702def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
703def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
704def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
705def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
706
707def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
708def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
709
710def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
711                  [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;
712
713def AArch64Prefetch        : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
714                               [SDNPHasChain, SDNPSideEffect]>;
715
716def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
717def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
718
719def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
720                                    SDT_AArch64TLSDescCallSeq,
721                                    [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
722                                     SDNPVariadic]>;
723
724
725def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
726                                 SDT_AArch64WrapperLarge>;
727
728def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;
729
730def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
731                                    SDTCisSameAs<1, 2>]>;
732def AArch64pmull    : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull,
733                             [SDNPCommutative]>;
734def AArch64smull    : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull,
735                             [SDNPCommutative]>;
736def AArch64umull    : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull,
737                             [SDNPCommutative]>;
738
739def AArch64frecpe   : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
740def AArch64frecps   : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
741def AArch64frsqrte  : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
742def AArch64frsqrts  : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;
743
744def AArch64sdot     : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
745def AArch64udot     : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;
746
747def AArch64saddv    : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
748def AArch64uaddv    : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
749def AArch64sminv    : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
750def AArch64uminv    : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
751def AArch64smaxv    : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
752def AArch64umaxv    : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
753
754def AArch64uabd     : PatFrags<(ops node:$lhs, node:$rhs),
755                               [(abdu node:$lhs, node:$rhs),
756                                (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
757def AArch64sabd     : PatFrags<(ops node:$lhs, node:$rhs),
758                               [(abds node:$lhs, node:$rhs),
759                                (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;
760
761def AArch64addp_n   : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
762def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
763def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>;
764def AArch64addp     : PatFrags<(ops node:$Rn, node:$Rm),
765                               [(AArch64addp_n node:$Rn, node:$Rm),
766                                (int_aarch64_neon_addp node:$Rn, node:$Rm)]>;
767def AArch64uaddlp   : PatFrags<(ops node:$src),
768                               [(AArch64uaddlp_n node:$src),
769                                (int_aarch64_neon_uaddlp node:$src)]>;
770def AArch64saddlp   : PatFrags<(ops node:$src),
771                               [(AArch64saddlp_n node:$src),
772                                (int_aarch64_neon_saddlp node:$src)]>;
773def AArch64faddp     : PatFrags<(ops node:$Rn, node:$Rm),
774                                [(AArch64addp_n node:$Rn, node:$Rm),
775                                 (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
776def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;
777def AArch64facge     : PatFrags<(ops node:$Rn, node:$Rm),
778                                [(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)),
779                                 (int_aarch64_neon_facge node:$Rn, node:$Rm)]>;
780def AArch64facgt     : PatFrags<(ops node:$Rn, node:$Rm),
781                                [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)),
782                                 (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>;
783
784def AArch64fmaxnmv : PatFrags<(ops node:$Rn),
785                              [(vecreduce_fmax node:$Rn),
786                               (int_aarch64_neon_fmaxnmv node:$Rn)]>;
787def AArch64fminnmv : PatFrags<(ops node:$Rn),
788                              [(vecreduce_fmin node:$Rn),
789                               (int_aarch64_neon_fminnmv node:$Rn)]>;
790def AArch64fmaxv : PatFrags<(ops node:$Rn),
791                            [(vecreduce_fmaximum node:$Rn),
792                             (int_aarch64_neon_fmaxv node:$Rn)]>;
793def AArch64fminv : PatFrags<(ops node:$Rn),
794                            [(vecreduce_fminimum node:$Rn),
795                             (int_aarch64_neon_fminv node:$Rn)]>;
796
797def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
798def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
799def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
800def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
801def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
802
803def SDT_AArch64unpk : SDTypeProfile<1, 1, [
804    SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
805]>;
806def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
807def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
808def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
809def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;
810
811def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
812def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
813def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
814def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
815def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
816def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
817
818def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
819def AArch64mrs : SDNode<"AArch64ISD::MRS",
820                        SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
821                        [SDNPHasChain, SDNPOutGlue]>;
822
823// Match add node and also treat an 'or' node is as an 'add' if the or'ed operands
824// have no common bits.
825def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
826                         [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
827   if (N->getOpcode() == ISD::ADD)
828     return true;
829   return CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
830}]> {
831  let GISelPredicateCode = [{
832     // Only handle G_ADD for now. FIXME. build capability to compute whether
833     // operands of G_OR have common bits set or not.
834     return MI.getOpcode() == TargetOpcode::G_ADD;
835  }];
836}
837
838// Match mul with enough sign-bits. Can be reduced to a smaller mul operand.
839def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
840  return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
841         CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
842}]>;
843
844//===----------------------------------------------------------------------===//
845
846//===----------------------------------------------------------------------===//
847
848// AArch64 Instruction Predicate Definitions.
849// We could compute these on a per-module basis but doing so requires accessing
850// the Function object through the <Target>Subtarget and objections were raised
851// to that (see post-commit review comments for r301750).
852let RecomputePerFunction = 1 in {
853  def ForCodeSize   : Predicate<"shouldOptForSize(MF)">;
854  def NotForCodeSize   : Predicate<"!shouldOptForSize(MF)">;
855  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
856  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;
857
858  def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
859  def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
860
861  def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
862  def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
863  // Toggles patterns which aren't beneficial in GlobalISel when we aren't
864  // optimizing. This allows us to selectively use patterns without impacting
865  // SelectionDAG's behaviour.
866  // FIXME: One day there will probably be a nicer way to check for this, but
867  // today is not that day.
868  def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
869}
870
871include "AArch64InstrFormats.td"
872include "SVEInstrFormats.td"
873include "SMEInstrFormats.td"
874
875//===----------------------------------------------------------------------===//
876
877//===----------------------------------------------------------------------===//
878// Miscellaneous instructions.
879//===----------------------------------------------------------------------===//
880
881let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
882// We set Sched to empty list because we expect these instructions to simply get
883// removed in most cases.
884def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
885                              [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
886                              Sched<[]>;
887def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
888                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
889                            Sched<[]>;
890} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1
891
892let isReMaterializable = 1, isCodeGenOnly = 1 in {
893// FIXME: The following pseudo instructions are only needed because remat
894// cannot handle multiple instructions.  When that changes, they can be
895// removed, along with the AArch64Wrapper node.
896
897let AddedComplexity = 10 in
898def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
899                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
900              Sched<[WriteLDAdr]>;
901
902// The MOVaddr instruction should match only when the add is not folded
903// into a load or store address.
904def MOVaddr
905    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
906             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
907                                            tglobaladdr:$low))]>,
908      Sched<[WriteAdrAdr]>;
909def MOVaddrJT
910    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
911             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
912                                             tjumptable:$low))]>,
913      Sched<[WriteAdrAdr]>;
914def MOVaddrCP
915    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
916             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
917                                             tconstpool:$low))]>,
918      Sched<[WriteAdrAdr]>;
919def MOVaddrBA
920    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
921             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
922                                             tblockaddress:$low))]>,
923      Sched<[WriteAdrAdr]>;
924def MOVaddrTLS
925    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
926             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
927                                            tglobaltlsaddr:$low))]>,
928      Sched<[WriteAdrAdr]>;
929def MOVaddrEXT
930    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
931             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
932                                            texternalsym:$low))]>,
933      Sched<[WriteAdrAdr]>;
934// Normally AArch64addlow either gets folded into a following ldr/str,
935// or together with an adrp into MOVaddr above. For cases with TLS, it
936// might appear without either of them, so allow lowering it into a plain
937// add.
938def ADDlowTLS
939    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
940             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
941                                            tglobaltlsaddr:$low))]>,
942      Sched<[WriteAdr]>;
943
944} // isReMaterializable, isCodeGenOnly
945
946def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
947          (LOADgot tglobaltlsaddr:$addr)>;
948
949def : Pat<(AArch64LOADgot texternalsym:$addr),
950          (LOADgot texternalsym:$addr)>;
951
952def : Pat<(AArch64LOADgot tconstpool:$addr),
953          (LOADgot tconstpool:$addr)>;
954
955// In general these get lowered into a sequence of three 4-byte instructions.
956// 32-bit jump table destination is actually only 2 instructions since we can
957// use the table itself as a PC-relative base. But optimization occurs after
958// branch relaxation so be pessimistic.
959let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
960    isNotDuplicable = 1 in {
961def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
962                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
963                      Sched<[]>;
964def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
965                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
966                      Sched<[]>;
967def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
968                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
969                     Sched<[]>;
970}
971
972// Space-consuming pseudo to aid testing of placement and reachability
973// algorithms. Immediate operand is the number of bytes this "instruction"
974// occupies; register operands can be used to enforce dependency and constrain
975// the scheduler.
976let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
977def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
978                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
979            Sched<[]>;
980
981let hasSideEffects = 1, isCodeGenOnly = 1 in {
982  def SpeculationSafeValueX
983      : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
984  def SpeculationSafeValueW
985      : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
986}
987
988// SpeculationBarrierEndBB must only be used after an unconditional control
989// flow, i.e. after a terminator for which isBarrier is True.
990let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
991  // This gets lowered to a pair of 4-byte instructions.
992  let Size = 8 in
993  def SpeculationBarrierISBDSBEndBB
994      : Pseudo<(outs), (ins), []>, Sched<[]>;
995  // This gets lowered to a 4-byte instruction.
996  let Size = 4 in
997  def SpeculationBarrierSBEndBB
998      : Pseudo<(outs), (ins), []>, Sched<[]>;
999}
1000
1001//===----------------------------------------------------------------------===//
1002// System instructions.
1003//===----------------------------------------------------------------------===//
1004
1005def HINT : HintI<"hint">;
1006def : InstAlias<"nop",  (HINT 0b000)>;
1007def : InstAlias<"yield",(HINT 0b001)>;
1008def : InstAlias<"wfe",  (HINT 0b010)>;
1009def : InstAlias<"wfi",  (HINT 0b011)>;
1010def : InstAlias<"sev",  (HINT 0b100)>;
1011def : InstAlias<"sevl", (HINT 0b101)>;
1012def : InstAlias<"dgh",  (HINT 0b110)>;
1013def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
1014def : InstAlias<"csdb", (HINT 20)>;
1015// In order to be able to write readable assembly, LLVM should accept assembly
1016// inputs that use Branch Target Indentification mnemonics, even with BTI disabled.
1017// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
1018// should not emit these mnemonics unless BTI is enabled.
1019def : InstAlias<"bti",  (HINT 32), 0>;
1020def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
1021def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
1022def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
1023
1024// v8.2a Statistical Profiling extension
1025def : InstAlias<"psb $op",  (HINT psbhint_op:$op)>, Requires<[HasSPE]>;
1026
1027// As far as LLVM is concerned this writes to the system's exclusive monitors.
1028let mayLoad = 1, mayStore = 1 in
1029def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
1030
1031// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
1032// model patterns with sufficiently fine granularity.
1033let mayLoad = ?, mayStore = ? in {
1034def DMB   : CRmSystemI<barrier_op, 0b101, "dmb",
1035                       [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;
1036
1037def DSB   : CRmSystemI<barrier_op, 0b100, "dsb",
1038                       [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;
1039
1040def ISB   : CRmSystemI<barrier_op, 0b110, "isb",
1041                       [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;
1042
1043def TSB   : CRmSystemI<barrier_op, 0b010, "tsb", []> {
1044  let CRm        = 0b0010;
1045  let Inst{12}   = 0;
1046  let Predicates = [HasTRACEV8_4];
1047}
1048
1049def DSBnXS  : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
1050  let CRm{1-0}   = 0b11;
1051  let Inst{9-8}  = 0b10;
1052  let Predicates = [HasXS];
1053}
1054
1055let Predicates = [HasWFxT] in {
1056def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
1057def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
1058}
1059
1060// Branch Record Buffer two-word mnemonic instructions
1061class BRBEI<bits<3> op2, string keyword>
1062    : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
1063  let Inst{31-8} = 0b110101010000100101110010;
1064  let Inst{7-5} = op2;
1065  let Predicates = [HasBRBE];
1066}
1067def BRB_IALL: BRBEI<0b100, "\tiall">;
1068def BRB_INJ:  BRBEI<0b101, "\tinj">;
1069
1070}
1071
1072// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
1073def : TokenAlias<"INJ", "inj">;
1074def : TokenAlias<"IALL", "iall">;
1075
1076
1077// ARMv9.4-A Guarded Control Stack
1078class GCSNoOp<bits<3> op2, string mnemonic>
1079    : SimpleSystemI<0, (ins), mnemonic, "">, Sched<[]> {
1080  let Inst{20-8} = 0b0100001110111;
1081  let Inst{7-5} = op2;
1082  let Predicates = [HasGCS];
1083}
1084def GCSPUSHX : GCSNoOp<0b100, "gcspushx">;
1085def GCSPOPCX : GCSNoOp<0b101, "gcspopcx">;
1086def GCSPOPX  : GCSNoOp<0b110, "gcspopx">;
1087
1088class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
1089            list<dag> pattern = []>
1090    : RtSystemI<0, (outs), (ins GPR64:$Rt), mnemonic, "\t$Rt", pattern> {
1091  let Inst{20-19} = 0b01;
1092  let Inst{18-16} = op1;
1093  let Inst{15-8} = 0b01110111;
1094  let Inst{7-5} = op2;
1095  let Predicates = [HasGCS];
1096}
1097
1098def GCSSS1   : GCSRtIn<0b011, 0b010, "gcsss1">;
1099def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;
1100
1101class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
1102            list<dag> pattern = []>
1103    : RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> {
1104  let Inst{20-19} = 0b01;
1105  let Inst{18-16} = op1;
1106  let Inst{15-8} = 0b01110111;
1107  let Inst{7-5} = op2;
1108  let Predicates = [HasGCS];
1109}
1110
1111def GCSSS2  : GCSRtOut<0b011, 0b011, "gcsss2">;
1112def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">;
1113def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent
1114
1115def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>;
1116def GCSB_DSYNC         : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>;
1117
1118def : TokenAlias<"DSYNC", "dsync">;
1119
1120let Uses = [X16], Defs = [X16], CRm = 0b0101 in {
1121  def CHKFEAT   : SystemNoOperands<0b000, "hint\t#40">;
1122}
1123def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>;
1124def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;
1125
1126class GCSSt<string mnemonic, bits<3> op>
1127    : I<(outs), (ins GPR64:$Rt, GPR64sp:$Rn), mnemonic, "\t$Rt, $Rn", "", []>, Sched<[]> {
1128  bits<5> Rt;
1129  bits<5> Rn;
1130  let Inst{31-15} = 0b11011001000111110;
1131  let Inst{14-12} = op;
1132  let Inst{11-10} = 0b11;
1133  let Inst{9-5} = Rn;
1134  let Inst{4-0} = Rt;
1135  let Predicates = [HasGCS];
1136}
1137def GCSSTR  : GCSSt<"gcsstr",  0b000>;
1138def GCSSTTR : GCSSt<"gcssttr", 0b001>;
1139
1140
1141// ARMv8.2-A Dot Product
1142let Predicates = [HasDotProd] in {
1143defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
1144defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
1145defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
1146defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
1147}
1148
1149// ARMv8.6-A BFloat
1150let Predicates = [HasNEON, HasBF16] in {
1151defm BFDOT       : SIMDThreeSameVectorBFDot<1, "bfdot">;
1152defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
1153def BFMMLA       : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
1154def BFMLALB      : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
1155def BFMLALT      : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
1156def BFMLALBIdx   : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
1157def BFMLALTIdx   : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
1158def BFCVTN       : SIMD_BFCVTN;
1159def BFCVTN2      : SIMD_BFCVTN2;
1160
1161// Vector-scalar BFDOT:
1162// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
1163// register (the instruction uses a single 32-bit lane from it), so the pattern
1164// is a bit tricky.
1165def : Pat<(v2f32 (int_aarch64_neon_bfdot
1166                    (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
1167                    (v4bf16 (bitconvert
1168                      (v2i32 (AArch64duplane32
1169                        (v4i32 (bitconvert
1170                          (v8bf16 (insert_subvector undef,
1171                            (v4bf16 V64:$Rm),
1172                            (i64 0))))),
1173                        VectorIndexS:$idx)))))),
1174          (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
1175                             (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
1176                             VectorIndexS:$idx)>;
1177}
1178
1179let Predicates = [HasNEONorSME, HasBF16] in {
1180def BFCVT : BF16ToSinglePrecision<"bfcvt">;
1181}
1182
1183// ARMv8.6A AArch64 matrix multiplication
1184let Predicates = [HasMatMulInt8] in {
1185def  SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
1186def  UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
1187def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
1188defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
1189defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;
1190
1191// sudot lane has a pattern where usdot is expected (there is no sudot).
1192// The second operand is used in the dup operation to repeat the indexed
1193// element.
1194class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
1195                         string rhs_kind, RegisterOperand RegType,
1196                         ValueType AccumType, ValueType InputType>
1197      : BaseSIMDThreeSameVectorDotIndex<Q, 0, 1, 0b00, "sudot", dst_kind,
1198                                        lhs_kind, rhs_kind, RegType, AccumType,
1199                                        InputType, null_frag> {
1200  let Pattern = [(set (AccumType RegType:$dst),
1201                      (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd),
1202                                 (InputType (bitconvert (AccumType
1203                                    (AArch64duplane32 (v4i32 V128:$Rm),
1204                                        VectorIndexS:$idx)))),
1205                                 (InputType RegType:$Rn))))];
1206}
1207
1208multiclass SIMDSUDOTIndex {
1209  def v8i8  : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
1210  def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
1211}
1212
1213defm SUDOTlane : SIMDSUDOTIndex;
1214
1215}
1216
1217// ARMv8.2-A FP16 Fused Multiply-Add Long
1218let Predicates = [HasNEON, HasFP16FML] in {
1219defm FMLAL      : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
1220defm FMLSL      : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
1221defm FMLAL2     : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
1222defm FMLSL2     : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
1223defm FMLALlane  : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
1224defm FMLSLlane  : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
1225defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
1226defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
1227}
1228
1229// Armv8.2-A Crypto extensions
1230let Predicates = [HasSHA3] in {
1231def SHA512H   : CryptoRRRTied<0b0, 0b00, "sha512h">;
1232def SHA512H2  : CryptoRRRTied<0b0, 0b01, "sha512h2">;
1233def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
1234def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
1235def RAX1      : CryptoRRR_2D<0b0,0b11, "rax1">;
1236def EOR3      : CryptoRRRR_16B<0b00, "eor3">;
1237def BCAX      : CryptoRRRR_16B<0b01, "bcax">;
1238def XAR       : CryptoRRRi6<"xar">;
1239
1240class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
1241  : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
1242        (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;
1243
1244def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
1245          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1246
1247def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
1248def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
1249def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;
1250
1251def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
1252def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
1253def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
1254def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;
1255
1256class EOR3_pattern<ValueType VecTy>
1257  : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
1258        (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
1259
1260def : EOR3_pattern<v16i8>;
1261def : EOR3_pattern<v8i16>;
1262def : EOR3_pattern<v4i32>;
1263def : EOR3_pattern<v2i64>;
1264
1265class BCAX_pattern<ValueType VecTy>
1266  : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),
1267        (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
1268
1269def : BCAX_pattern<v16i8>;
1270def : BCAX_pattern<v8i16>;
1271def : BCAX_pattern<v4i32>;
1272def : BCAX_pattern<v2i64>;
1273
1274def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
1275def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
1276def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
1277def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;
1278
1279def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
1280def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
1281def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
1282def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;
1283
1284def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
1285def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
1286def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
1287def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;
1288
1289def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
1290          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1291
1292def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
1293          (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;
1294
1295def : Pat<(xor  (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))),
1296          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1297
1298} // HasSHA3
1299
1300let Predicates = [HasSM4] in {
1301def SM3TT1A   : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
1302def SM3TT1B   : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
1303def SM3TT2A   : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
1304def SM3TT2B   : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
1305def SM3SS1    : CryptoRRRR_4S<0b10, "sm3ss1">;
1306def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
1307def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
1308def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
1309def SM4E      : CryptoRRTied_4S<0b0, 0b01, "sm4e">;
1310
1311def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))),
1312          (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>;
1313
1314class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode>
1315  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
1316        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;
1317
1318class SM3TT_pattern<Instruction INST, Intrinsic OpNode>
1319  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )),
1320        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>;
1321
1322class SM4_pattern<Instruction INST, Intrinsic OpNode>
1323  : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
1324        (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;
1325
1326def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>;
1327def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>;
1328
1329def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>;
1330def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>;
1331def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>;
1332def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>;
1333
1334def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>;
1335def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>;
1336} // HasSM4
1337
1338let Predicates = [HasRCPC] in {
1339  // v8.3 Release Consistent Processor Consistent support, optional in v8.2.
1340  def LDAPRB  : RCPCLoad<0b00, "ldaprb", GPR32>;
1341  def LDAPRH  : RCPCLoad<0b01, "ldaprh", GPR32>;
1342  def LDAPRW  : RCPCLoad<0b10, "ldapr", GPR32>;
1343  def LDAPRX  : RCPCLoad<0b11, "ldapr", GPR64>;
1344}
1345
1346// v8.3a complex add and multiply-accumulate. No predicate here, that is done
1347// inside the multiclass as the FP16 versions need different predicates.
1348defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
1349                                               "fcmla", null_frag>;
1350defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
1351                                           "fcadd", null_frag>;
1352defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;
1353
1354let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
1355  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
1356            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>;
1357  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
1358            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
1359  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
1360            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>;
1361  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
1362            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
1363}
1364
1365let Predicates = [HasComplxNum, HasNEON] in {
1366  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
1367            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
1368  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
1369            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
1370  foreach Ty = [v4f32, v2f64] in {
1371    def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))),
1372              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>;
1373    def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))),
1374              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
1375  }
1376}
1377
1378multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> {
1379  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1380            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
1381  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1382            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
1383  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1384            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
1385  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1386            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
1387}
1388
1389multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> {
1390  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1391            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
1392  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1393            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
1394  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1395            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
1396  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1397            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
1398}
1399
1400
1401let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
1402  defm : FCMLA_PATS<v4f16, V64>;
1403  defm : FCMLA_PATS<v8f16, V128>;
1404
1405  defm : FCMLA_LANE_PATS<v4f16, V64,
1406                         (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
1407  defm : FCMLA_LANE_PATS<v8f16, V128,
1408                         (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
1409}
1410let Predicates = [HasComplxNum, HasNEON] in {
1411  defm : FCMLA_PATS<v2f32, V64>;
1412  defm : FCMLA_PATS<v4f32, V128>;
1413  defm : FCMLA_PATS<v2f64, V128>;
1414
1415  defm : FCMLA_LANE_PATS<v4f32, V128,
1416                         (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
1417}
1418
1419// v8.3a Pointer Authentication
1420// These instructions inhabit part of the hint space and so can be used for
1421// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
1422// important for compatibility with other assemblers (e.g. GAS) when building
1423// software compatible with both CPUs that do or don't implement PA.
1424let Uses = [LR], Defs = [LR] in {
1425  def PACIAZ   : SystemNoOperands<0b000, "hint\t#24">;
1426  def PACIBZ   : SystemNoOperands<0b010, "hint\t#26">;
1427  let isAuthenticated = 1 in {
1428    def AUTIAZ   : SystemNoOperands<0b100, "hint\t#28">;
1429    def AUTIBZ   : SystemNoOperands<0b110, "hint\t#30">;
1430  }
1431}
1432let Uses = [LR, SP], Defs = [LR] in {
1433  def PACIASP  : SystemNoOperands<0b001, "hint\t#25">;
1434  def PACIBSP  : SystemNoOperands<0b011, "hint\t#27">;
1435  let isAuthenticated = 1 in {
1436    def AUTIASP  : SystemNoOperands<0b101, "hint\t#29">;
1437    def AUTIBSP  : SystemNoOperands<0b111, "hint\t#31">;
1438  }
1439}
1440let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
1441  def PACIA1716  : SystemNoOperands<0b000, "hint\t#8">;
1442  def PACIB1716  : SystemNoOperands<0b010, "hint\t#10">;
1443  let isAuthenticated = 1 in {
1444    def AUTIA1716  : SystemNoOperands<0b100, "hint\t#12">;
1445    def AUTIB1716  : SystemNoOperands<0b110, "hint\t#14">;
1446  }
1447}
1448
1449let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
1450  def XPACLRI   : SystemNoOperands<0b111, "hint\t#7">;
1451}
1452
1453// In order to be able to write readable assembly, LLVM should accept assembly
1454// inputs that use pointer authentication mnemonics, even with PA disabled.
1455// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
1456// should not emit these mnemonics unless PA is enabled.
1457def : InstAlias<"paciaz", (PACIAZ), 0>;
1458def : InstAlias<"pacibz", (PACIBZ), 0>;
1459def : InstAlias<"autiaz", (AUTIAZ), 0>;
1460def : InstAlias<"autibz", (AUTIBZ), 0>;
1461def : InstAlias<"paciasp", (PACIASP), 0>;
1462def : InstAlias<"pacibsp", (PACIBSP), 0>;
1463def : InstAlias<"autiasp", (AUTIASP), 0>;
1464def : InstAlias<"autibsp", (AUTIBSP), 0>;
1465def : InstAlias<"pacia1716", (PACIA1716), 0>;
1466def : InstAlias<"pacib1716", (PACIB1716), 0>;
1467def : InstAlias<"autia1716", (AUTIA1716), 0>;
1468def : InstAlias<"autib1716", (AUTIB1716), 0>;
1469def : InstAlias<"xpaclri", (XPACLRI), 0>;
1470
1471// These pointer authentication instructions require armv8.3a
1472let Predicates = [HasPAuth] in {
1473
1474  // When PA is enabled, a better mnemonic should be emitted.
1475  def : InstAlias<"paciaz", (PACIAZ), 1>;
1476  def : InstAlias<"pacibz", (PACIBZ), 1>;
1477  def : InstAlias<"autiaz", (AUTIAZ), 1>;
1478  def : InstAlias<"autibz", (AUTIBZ), 1>;
1479  def : InstAlias<"paciasp", (PACIASP), 1>;
1480  def : InstAlias<"pacibsp", (PACIBSP), 1>;
1481  def : InstAlias<"autiasp", (AUTIASP), 1>;
1482  def : InstAlias<"autibsp", (AUTIBSP), 1>;
1483  def : InstAlias<"pacia1716", (PACIA1716), 1>;
1484  def : InstAlias<"pacib1716", (PACIB1716), 1>;
1485  def : InstAlias<"autia1716", (AUTIA1716), 1>;
1486  def : InstAlias<"autib1716", (AUTIB1716), 1>;
1487  def : InstAlias<"xpaclri", (XPACLRI), 1>;
1488
1489  multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm,
1490                      SDPatternOperator op> {
1491    def IA   : SignAuthOneData<prefix, 0b00, !strconcat(asm,  "ia"), op>;
1492    def IB   : SignAuthOneData<prefix, 0b01, !strconcat(asm,  "ib"), op>;
1493    def DA   : SignAuthOneData<prefix, 0b10, !strconcat(asm,  "da"), op>;
1494    def DB   : SignAuthOneData<prefix, 0b11, !strconcat(asm,  "db"), op>;
1495    def IZA  : SignAuthZero<prefix_z,  0b00, !strconcat(asm, "iza"), op>;
1496    def DZA  : SignAuthZero<prefix_z,  0b10, !strconcat(asm, "dza"), op>;
1497    def IZB  : SignAuthZero<prefix_z,  0b01, !strconcat(asm, "izb"), op>;
1498    def DZB  : SignAuthZero<prefix_z,  0b11, !strconcat(asm, "dzb"), op>;
1499  }
1500
1501  defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
1502  defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;
1503
1504  def XPACI : ClearAuth<0, "xpaci">;
1505  def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>;
1506  def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>;
1507
1508  def XPACD : ClearAuth<1, "xpacd">;
1509  def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>;
1510  def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>;
1511
1512  def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>;
1513
1514  // Combined Instructions
1515  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1  in {
1516    def BRAA    : AuthBranchTwoOperands<0, 0, "braa">;
1517    def BRAB    : AuthBranchTwoOperands<0, 1, "brab">;
1518  }
1519  let isCall = 1, Defs = [LR], Uses = [SP] in {
1520    def BLRAA   : AuthBranchTwoOperands<1, 0, "blraa">;
1521    def BLRAB   : AuthBranchTwoOperands<1, 1, "blrab">;
1522  }
1523
1524  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1  in {
1525    def BRAAZ   : AuthOneOperand<0b000, 0, "braaz">;
1526    def BRABZ   : AuthOneOperand<0b000, 1, "brabz">;
1527  }
1528  let isCall = 1, Defs = [LR], Uses = [SP] in {
1529    def BLRAAZ  : AuthOneOperand<0b001, 0, "blraaz">;
1530    def BLRABZ  : AuthOneOperand<0b001, 1, "blrabz">;
1531  }
1532
1533  let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
1534    def RETAA   : AuthReturn<0b010, 0, "retaa">;
1535    def RETAB   : AuthReturn<0b010, 1, "retab">;
1536    def ERETAA  : AuthReturn<0b100, 0, "eretaa">;
1537    def ERETAB  : AuthReturn<0b100, 1, "eretab">;
1538  }
1539
1540  defm LDRAA  : AuthLoad<0, "ldraa", simm10Scaled>;
1541  defm LDRAB  : AuthLoad<1, "ldrab", simm10Scaled>;
1542
1543}
1544
1545// v8.3a floating point conversion for javascript
1546let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in
1547def FJCVTZS  : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
1548                                      "fjcvtzs",
1549                                      [(set GPR32:$Rd,
1550                                         (int_aarch64_fjcvtzs FPR64:$Rn))]> {
1551  let Inst{31} = 0;
1552} // HasJS, HasFPARMv8
1553
1554// v8.4 Flag manipulation instructions
1555let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in {
1556def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
1557  let Inst{20-5} = 0b0000001000000000;
1558}
1559def SETF8  : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
1560def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
1561def RMIF   : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
1562                        "{\t$Rn, $imm, $mask}">;
1563} // HasFlagM
1564
1565// v8.5 flag manipulation instructions
1566let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {
1567
1568def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> {
1569  let Inst{18-16} = 0b000;
1570  let Inst{11-8} = 0b0000;
1571  let Unpredictable{11-8} = 0b1111;
1572  let Inst{7-5} = 0b001;
1573}
1574
1575def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> {
1576  let Inst{18-16} = 0b000;
1577  let Inst{11-8} = 0b0000;
1578  let Unpredictable{11-8} = 0b1111;
1579  let Inst{7-5} = 0b010;
1580}
1581} // HasAltNZCV
1582
1583
1584// Armv8.5-A speculation barrier
1585def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> {
1586  let Inst{20-5} = 0b0001100110000111;
1587  let Unpredictable{11-8} = 0b1111;
1588  let Predicates = [HasSB];
1589  let hasSideEffects = 1;
1590}
1591
1592def : InstAlias<"clrex", (CLREX 0xf)>;
1593def : InstAlias<"isb", (ISB 0xf)>;
1594def : InstAlias<"ssbb", (DSB 0)>;
1595def : InstAlias<"pssbb", (DSB 4)>;
1596def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>;
1597
1598def MRS    : MRSI;
1599def MSR    : MSRI;
1600def MSRpstateImm1 : MSRpstateImm0_1;
1601def MSRpstateImm4 : MSRpstateImm0_15;
1602
1603def : Pat<(AArch64mrs imm:$id),
1604          (MRS imm:$id)>;
1605
1606// The thread pointer (on Linux, at least, where this has been implemented) is
1607// TPIDR_EL0.
1608def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
1609                       [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
1610
1611// This gets lowered into a 24-byte instruction sequence
1612let Defs = [ X9, X16, X17, NZCV ], Size = 24 in {
1613def KCFI_CHECK : Pseudo<
1614  (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>;
1615}
1616
1617let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
1618def HWASAN_CHECK_MEMACCESS : Pseudo<
1619  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
1620  [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
1621  Sched<[]>;
1622}
1623
1624let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
1625def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
1626  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
1627  [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
1628  Sched<[]>;
1629}
1630
1631// The virtual cycle counter register is CNTVCT_EL0.
1632def : Pat<(readcyclecounter), (MRS 0xdf02)>;
1633
1634// FPCR register
1635let Uses = [FPCR] in
1636def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins),
1637                      [(set GPR64:$dst, (int_aarch64_get_fpcr))]>,
1638               PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>,
1639               Sched<[WriteSys]>;
1640let Defs = [FPCR] in
1641def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val),
1642                      [(int_aarch64_set_fpcr i64:$val)]>,
1643               PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>,
1644               Sched<[WriteSys]>;
1645
1646// Generic system instructions
1647def SYSxt  : SystemXtI<0, "sys">;
1648def SYSLxt : SystemLXtI<1, "sysl">;
1649
1650def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
1651                (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
1652                 sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
1653
1654
1655let Predicates = [HasTME] in {
1656
1657def TSTART : TMSystemI<0b0000, "tstart",
1658                      [(set GPR64:$Rt, (int_aarch64_tstart))]>;
1659
1660def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;
1661
1662def TCANCEL : TMSystemException<0b011, "tcancel",
1663                                [(int_aarch64_tcancel timm64_0_65535:$imm)]>;
1664
1665def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> {
1666  let mayLoad = 0;
1667  let mayStore = 0;
1668}
1669} // HasTME
1670
1671//===----------------------------------------------------------------------===//
1672// Move immediate instructions.
1673//===----------------------------------------------------------------------===//
1674
1675defm MOVK : InsertImmediate<0b11, "movk">;
1676defm MOVN : MoveImmediate<0b00, "movn">;
1677
1678let PostEncoderMethod = "fixMOVZ" in
1679defm MOVZ : MoveImmediate<0b10, "movz">;
1680
1681// First group of aliases covers an implicit "lsl #0".
1682def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>;
1683def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>;
1684def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
1685def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
1686def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
1687def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
1688
1689// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
1690def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
1691def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
1692def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
1693def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;
1694
1695def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
1696def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
1697def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
1698def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;
1699
1700def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>;
1701def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>;
1702def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>;
1703def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>;
1704
1705def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
1706def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;
1707
1708def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
1709def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;
1710
1711def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>;
1712def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>;
1713
1714// Final group of aliases covers true "mov $Rd, $imm" cases.
1715multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR,
1716                          int width, int shift> {
1717  def _asmoperand : AsmOperandClass {
1718    let Name = basename # width # "_lsl" # shift # "MovAlias";
1719    let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
1720                               # shift # ">";
1721    let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
1722  }
1723
1724  def _movimm : Operand<i32> {
1725    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
1726  }
1727
1728  def : InstAlias<"mov $Rd, $imm",
1729                  (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
1730}
1731
1732defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
1733defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;
1734
1735defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
1736defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
1737defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
1738defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;
1739
1740defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
1741defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;
1742
1743defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
1744defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
1745defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
1746defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;
1747
1748let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
1749    isAsCheapAsAMove = 1 in {
1750// FIXME: The following pseudo instructions are only needed because remat
1751// cannot handle multiple instructions.  When that changes, we can select
1752// directly to the real instructions and get rid of these pseudos.
1753
1754def MOVi32imm
1755    : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
1756             [(set GPR32:$dst, imm:$src)]>,
1757      Sched<[WriteImm]>;
1758def MOVi64imm
1759    : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
1760             [(set GPR64:$dst, imm:$src)]>,
1761      Sched<[WriteImm]>;
1762} // isReMaterializable, isCodeGenOnly
1763
1764// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
1765// eventual expansion code fewer bits to worry about getting right. Marshalling
1766// the types is a little tricky though:
1767def i64imm_32bit : ImmLeaf<i64, [{
1768  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
1769}]>;
1770
1771def s64imm_32bit : ImmLeaf<i64, [{
1772  int64_t Imm64 = static_cast<int64_t>(Imm);
1773  return Imm64 >= std::numeric_limits<int32_t>::min() &&
1774         Imm64 <= std::numeric_limits<int32_t>::max();
1775}]>;
1776
1777def trunc_imm : SDNodeXForm<imm, [{
1778  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
1779}]>;
1780
1781def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">,
1782  GISDNodeXFormEquiv<trunc_imm>;
1783
1784let Predicates = [OptimizedGISelOrOtherSelector] in {
1785// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
1786// copies.
1787def : Pat<(i64 i64imm_32bit:$src),
1788          (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
1789}
1790
1791// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
1792def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
1793return CurDAG->getTargetConstant(
1794  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
1795}]>;
1796
1797def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
1798return CurDAG->getTargetConstant(
1799  N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
1800}]>;
1801
1802
1803def : Pat<(f32 fpimm:$in),
1804  (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
1805def : Pat<(f64 fpimm:$in),
1806  (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;
1807
1808
1809// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
1810// sequences.
1811def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
1812                             tglobaladdr:$g1, tglobaladdr:$g0),
1813          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
1814                                  tglobaladdr:$g1, 16),
1815                          tglobaladdr:$g2, 32),
1816                  tglobaladdr:$g3, 48)>;
1817
1818def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
1819                             tblockaddress:$g1, tblockaddress:$g0),
1820          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
1821                                  tblockaddress:$g1, 16),
1822                          tblockaddress:$g2, 32),
1823                  tblockaddress:$g3, 48)>;
1824
1825def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
1826                             tconstpool:$g1, tconstpool:$g0),
1827          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
1828                                  tconstpool:$g1, 16),
1829                          tconstpool:$g2, 32),
1830                  tconstpool:$g3, 48)>;
1831
1832def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
1833                             tjumptable:$g1, tjumptable:$g0),
1834          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
1835                                  tjumptable:$g1, 16),
1836                          tjumptable:$g2, 32),
1837                  tjumptable:$g3, 48)>;
1838
1839
1840//===----------------------------------------------------------------------===//
1841// Arithmetic instructions.
1842//===----------------------------------------------------------------------===//
1843
1844// Add/subtract with carry.
1845defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
1846defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;
1847
1848def : InstAlias<"ngc $dst, $src",  (SBCWr  GPR32:$dst, WZR, GPR32:$src)>;
1849def : InstAlias<"ngc $dst, $src",  (SBCXr  GPR64:$dst, XZR, GPR64:$src)>;
1850def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
1851def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
1852
1853// Add/subtract
1854defm ADD : AddSub<0, "add", "sub", add>;
1855defm SUB : AddSub<1, "sub", "add">;
1856
1857def : InstAlias<"mov $dst, $src",
1858                (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
1859def : InstAlias<"mov $dst, $src",
1860                (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
1861def : InstAlias<"mov $dst, $src",
1862                (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
1863def : InstAlias<"mov $dst, $src",
1864                (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;
1865
1866defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
1867defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
1868
1869def copyFromSP: PatLeaf<(i64 GPR64:$src), [{
1870  return N->getOpcode() == ISD::CopyFromReg &&
1871         cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP;
1872}]>;
1873
1874// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
1875def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
1876          (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
1877def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
1878          (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
1879def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
1880          (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
1881def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
1882          (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
1883def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
1884          (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
1885def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
1886          (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
1887let AddedComplexity = 1 in {
1888def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3),
1889          (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
1890def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
1891          (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
1892def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)),
1893          (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>;
1894}
1895
1896// Because of the immediate format for add/sub-imm instructions, the
1897// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
1898//  These patterns capture that transformation.
1899let AddedComplexity = 1 in {
1900def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
1901          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
1902def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
1903          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
1904def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
1905          (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
1906def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
1907          (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
1908}
1909
1910// Because of the immediate format for add/sub-imm instructions, the
1911// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
1912//  These patterns capture that transformation.
1913let AddedComplexity = 1 in {
1914def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
1915          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
1916def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
1917          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
1918def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
1919          (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
1920def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
1921          (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
1922}
1923
1924def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
1925def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
1926def : InstAlias<"neg $dst, $src$shift",
1927                (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
1928def : InstAlias<"neg $dst, $src$shift",
1929                (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
1930
1931def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
1932def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
1933def : InstAlias<"negs $dst, $src$shift",
1934                (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
1935def : InstAlias<"negs $dst, $src$shift",
1936                (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
1937
1938
1939// Unsigned/Signed divide
1940defm UDIV : Div<0, "udiv", udiv>;
1941defm SDIV : Div<1, "sdiv", sdiv>;
1942
1943def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>;
1944def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>;
1945def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>;
1946def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>;
1947
1948// Variable shift
1949defm ASRV : Shift<0b10, "asr", sra>;
1950defm LSLV : Shift<0b00, "lsl", shl>;
1951defm LSRV : Shift<0b01, "lsr", srl>;
1952defm RORV : Shift<0b11, "ror", rotr>;
1953
1954def : ShiftAlias<"asrv", ASRVWr, GPR32>;
1955def : ShiftAlias<"asrv", ASRVXr, GPR64>;
1956def : ShiftAlias<"lslv", LSLVWr, GPR32>;
1957def : ShiftAlias<"lslv", LSLVXr, GPR64>;
1958def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
1959def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
1960def : ShiftAlias<"rorv", RORVWr, GPR32>;
1961def : ShiftAlias<"rorv", RORVXr, GPR64>;
1962
1963// Multiply-add
1964let AddedComplexity = 5 in {
1965defm MADD : MulAccum<0, "madd">;
1966defm MSUB : MulAccum<1, "msub">;
1967
1968def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
1969          (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
1970def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
1971          (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
1972
1973def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
1974          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
1975def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
1976          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
1977def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
1978          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
1979def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
1980          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
1981} // AddedComplexity = 5
1982
1983let AddedComplexity = 5 in {
1984def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
1985def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
1986def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
1987def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;
1988
1989def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))),
1990          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
1991def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))),
1992          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
1993def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
1994          (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
1995def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))),
1996          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
1997def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))),
1998          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
1999def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
2000          (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2001
2002def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
2003          (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2004def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
2005          (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2006
2007def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
2008          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2009def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
2010          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2011def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
2012          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2013                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2014
2015def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
2016          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2017def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
2018          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2019def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
2020          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2021                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2022
2023def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
2024          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2025def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
2026          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2027def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
2028                    GPR64:$Ra)),
2029          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2030                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2031
2032def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
2033          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2034def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
2035          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2036def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
2037                                    (s64imm_32bit:$C)))),
2038          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2039                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2040
2041def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)),
2042          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2043def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))),
2044          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2045
2046def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)),
2047          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2048def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)),
2049          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2050
2051def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
2052          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2053def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
2054          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2055
2056def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
2057          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2058def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
2059          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2060
2061def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)),
2062          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2063def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))),
2064          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2065
2066def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)),
2067          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2068def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)),
2069          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2070
2071def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))),
2072          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2073def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
2074          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2075
2076def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))),
2077          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2078def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
2079          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2080} // AddedComplexity = 5
2081
2082def : MulAccumWAlias<"mul", MADDWrrr>;
2083def : MulAccumXAlias<"mul", MADDXrrr>;
2084def : MulAccumWAlias<"mneg", MSUBWrrr>;
2085def : MulAccumXAlias<"mneg", MSUBXrrr>;
2086def : WideMulAccumAlias<"smull", SMADDLrrr>;
2087def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
2088def : WideMulAccumAlias<"umull", UMADDLrrr>;
2089def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;
2090
2091// Multiply-high
2092def SMULHrr : MulHi<0b010, "smulh", mulhs>;
2093def UMULHrr : MulHi<0b110, "umulh", mulhu>;
2094
2095// CRC32
2096def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
2097def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
2098def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
2099def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;
2100
2101def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
2102def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
2103def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
2104def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
2105
2106// v8.1 atomic CAS
2107defm CAS   : CompareAndSwap<0, 0, "">;
2108defm CASA  : CompareAndSwap<1, 0, "a">;
2109defm CASL  : CompareAndSwap<0, 1, "l">;
2110defm CASAL : CompareAndSwap<1, 1, "al">;
2111
2112// v8.1 atomic CASP
2113defm CASP   : CompareAndSwapPair<0, 0, "">;
2114defm CASPA  : CompareAndSwapPair<1, 0, "a">;
2115defm CASPL  : CompareAndSwapPair<0, 1, "l">;
2116defm CASPAL : CompareAndSwapPair<1, 1, "al">;
2117
2118// v8.1 atomic SWP
2119defm SWP   : Swap<0, 0, "">;
2120defm SWPA  : Swap<1, 0, "a">;
2121defm SWPL  : Swap<0, 1, "l">;
2122defm SWPAL : Swap<1, 1, "al">;
2123
2124// v8.1 atomic LD<OP>(register). Performs load and then ST<OP>(register)
2125defm LDADD   : LDOPregister<0b000, "add", 0, 0, "">;
2126defm LDADDA  : LDOPregister<0b000, "add", 1, 0, "a">;
2127defm LDADDL  : LDOPregister<0b000, "add", 0, 1, "l">;
2128defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">;
2129
2130defm LDCLR   : LDOPregister<0b001, "clr", 0, 0, "">;
2131defm LDCLRA  : LDOPregister<0b001, "clr", 1, 0, "a">;
2132defm LDCLRL  : LDOPregister<0b001, "clr", 0, 1, "l">;
2133defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">;
2134
2135defm LDEOR   : LDOPregister<0b010, "eor", 0, 0, "">;
2136defm LDEORA  : LDOPregister<0b010, "eor", 1, 0, "a">;
2137defm LDEORL  : LDOPregister<0b010, "eor", 0, 1, "l">;
2138defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">;
2139
2140defm LDSET   : LDOPregister<0b011, "set", 0, 0, "">;
2141defm LDSETA  : LDOPregister<0b011, "set", 1, 0, "a">;
2142defm LDSETL  : LDOPregister<0b011, "set", 0, 1, "l">;
2143defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">;
2144
2145defm LDSMAX   : LDOPregister<0b100, "smax", 0, 0, "">;
2146defm LDSMAXA  : LDOPregister<0b100, "smax", 1, 0, "a">;
2147defm LDSMAXL  : LDOPregister<0b100, "smax", 0, 1, "l">;
2148defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;
2149
2150defm LDSMIN   : LDOPregister<0b101, "smin", 0, 0, "">;
2151defm LDSMINA  : LDOPregister<0b101, "smin", 1, 0, "a">;
2152defm LDSMINL  : LDOPregister<0b101, "smin", 0, 1, "l">;
2153defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;
2154
2155defm LDUMAX   : LDOPregister<0b110, "umax", 0, 0, "">;
2156defm LDUMAXA  : LDOPregister<0b110, "umax", 1, 0, "a">;
2157defm LDUMAXL  : LDOPregister<0b110, "umax", 0, 1, "l">;
2158defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;
2159
2160defm LDUMIN   : LDOPregister<0b111, "umin", 0, 0, "">;
2161defm LDUMINA  : LDOPregister<0b111, "umin", 1, 0, "a">;
2162defm LDUMINL  : LDOPregister<0b111, "umin", 0, 1, "l">;
2163defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;
2164
2165// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR"
2166defm : STOPregister<"stadd","LDADD">; // STADDx
2167defm : STOPregister<"stclr","LDCLR">; // STCLRx
2168defm : STOPregister<"steor","LDEOR">; // STEORx
2169defm : STOPregister<"stset","LDSET">; // STSETx
2170defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx
2171defm : STOPregister<"stsmin","LDSMIN">;// STSMINx
2172defm : STOPregister<"stumax","LDUMAX">;// STUMAXx
2173defm : STOPregister<"stumin","LDUMIN">;// STUMINx
2174
2175// v8.5 Memory Tagging Extension
2176let Predicates = [HasMTE] in {
2177
2178def IRG   : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg",
2179                                 int_aarch64_irg, GPR64sp, GPR64>, Sched<[]>;
2180
2181def GMI   : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi",
2182                                 int_aarch64_gmi, GPR64sp>, Sched<[]> {
2183  let isNotDuplicable = 1;
2184}
2185def ADDG  : AddSubG<0, "addg", null_frag>;
2186def SUBG  : AddSubG<1, "subg", null_frag>;
2187
2188def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;
2189
2190def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>;
2191def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
2192  let Defs = [NZCV];
2193}
2194
2195def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;
2196
2197def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;
2198
2199def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4),
2200          (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>;
2201def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn,  simm9s16:$offset)),
2202          (LDG GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;
2203
2204def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;
2205
2206def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]",
2207                   (outs GPR64:$Rt), (ins GPR64sp:$Rn)>;
2208def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]",
2209                   (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>;
2210def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]",
2211                   (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> {
2212  let Inst{23} = 0;
2213}
2214
2215defm STG   : MemTagStore<0b00, "stg">;
2216defm STZG  : MemTagStore<0b01, "stzg">;
2217defm ST2G  : MemTagStore<0b10, "st2g">;
2218defm STZ2G : MemTagStore<0b11, "stz2g">;
2219
2220def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2221          (STGi $Rn, $Rm, $imm)>;
2222def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2223          (STZGi $Rn, $Rm, $imm)>;
2224def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2225          (ST2Gi $Rn, $Rm, $imm)>;
2226def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2227          (STZ2Gi $Rn, $Rm, $imm)>;
2228
2229defm STGP     : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
2230def  STGPpre  : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
2231def  STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
2232
2233def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
2234          (STGi GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;
2235
2236def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
2237          (STGPi $Rt, $Rt2, $Rn, $imm)>;
2238
2239def IRGstack
2240    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
2241      Sched<[]>;
2242def TAGPstack
2243    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
2244      Sched<[]>;
2245
2246// Explicit SP in the first operand prevents ShrinkWrap optimization
2247// from leaving this instruction out of the stack frame. When IRGstack
2248// is transformed into IRG, this operand is replaced with the actual
2249// register / expression for the tagged base pointer of the current function.
2250def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
2251
2252// Large STG to be expanded into a loop. $sz is the size, $Rn is start address.
2253// $Rn_wback is one past the end of the range. $Rm is the loop counter.
2254let isCodeGenOnly=1, mayStore=1 in {
2255def STGloop_wback
2256    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
2257             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
2258      Sched<[WriteAdr, WriteST]>;
2259
2260def STZGloop_wback
2261    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
2262             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
2263      Sched<[WriteAdr, WriteST]>;
2264
2265// A variant of the above where $Rn2 is an independent register not tied to the input register $Rn.
2266// Their purpose is to use a FrameIndex operand as $Rn (which of course can not be written back).
2267def STGloop
2268    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
2269             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
2270      Sched<[WriteAdr, WriteST]>;
2271
2272def STZGloop
2273    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
2274             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
2275      Sched<[WriteAdr, WriteST]>;
2276}
2277
2278} // Predicates = [HasMTE]
2279
2280//===----------------------------------------------------------------------===//
2281// Logical instructions.
2282//===----------------------------------------------------------------------===//
2283
2284// (immediate)
2285defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
2286defm AND  : LogicalImm<0b00, "and", and, "bic">;
2287defm EOR  : LogicalImm<0b10, "eor", xor, "eon">;
2288defm ORR  : LogicalImm<0b01, "orr", or, "orn">;
2289
2290// FIXME: these aliases *are* canonical sometimes (when movz can't be
2291// used). Actually, it seems to be working right now, but putting logical_immXX
2292// here is a bit dodgy on the AsmParser side too.
2293def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
2294                                          logical_imm32:$imm), 0>;
2295def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
2296                                          logical_imm64:$imm), 0>;
2297
2298
2299// (register)
2300defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
2301defm BICS : LogicalRegS<0b11, 1, "bics",
2302                        BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
2303defm AND  : LogicalReg<0b00, 0, "and", and>;
2304defm BIC  : LogicalReg<0b00, 1, "bic",
2305                       BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>;
2306defm EON  : LogicalReg<0b10, 1, "eon",
2307                       BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
2308defm EOR  : LogicalReg<0b10, 0, "eor", xor>;
2309defm ORN  : LogicalReg<0b01, 1, "orn",
2310                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
2311defm ORR  : LogicalReg<0b01, 0, "orr", or>;
2312
2313def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
2314def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;
2315
2316def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
2317def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;
2318
2319def : InstAlias<"mvn $Wd, $Wm$sh",
2320                (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
2321def : InstAlias<"mvn $Xd, $Xm$sh",
2322                (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;
2323
2324def : InstAlias<"tst $src1, $src2",
2325                (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
2326def : InstAlias<"tst $src1, $src2",
2327                (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;
2328
2329def : InstAlias<"tst $src1, $src2",
2330                        (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
2331def : InstAlias<"tst $src1, $src2",
2332                        (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;
2333
2334def : InstAlias<"tst $src1, $src2$sh",
2335               (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
2336def : InstAlias<"tst $src1, $src2$sh",
2337               (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;
2338
2339
2340def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
2341def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
2342
2343
2344//===----------------------------------------------------------------------===//
2345// One operand data processing instructions.
2346//===----------------------------------------------------------------------===//
2347
2348defm CLS    : OneOperandData<0b000101, "cls">;
2349defm CLZ    : OneOperandData<0b000100, "clz", ctlz>;
2350defm RBIT   : OneOperandData<0b000000, "rbit", bitreverse>;
2351
2352def  REV16Wr : OneWRegData<0b000001, "rev16",
2353                                     UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
2354def  REV16Xr : OneXRegData<0b000001, "rev16", null_frag>;
2355
2356def : Pat<(cttz GPR32:$Rn),
2357          (CLZWr (RBITWr GPR32:$Rn))>;
2358def : Pat<(cttz GPR64:$Rn),
2359          (CLZXr (RBITXr GPR64:$Rn))>;
2360def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
2361                (i32 1))),
2362          (CLSWr GPR32:$Rn)>;
2363def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
2364                (i64 1))),
2365          (CLSXr GPR64:$Rn)>;
2366def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
2367def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;
2368
2369// Unlike the other one operand instructions, the instructions with the "rev"
2370// mnemonic do *not* just different in the size bit, but actually use different
2371// opcode bits for the different sizes.
2372def REVWr   : OneWRegData<0b000010, "rev", bswap>;
2373def REVXr   : OneXRegData<0b000011, "rev", bswap>;
2374def REV32Xr : OneXRegData<0b000010, "rev32",
2375                                    UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
2376
2377def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
2378
2379// The bswap commutes with the rotr so we want a pattern for both possible
2380// orders.
2381def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
2382def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
2383
2384// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
2385def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
2386def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
2387
2388def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
2389              (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
2390          (REV16Xr GPR64:$Rn)>;
2391
2392//===----------------------------------------------------------------------===//
2393// Bitfield immediate extraction instruction.
2394//===----------------------------------------------------------------------===//
2395let hasSideEffects = 0 in
2396defm EXTR : ExtractImm<"extr">;
2397def : InstAlias<"ror $dst, $src, $shift",
2398            (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
2399def : InstAlias<"ror $dst, $src, $shift",
2400            (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;
2401
2402def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
2403          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
2404def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
2405          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
2406
2407//===----------------------------------------------------------------------===//
2408// Other bitfield immediate instructions.
2409//===----------------------------------------------------------------------===//
2410let hasSideEffects = 0 in {
2411defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
2412defm SBFM : BitfieldImm<0b00, "sbfm">;
2413defm UBFM : BitfieldImm<0b10, "ubfm">;
2414}
2415
2416def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
2417  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
2418  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2419}]>;
2420
2421def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
2422  uint64_t enc = 31 - N->getZExtValue();
2423  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2424}]>;
2425
2426// min(7, 31 - shift_amt)
2427def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
2428  uint64_t enc = 31 - N->getZExtValue();
2429  enc = enc > 7 ? 7 : enc;
2430  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2431}]>;
2432
2433// min(15, 31 - shift_amt)
2434def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
2435  uint64_t enc = 31 - N->getZExtValue();
2436  enc = enc > 15 ? 15 : enc;
2437  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2438}]>;
2439
2440def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
2441  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
2442  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2443}]>;
2444
2445def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
2446  uint64_t enc = 63 - N->getZExtValue();
2447  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2448}]>;
2449
2450// min(7, 63 - shift_amt)
2451def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
2452  uint64_t enc = 63 - N->getZExtValue();
2453  enc = enc > 7 ? 7 : enc;
2454  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2455}]>;
2456
2457// min(15, 63 - shift_amt)
2458def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
2459  uint64_t enc = 63 - N->getZExtValue();
2460  enc = enc > 15 ? 15 : enc;
2461  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2462}]>;
2463
2464// min(31, 63 - shift_amt)
2465def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
2466  uint64_t enc = 63 - N->getZExtValue();
2467  enc = enc > 31 ? 31 : enc;
2468  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2469}]>;
2470
2471def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
2472          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
2473                              (i64 (i32shift_b imm0_31:$imm)))>;
2474def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
2475          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
2476                              (i64 (i64shift_b imm0_63:$imm)))>;
2477
2478let AddedComplexity = 10 in {
2479def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
2480          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
2481def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
2482          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
2483}
2484
2485def : InstAlias<"asr $dst, $src, $shift",
2486                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
2487def : InstAlias<"asr $dst, $src, $shift",
2488                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
2489def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
2490def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
2491def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
2492def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
2493def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
2494
2495def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
2496          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
2497def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
2498          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
2499
2500def : InstAlias<"lsr $dst, $src, $shift",
2501                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
2502def : InstAlias<"lsr $dst, $src, $shift",
2503                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
2504def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
2505def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
2506def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
2507def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
2508def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
2509
2510//===----------------------------------------------------------------------===//
2511// Conditional comparison instructions.
2512//===----------------------------------------------------------------------===//
2513defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
2514defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
2515
2516//===----------------------------------------------------------------------===//
2517// Conditional select instructions.
2518//===----------------------------------------------------------------------===//
2519defm CSEL  : CondSelect<0, 0b00, "csel">;
2520
2521def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
2522defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
2523defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
2524defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
2525
2526def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2527          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2528def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2529          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2530def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2531          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2532def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2533          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2534def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2535          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2536def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2537          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2538
2539def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
2540          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
2541def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
2542          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
2543def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
2544          (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
2545def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
2546          (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
2547def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
2548          (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2549def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
2550          (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2551def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
2552          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
2553def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
2554          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
2555def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
2556          (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
2557def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
2558          (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
2559def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
2560          (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2561def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
2562          (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2563
2564def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
2565          (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>;
2566def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
2567          (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>;
2568
2569def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
2570          (CSINCWr GPR32:$val, WZR, imm:$cc)>;
2571def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
2572          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
2573def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
2574          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
2575
2576def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
2577          (CSELWr WZR, GPR32:$val, imm:$cc)>;
2578def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
2579          (CSELXr XZR, GPR64:$val, imm:$cc)>;
2580def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
2581          (CSELXr XZR, GPR64:$val, imm:$cc)>;
2582
2583// The inverse of the condition code from the alias instruction is what is used
2584// in the aliased instruction. The parser all ready inverts the condition code
2585// for these aliases.
2586def : InstAlias<"cset $dst, $cc",
2587                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
2588def : InstAlias<"cset $dst, $cc",
2589                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
2590
2591def : InstAlias<"csetm $dst, $cc",
2592                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
2593def : InstAlias<"csetm $dst, $cc",
2594                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
2595
2596def : InstAlias<"cinc $dst, $src, $cc",
2597                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
2598def : InstAlias<"cinc $dst, $src, $cc",
2599                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
2600
2601def : InstAlias<"cinv $dst, $src, $cc",
2602                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
2603def : InstAlias<"cinv $dst, $src, $cc",
2604                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
2605
2606def : InstAlias<"cneg $dst, $src, $cc",
2607                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
2608def : InstAlias<"cneg $dst, $src, $cc",
2609                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
2610
2611//===----------------------------------------------------------------------===//
2612// PC-relative instructions.
2613//===----------------------------------------------------------------------===//
2614let isReMaterializable = 1 in {
2615let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
2616def ADR  : ADRI<0, "adr", adrlabel,
2617                [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
2618} // hasSideEffects = 0
2619
2620def ADRP : ADRI<1, "adrp", adrplabel,
2621                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
2622} // isReMaterializable = 1
2623
2624// page address of a constant pool entry, block address
2625def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
2626def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
2627def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
2628def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
2629def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
2630def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
2631def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
2632
2633//===----------------------------------------------------------------------===//
2634// Unconditional branch (register) instructions.
2635//===----------------------------------------------------------------------===//
2636
2637let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
2638def RET  : BranchReg<0b0010, "ret", []>;
2639def DRPS : SpecialReturn<0b0101, "drps">;
2640def ERET : SpecialReturn<0b0100, "eret">;
2641} // isReturn = 1, isTerminator = 1, isBarrier = 1
2642
2643// Default to the LR register.
2644def : InstAlias<"ret", (RET LR)>;
2645
2646let isCall = 1, Defs = [LR], Uses = [SP] in {
2647  def BLR : BranchReg<0b0001, "blr", []>;
2648  def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
2649                Sched<[WriteBrReg]>,
2650                PseudoInstExpansion<(BLR GPR64:$Rn)>;
2651  def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
2652                     Sched<[WriteBrReg]>;
2653  def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
2654                Sched<[WriteBrReg]>;
2655} // isCall
2656
2657def : Pat<(AArch64call GPR64:$Rn),
2658          (BLR GPR64:$Rn)>,
2659      Requires<[NoSLSBLRMitigation]>;
2660def : Pat<(AArch64call GPR64noip:$Rn),
2661          (BLRNoIP GPR64noip:$Rn)>,
2662      Requires<[SLSBLRMitigation]>;
2663
2664def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
2665          (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
2666      Requires<[NoSLSBLRMitigation]>;
2667
2668def : Pat<(AArch64call_bti GPR64:$Rn),
2669          (BLR_BTI GPR64:$Rn)>,
2670      Requires<[NoSLSBLRMitigation]>;
2671def : Pat<(AArch64call_bti GPR64noip:$Rn),
2672          (BLR_BTI GPR64noip:$Rn)>,
2673      Requires<[SLSBLRMitigation]>;
2674
2675let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
2676def BR  : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
2677} // isBranch, isTerminator, isBarrier, isIndirectBranch
2678
2679// Create a separate pseudo-instruction for codegen to use so that we don't
2680// flag lr as used in every function. It'll be restored before the RET by the
2681// epilogue if it's legitimately used.
2682def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>,
2683                   Sched<[WriteBrReg]> {
2684  let isTerminator = 1;
2685  let isBarrier = 1;
2686  let isReturn = 1;
2687}
2688
2689// This is a directive-like pseudo-instruction. The purpose is to insert an
2690// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
2691// (which in the usual case is a BLR).
2692let hasSideEffects = 1 in
2693def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
2694  let AsmString = ".tlsdesccall $sym";
2695}
2696
2697// Pseudo instruction to tell the streamer to emit a 'B' character into the
2698// augmentation string.
2699def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}
2700
2701// Pseudo instruction to tell the streamer to emit a 'G' character into the
2702// augmentation string.
2703def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}
2704
2705// FIXME: maybe the scratch register used shouldn't be fixed to X1?
2706// FIXME: can "hasSideEffects be dropped?
2707// This gets lowered to an instruction sequence which takes 16 bytes
2708let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16,
2709    isCodeGenOnly = 1 in
2710def TLSDESC_CALLSEQ
2711    : Pseudo<(outs), (ins i64imm:$sym),
2712             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
2713      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
2714def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
2715          (TLSDESC_CALLSEQ texternalsym:$sym)>;
2716
2717//===----------------------------------------------------------------------===//
2718// Conditional branch (immediate) instruction.
2719//===----------------------------------------------------------------------===//
2720def Bcc : BranchCond<0, "b">;
2721
2722// Armv8.8-A variant form which hints to the branch predictor that
2723// this branch is very likely to go the same way nearly all the time
2724// (even though it is not known at compile time _which_ way that is).
2725def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;
2726
2727//===----------------------------------------------------------------------===//
2728// Compare-and-branch instructions.
2729//===----------------------------------------------------------------------===//
2730defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
2731defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;
2732
2733//===----------------------------------------------------------------------===//
2734// Test-bit-and-branch instructions.
2735//===----------------------------------------------------------------------===//
2736defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
2737defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;
2738
2739//===----------------------------------------------------------------------===//
2740// Unconditional branch (immediate) instructions.
2741//===----------------------------------------------------------------------===//
2742let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
2743def B  : BranchImm<0, "b", [(br bb:$addr)]>;
2744} // isBranch, isTerminator, isBarrier
2745
2746let isCall = 1, Defs = [LR], Uses = [SP] in {
2747def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
2748} // isCall
2749def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;
2750
2751//===----------------------------------------------------------------------===//
2752// Exception generation instructions.
2753//===----------------------------------------------------------------------===//
2754let isTrap = 1 in {
2755def BRK   : ExceptionGeneration<0b001, 0b00, "brk",
2756                                [(int_aarch64_break timm32_0_65535:$imm)]>;
2757}
2758def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
2759def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
2760def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
2761def HLT   : ExceptionGeneration<0b010, 0b00, "hlt">;
2762def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
2763def SMC   : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
2764def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;
2765
2766// DCPSn defaults to an immediate operand of zero if unspecified.
2767def : InstAlias<"dcps1", (DCPS1 0)>;
2768def : InstAlias<"dcps2", (DCPS2 0)>;
2769def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;
2770
2771def UDF : UDFType<0, "udf">;
2772
2773//===----------------------------------------------------------------------===//
2774// Load instructions.
2775//===----------------------------------------------------------------------===//
2776
2777// Pair (indexed, offset)
2778defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
2779defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
2780defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
2781defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
2782defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;
2783
2784defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;
2785
2786// Pair (pre-indexed)
2787def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
2788def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
2789def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
2790def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
2791def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
2792
2793def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;
2794
2795// Pair (post-indexed)
2796def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
2797def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
2798def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
2799def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
2800def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
2801
2802def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;
2803
2804
2805// Pair (no allocate)
2806defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
2807defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
2808defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
2809defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
2810defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;
2811
2812def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
2813          (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;
2814
2815def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
2816          (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>;
2817//---
2818// (register offset)
2819//---
2820
2821// Integer
2822defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
2823defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
2824defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
2825defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;
2826
2827// Floating-point
2828defm LDRB : Load8RO<0b00,   1, 0b01, FPR8Op,   "ldr", i8, load>;
2829defm LDRH : Load16RO<0b01,  1, 0b01, FPR16Op,  "ldr", f16, load>;
2830defm LDRS : Load32RO<0b10,  1, 0b01, FPR32Op,  "ldr", f32, load>;
2831defm LDRD : Load64RO<0b11,  1, 0b01, FPR64Op,  "ldr", f64, load>;
2832defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;
2833
2834// Load sign-extended half-word
2835defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
2836defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;
2837
2838// Load sign-extended byte
2839defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
2840defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;
2841
2842// Load sign-extended word
2843defm LDRSW  : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;
2844
2845// Pre-fetch.
2846defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
2847
2848// For regular load, we do not have any alignment requirement.
2849// Thus, it is safe to directly map the vector loads with interesting
2850// addressing modes.
2851// FIXME: We could do the same for bitconvert to floating point vectors.
2852multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
2853                              ValueType ScalTy, ValueType VecTy,
2854                              Instruction LOADW, Instruction LOADX,
2855                              SubRegIndex sub> {
2856  def : Pat<(VecTy (scalar_to_vector (ScalTy
2857              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
2858            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
2859                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
2860                           sub)>;
2861
2862  def : Pat<(VecTy (scalar_to_vector (ScalTy
2863              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
2864            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
2865                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
2866                           sub)>;
2867}
2868
2869let AddedComplexity = 10 in {
2870defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
2871defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;
2872
2873defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
2874defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;
2875
2876defm : ScalToVecROLoadPat<ro16, load,       i32, v4f16, LDRHroW, LDRHroX, hsub>;
2877defm : ScalToVecROLoadPat<ro16, load,       i32, v8f16, LDRHroW, LDRHroX, hsub>;
2878
2879defm : ScalToVecROLoadPat<ro32, load,       i32, v2i32, LDRSroW, LDRSroX, ssub>;
2880defm : ScalToVecROLoadPat<ro32, load,       i32, v4i32, LDRSroW, LDRSroX, ssub>;
2881
2882defm : ScalToVecROLoadPat<ro32, load,       f32, v2f32, LDRSroW, LDRSroX, ssub>;
2883defm : ScalToVecROLoadPat<ro32, load,       f32, v4f32, LDRSroW, LDRSroX, ssub>;
2884
2885defm : ScalToVecROLoadPat<ro64, load,       i64, v2i64, LDRDroW, LDRDroX, dsub>;
2886
2887defm : ScalToVecROLoadPat<ro64, load,       f64, v2f64, LDRDroW, LDRDroX, dsub>;
2888
2889
2890def : Pat <(v1i64 (scalar_to_vector (i64
2891                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
2892                                           ro_Wextend64:$extend))))),
2893           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
2894
2895def : Pat <(v1i64 (scalar_to_vector (i64
2896                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
2897                                           ro_Xextend64:$extend))))),
2898           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
2899}
2900
2901// Match all load 64 bits width whose type is compatible with FPR64
2902multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
2903                        Instruction LOADW, Instruction LOADX> {
2904
2905  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
2906            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
2907
2908  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
2909            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
2910}
2911
2912let AddedComplexity = 10 in {
2913let Predicates = [IsLE] in {
2914  // We must do vector loads with LD1 in big-endian.
2915  defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
2916  defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
2917  defm : VecROLoadPat<ro64, v8i8,  LDRDroW, LDRDroX>;
2918  defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
2919  defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
2920  defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
2921}
2922
2923defm : VecROLoadPat<ro64, v1i64,  LDRDroW, LDRDroX>;
2924defm : VecROLoadPat<ro64, v1f64,  LDRDroW, LDRDroX>;
2925
2926// Match all load 128 bits width whose type is compatible with FPR128
2927let Predicates = [IsLE] in {
2928  // We must do vector loads with LD1 in big-endian.
2929  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
2930  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
2931  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
2932  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
2933  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
2934  defm : VecROLoadPat<ro128, v8f16,  LDRQroW, LDRQroX>;
2935  defm : VecROLoadPat<ro128, v8bf16,  LDRQroW, LDRQroX>;
2936  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
2937}
2938} // AddedComplexity = 10
2939
2940// zextload -> i64
2941multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
2942                            Instruction INSTW, Instruction INSTX> {
2943  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
2944            (SUBREG_TO_REG (i64 0),
2945                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
2946                           sub_32)>;
2947
2948  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
2949            (SUBREG_TO_REG (i64 0),
2950                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
2951                           sub_32)>;
2952}
2953
2954let AddedComplexity = 10 in {
2955  defm : ExtLoadTo64ROPat<ro8,  zextloadi8,  LDRBBroW, LDRBBroX>;
2956  defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
2957  defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW,  LDRWroX>;
2958
2959  // zextloadi1 -> zextloadi8
2960  defm : ExtLoadTo64ROPat<ro8,  zextloadi1,  LDRBBroW, LDRBBroX>;
2961
2962  // extload -> zextload
2963  defm : ExtLoadTo64ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
2964  defm : ExtLoadTo64ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
2965  defm : ExtLoadTo64ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;
2966
2967  // extloadi1 -> zextloadi8
2968  defm : ExtLoadTo64ROPat<ro8,  extloadi1,   LDRBBroW, LDRBBroX>;
2969}
2970
2971
2972// zextload -> i64
2973multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
2974                            Instruction INSTW, Instruction INSTX> {
2975  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
2976            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
2977
2978  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
2979            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
2980
2981}
2982
2983let AddedComplexity = 10 in {
2984  // extload -> zextload
2985  defm : ExtLoadTo32ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
2986  defm : ExtLoadTo32ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
2987  defm : ExtLoadTo32ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;
2988
2989  // zextloadi1 -> zextloadi8
2990  defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
2991}
2992
2993//---
2994// (unsigned immediate)
2995//---
2996defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
2997                   [(set GPR64z:$Rt,
2998                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
2999defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
3000                   [(set GPR32z:$Rt,
3001                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
3002defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
3003                   [(set FPR8Op:$Rt,
3004                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
3005defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
3006                   [(set (f16 FPR16Op:$Rt),
3007                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
3008defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
3009                   [(set (f32 FPR32Op:$Rt),
3010                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
3011defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
3012                   [(set (f64 FPR64Op:$Rt),
3013                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
3014defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
3015                 [(set (f128 FPR128Op:$Rt),
3016                       (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;
3017
3018// bf16 load pattern
3019def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
3020           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
3021
3022// For regular load, we do not have any alignment requirement.
3023// Thus, it is safe to directly map the vector loads with interesting
3024// addressing modes.
3025// FIXME: We could do the same for bitconvert to floating point vectors.
3026def : Pat <(v8i8 (scalar_to_vector (i32
3027               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
3028           (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
3029                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
3030def : Pat <(v16i8 (scalar_to_vector (i32
3031               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
3032           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
3033                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
3034def : Pat <(v4i16 (scalar_to_vector (i32
3035               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
3036           (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
3037                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
3038def : Pat <(v8i16 (scalar_to_vector (i32
3039               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
3040           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
3041                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
3042def : Pat <(v2i32 (scalar_to_vector (i32
3043               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
3044           (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
3045                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
3046def : Pat <(v4i32 (scalar_to_vector (i32
3047               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
3048           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
3049                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
3050def : Pat <(v1i64 (scalar_to_vector (i64
3051               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
3052           (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3053def : Pat <(v2i64 (scalar_to_vector (i64
3054               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
3055           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
3056                          (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;
3057
3058// Match all load 64 bits width whose type is compatible with FPR64
3059let Predicates = [IsLE] in {
3060  // We must use LD1 to perform vector loads in big-endian.
3061  def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3062            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3063  def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3064            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3065  def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3066            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3067  def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3068            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3069  def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3070            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3071  def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3072            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3073}
3074def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3075          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3076def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
3077          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3078
3079// Match all load 128 bits width whose type is compatible with FPR128
3080let Predicates = [IsLE] in {
3081  // We must use LD1 to perform vector loads in big-endian.
3082  def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3083            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3084  def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3085            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3086  def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3087            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3088  def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3089            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3090  def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3091            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3092  def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3093            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3094  def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3095            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3096  def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3097            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3098}
3099def : Pat<(f128  (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
3100          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3101
3102defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
3103                    [(set GPR32:$Rt,
3104                          (zextloadi16 (am_indexed16 GPR64sp:$Rn,
3105                                                     uimm12s2:$offset)))]>;
3106defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
3107                    [(set GPR32:$Rt,
3108                          (zextloadi8 (am_indexed8 GPR64sp:$Rn,
3109                                                   uimm12s1:$offset)))]>;
3110// zextload -> i64
3111def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
3112    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
3113def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
3114    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
3115
3116// zextloadi1 -> zextloadi8
3117def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
3118          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
3119def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
3120    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
3121
3122// extload -> zextload
3123def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
3124          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
3125def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
3126          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
3127def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
3128          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
3129def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
3130    (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
3131def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
3132    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
3133def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
3134    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
3135def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
3136    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
3137
3138// load sign-extended half-word
3139defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
3140                     [(set GPR32:$Rt,
3141                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
3142                                                      uimm12s2:$offset)))]>;
3143defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
3144                     [(set GPR64:$Rt,
3145                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
3146                                                      uimm12s2:$offset)))]>;
3147
3148// load sign-extended byte
3149defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
3150                     [(set GPR32:$Rt,
3151                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
3152                                                    uimm12s1:$offset)))]>;
3153defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
3154                     [(set GPR64:$Rt,
3155                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
3156                                                    uimm12s1:$offset)))]>;
3157
3158// load sign-extended word
3159defm LDRSW  : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
3160                     [(set GPR64:$Rt,
3161                           (sextloadi32 (am_indexed32 GPR64sp:$Rn,
3162                                                      uimm12s4:$offset)))]>;
3163
3164// load zero-extended word
3165def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
3166      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
3167
3168// Pre-fetch.
3169def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
3170                        [(AArch64Prefetch timm:$Rt,
3171                                        (am_indexed64 GPR64sp:$Rn,
3172                                                      uimm12s8:$offset))]>;
3173
3174def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
3175
3176//---
3177// (literal)
3178
3179def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
3180  if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
3181    const DataLayout &DL = MF->getDataLayout();
3182    Align Align = G->getGlobal()->getPointerAlignment(DL);
3183    return Align >= 4 && G->getOffset() % 4 == 0;
3184  }
3185  if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
3186    return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
3187  return false;
3188}]>;
3189
3190def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
3191  [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
3192def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
3193  [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
3194def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
3195  [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
3196def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
3197  [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
3198def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
3199  [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
3200
3201// load sign-extended word
3202def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
3203  [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;
3204
3205let AddedComplexity = 20 in {
3206def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
3207        (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
3208}
3209
3210// prefetch
3211def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
3212//                   [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;
3213
3214//---
3215// (unscaled immediate)
3216defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
3217                    [(set GPR64z:$Rt,
3218                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
3219defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
3220                    [(set GPR32z:$Rt,
3221                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
3222defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
3223                    [(set FPR8Op:$Rt,
3224                          (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
3225defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
3226                    [(set (f16 FPR16Op:$Rt),
3227                          (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
3228defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
3229                    [(set (f32 FPR32Op:$Rt),
3230                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
3231defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
3232                    [(set (f64 FPR64Op:$Rt),
3233                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
3234defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
3235                    [(set (f128 FPR128Op:$Rt),
3236                          (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;
3237
3238defm LDURHH
3239    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
3240             [(set GPR32:$Rt,
3241                    (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
3242defm LDURBB
3243    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
3244             [(set GPR32:$Rt,
3245                    (zextloadi8 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
3246
3247// bf16 load pattern
3248def : Pat <(bf16 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3249           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
3250
3251// Match all load 64 bits width whose type is compatible with FPR64
3252let Predicates = [IsLE] in {
3253  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
3254            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
3255  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
3256            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
3257  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
3258            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
3259  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
3260            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
3261  def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
3262            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
3263}
3264def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
3265          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
3266def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
3267          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
3268
3269// Match all load 128 bits width whose type is compatible with FPR128
3270let Predicates = [IsLE] in {
3271  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3272            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3273  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3274            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3275  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3276            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3277  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3278            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3279  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3280            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3281  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3282            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3283  def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
3284            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
3285}
3286
3287//  anyext -> zext
3288def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3289          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
3290def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3291          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
3292def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3293          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
3294def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
3295    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3296def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3297    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3298def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3299    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3300def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3301    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3302// unscaled zext
3303def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3304          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
3305def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3306          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
3307def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3308          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
3309def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
3310    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3311def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3312    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3313def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3314    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3315def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3316    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3317
3318
3319//---
3320// LDR mnemonics fall back to LDUR for negative or unaligned offsets.
3321
3322// Define new assembler match classes as we want to only match these when
3323// the don't otherwise match the scaled addressing mode for LDR/STR. Don't
3324// associate a DiagnosticType either, as we want the diagnostic for the
3325// canonical form (the scaled operand) to take precedence.
3326class SImm9OffsetOperand<int Width> : AsmOperandClass {
3327  let Name = "SImm9OffsetFB" # Width;
3328  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
3329  let RenderMethod = "addImmOperands";
3330}
3331
3332def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
3333def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
3334def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
3335def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
3336def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;
3337
3338def simm9_offset_fb8 : Operand<i64> {
3339  let ParserMatchClass = SImm9OffsetFB8Operand;
3340}
3341def simm9_offset_fb16 : Operand<i64> {
3342  let ParserMatchClass = SImm9OffsetFB16Operand;
3343}
3344def simm9_offset_fb32 : Operand<i64> {
3345  let ParserMatchClass = SImm9OffsetFB32Operand;
3346}
3347def simm9_offset_fb64 : Operand<i64> {
3348  let ParserMatchClass = SImm9OffsetFB64Operand;
3349}
3350def simm9_offset_fb128 : Operand<i64> {
3351  let ParserMatchClass = SImm9OffsetFB128Operand;
3352}
3353
3354def : InstAlias<"ldr $Rt, [$Rn, $offset]",
3355                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
3356def : InstAlias<"ldr $Rt, [$Rn, $offset]",
3357                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
3358def : InstAlias<"ldr $Rt, [$Rn, $offset]",
3359                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
3360def : InstAlias<"ldr $Rt, [$Rn, $offset]",
3361                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
3362def : InstAlias<"ldr $Rt, [$Rn, $offset]",
3363                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
3364def : InstAlias<"ldr $Rt, [$Rn, $offset]",
3365                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
3366def : InstAlias<"ldr $Rt, [$Rn, $offset]",
3367               (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
3368
3369// zextload -> i64
3370def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
3371  (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3372def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
3373  (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
3374
3375// load sign-extended half-word
3376defm LDURSHW
3377    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
3378               [(set GPR32:$Rt,
3379                    (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
3380defm LDURSHX
3381    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
3382              [(set GPR64:$Rt,
3383                    (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
3384
3385// load sign-extended byte
3386defm LDURSBW
3387    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
3388                [(set GPR32:$Rt,
3389                      (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
3390defm LDURSBX
3391    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
3392                [(set GPR64:$Rt,
3393                      (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
3394
3395// load sign-extended word
3396defm LDURSW
3397    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
3398              [(set GPR64:$Rt,
3399                    (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
3400
3401// zero and sign extending aliases from generic LDR* mnemonics to LDUR*.
3402def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
3403                (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
3404def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
3405                (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
3406def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
3407                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
3408def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
3409                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
3410def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
3411                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
3412def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
3413                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
3414def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
3415                (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
3416
3417// A LDR will implicitly zero the rest of the vector, so vector_insert(zeros,
3418// load, 0) can use a single load.
3419multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType SVT,
3420                                  ValueType ScalarVT, Instruction LoadInst, Instruction UnscaledLoadInst,
3421                                  ComplexPattern Addr, ComplexPattern UnscaledAddr, Operand AddrImm,
3422                                  SubRegIndex SubReg> {
3423  // Scaled
3424  def : Pat <(vector_insert (VT immAllZerosV),
3425                (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
3426            (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
3427  // Unscaled
3428  def : Pat <(vector_insert (VT immAllZerosV),
3429                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
3430             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
3431
3432  // Half-vector patterns
3433  def : Pat <(vector_insert (HVT immAllZerosV),
3434                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
3435             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
3436  // Unscaled
3437  def : Pat <(vector_insert (HVT immAllZerosV),
3438                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
3439             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
3440
3441  // SVE patterns
3442  def : Pat <(vector_insert (SVT immAllZerosV),
3443                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
3444             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
3445  // Unscaled
3446  def : Pat <(vector_insert (SVT immAllZerosV),
3447                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
3448             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
3449}
3450
3451defm : LoadInsertZeroPatterns<extloadi8,  v16i8,  v8i8,   nxv16i8,  i32,  LDRBui, LDURBi,
3452                              am_indexed8,  am_unscaled8,  uimm12s1, bsub>;
3453defm : LoadInsertZeroPatterns<extloadi16, v8i16,  v4i16,  nxv8i16,  i32,  LDRHui, LDURHi,
3454                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
3455defm : LoadInsertZeroPatterns<load,       v4i32,  v2i32,  nxv4i32,  i32,  LDRSui, LDURSi,
3456                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
3457defm : LoadInsertZeroPatterns<load,       v2i64,  v1i64,  nxv2i64,  i64,  LDRDui, LDURDi,
3458                              am_indexed64, am_unscaled64, uimm12s8, dsub>;
3459defm : LoadInsertZeroPatterns<load,       v8f16,  v4f16,  nxv8f16,  f16,  LDRHui, LDURHi,
3460                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
3461defm : LoadInsertZeroPatterns<load,       v8bf16, v4bf16, nxv8bf16, bf16, LDRHui, LDURHi,
3462                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
3463defm : LoadInsertZeroPatterns<load,       v4f32,  v2f32,  nxv4f32,  f32,  LDRSui, LDURSi,
3464                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
3465defm : LoadInsertZeroPatterns<load,       v2f64,  v1f64,  nxv2f64,  f64,  LDRDui, LDURDi,
3466                              am_indexed64, am_unscaled64, uimm12s8, dsub>;
3467
3468// Pre-fetch.
3469defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
3470                  [(AArch64Prefetch timm:$Rt,
3471                                  (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
3472
3473//---
3474// (unscaled immediate, unprivileged)
3475defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
3476defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;
3477
3478defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
3479defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;
3480
3481// load sign-extended half-word
3482defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
3483defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;
3484
3485// load sign-extended byte
3486defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
3487defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;
3488
3489// load sign-extended word
3490defm LDTRSW  : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;
3491
3492//---
3493// (immediate pre-indexed)
3494def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
3495def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
3496def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op,  "ldr">;
3497def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
3498def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
3499def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
3500def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
3501
3502// load sign-extended half-word
3503def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
3504def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;
3505
3506// load sign-extended byte
3507def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
3508def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;
3509
3510// load zero-extended byte
3511def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
3512def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;
3513
3514// load sign-extended word
3515def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
3516
3517//---
3518// (immediate post-indexed)
3519def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
3520def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
3521def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op,  "ldr">;
3522def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
3523def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
3524def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
3525def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
3526
3527// load sign-extended half-word
3528def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
3529def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;
3530
3531// load sign-extended byte
3532def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
3533def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;
3534
3535// load zero-extended byte
3536def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
3537def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;
3538
3539// load sign-extended word
3540def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;
3541
3542//===----------------------------------------------------------------------===//
3543// Store instructions.
3544//===----------------------------------------------------------------------===//
3545
3546// Pair (indexed, offset)
3547// FIXME: Use dedicated range-checked addressing mode operand here.
3548defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
3549defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
3550defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
3551defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
3552defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;
3553
3554// Pair (pre-indexed)
3555def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
3556def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
3557def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
3558def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
3559def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
3560
3561// Pair (post-indexed)
3562def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
3563def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
3564def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
3565def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
3566def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
3567
3568// Pair (no allocate)
3569defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
3570defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
3571defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
3572defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
3573defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
3574
3575def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
3576          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;
3577
3578def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
3579          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;
3580
3581
3582//---
3583// (Register offset)
3584
3585// Integer
3586defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
3587defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
3588defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str",  i32, store>;
3589defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str",  i64, store>;
3590
3591
3592// Floating-point
3593defm STRB : Store8RO< 0b00,  1, 0b00, FPR8Op,   "str", i8, store>;
3594defm STRH : Store16RO<0b01,  1, 0b00, FPR16Op,  "str", f16,     store>;
3595defm STRS : Store32RO<0b10,  1, 0b00, FPR32Op,  "str", f32,     store>;
3596defm STRD : Store64RO<0b11,  1, 0b00, FPR64Op,  "str", f64,     store>;
3597defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;
3598
3599let Predicates = [UseSTRQro], AddedComplexity = 10 in {
3600  def : Pat<(store (f128 FPR128:$Rt),
3601                        (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
3602                                        ro_Wextend128:$extend)),
3603            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
3604  def : Pat<(store (f128 FPR128:$Rt),
3605                        (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
3606                                        ro_Xextend128:$extend)),
3607            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Wextend128:$extend)>;
3608}
3609
3610multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
3611                                 Instruction STRW, Instruction STRX> {
3612
3613  def : Pat<(storeop GPR64:$Rt,
3614                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
3615            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
3616                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
3617
3618  def : Pat<(storeop GPR64:$Rt,
3619                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
3620            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
3621                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
3622}
3623
3624let AddedComplexity = 10 in {
3625  // truncstore i64
3626  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
3627  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
3628  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
3629}
3630
3631multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
3632                         Instruction STRW, Instruction STRX> {
3633  def : Pat<(store (VecTy FPR:$Rt),
3634                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
3635            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
3636
3637  def : Pat<(store (VecTy FPR:$Rt),
3638                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
3639            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
3640}
3641
3642let AddedComplexity = 10 in {
3643// Match all store 64 bits width whose type is compatible with FPR64
3644let Predicates = [IsLE] in {
3645  // We must use ST1 to store vectors in big-endian.
3646  defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>;
3647  defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
3648  defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
3649  defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
3650  defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
3651  defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>;
3652}
3653
3654defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
3655defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;
3656
3657// Match all store 128 bits width whose type is compatible with FPR128
3658let Predicates = [IsLE, UseSTRQro] in {
3659  // We must use ST1 to store vectors in big-endian.
3660  defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
3661  defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
3662  defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>;
3663  defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
3664  defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
3665  defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
3666  defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>;
3667  defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>;
3668}
3669} // AddedComplexity = 10
3670
3671// Match stores from lane 0 to the appropriate subreg's store.
3672multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
3673                              ValueType VecTy, ValueType STy,
3674                              SubRegIndex SubRegIdx,
3675                              Instruction STRW, Instruction STRX> {
3676
3677  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
3678                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
3679            (STRW (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
3680                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
3681
3682  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), 0)),
3683                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
3684            (STRX (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
3685                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
3686}
3687
3688let AddedComplexity = 19 in {
3689  defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, hsub, STRHroW, STRHroX>;
3690  defm : VecROStoreLane0Pat<ro16,         store, v8f16, f16, hsub, STRHroW, STRHroX>;
3691  defm : VecROStoreLane0Pat<ro32,         store, v4i32, i32, ssub, STRSroW, STRSroX>;
3692  defm : VecROStoreLane0Pat<ro32,         store, v4f32, f32, ssub, STRSroW, STRSroX>;
3693  defm : VecROStoreLane0Pat<ro64,         store, v2i64, i64, dsub, STRDroW, STRDroX>;
3694  defm : VecROStoreLane0Pat<ro64,         store, v2f64, f64, dsub, STRDroW, STRDroX>;
3695}
3696
3697//---
3698// (unsigned immediate)
3699defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
3700                   [(store GPR64z:$Rt,
3701                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
3702defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
3703                    [(store GPR32z:$Rt,
3704                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
3705defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
3706                    [(store FPR8Op:$Rt,
3707                            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
3708defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
3709                    [(store (f16 FPR16Op:$Rt),
3710                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
3711defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
3712                    [(store (f32 FPR32Op:$Rt),
3713                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
3714defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
3715                    [(store (f64 FPR64Op:$Rt),
3716                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
3717defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;
3718
3719defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
3720                     [(truncstorei16 GPR32z:$Rt,
3721                                     (am_indexed16 GPR64sp:$Rn,
3722                                                   uimm12s2:$offset))]>;
3723defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1,  "strb",
3724                     [(truncstorei8 GPR32z:$Rt,
3725                                    (am_indexed8 GPR64sp:$Rn,
3726                                                 uimm12s1:$offset))]>;
3727
3728// bf16 store pattern
3729def : Pat<(store (bf16 FPR16Op:$Rt),
3730                 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
3731          (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;
3732
3733let AddedComplexity = 10 in {
3734
3735// Match all store 64 bits width whose type is compatible with FPR64
3736def : Pat<(store (v1i64 FPR64:$Rt),
3737                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
3738          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
3739def : Pat<(store (v1f64 FPR64:$Rt),
3740                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
3741          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
3742
3743let Predicates = [IsLE] in {
3744  // We must use ST1 to store vectors in big-endian.
3745  def : Pat<(store (v2f32 FPR64:$Rt),
3746                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
3747            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
3748  def : Pat<(store (v8i8 FPR64:$Rt),
3749                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
3750            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
3751  def : Pat<(store (v4i16 FPR64:$Rt),
3752                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
3753            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
3754  def : Pat<(store (v2i32 FPR64:$Rt),
3755                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
3756            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
3757  def : Pat<(store (v4f16 FPR64:$Rt),
3758                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
3759            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
3760  def : Pat<(store (v4bf16 FPR64:$Rt),
3761                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
3762            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
3763}
3764
3765// Match all store 128 bits width whose type is compatible with FPR128
3766def : Pat<(store (f128  FPR128:$Rt),
3767                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
3768          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
3769
3770let Predicates = [IsLE] in {
3771  // We must use ST1 to store vectors in big-endian.
3772  def : Pat<(store (v4f32 FPR128:$Rt),
3773                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
3774            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
3775  def : Pat<(store (v2f64 FPR128:$Rt),
3776                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
3777            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
3778  def : Pat<(store (v16i8 FPR128:$Rt),
3779                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
3780            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
3781  def : Pat<(store (v8i16 FPR128:$Rt),
3782                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
3783            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
3784  def : Pat<(store (v4i32 FPR128:$Rt),
3785                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
3786            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
3787  def : Pat<(store (v2i64 FPR128:$Rt),
3788                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
3789            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
3790  def : Pat<(store (v8f16 FPR128:$Rt),
3791                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
3792            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
3793  def : Pat<(store (v8bf16 FPR128:$Rt),
3794                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
3795            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
3796}
3797
3798// truncstore i64
3799def : Pat<(truncstorei32 GPR64:$Rt,
3800                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
3801  (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
3802def : Pat<(truncstorei16 GPR64:$Rt,
3803                         (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
3804  (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
3805def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
3806  (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;
3807
3808} // AddedComplexity = 10
3809
3810// Match stores from lane 0 to the appropriate subreg's store.
3811multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
3812                            ValueType VTy, ValueType STy,
3813                            SubRegIndex SubRegIdx, Operand IndexType,
3814                            Instruction STR> {
3815  def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), 0)),
3816                     (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
3817            (STR (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx),
3818                 GPR64sp:$Rn, IndexType:$offset)>;
3819}
3820
3821let AddedComplexity = 19 in {
3822  defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, hsub, uimm12s2, STRHui>;
3823  defm : VecStoreLane0Pat<am_indexed16,         store, v8f16, f16, hsub, uimm12s2, STRHui>;
3824  defm : VecStoreLane0Pat<am_indexed32,         store, v4i32, i32, ssub, uimm12s4, STRSui>;
3825  defm : VecStoreLane0Pat<am_indexed32,         store, v4f32, f32, ssub, uimm12s4, STRSui>;
3826  defm : VecStoreLane0Pat<am_indexed64,         store, v2i64, i64, dsub, uimm12s8, STRDui>;
3827  defm : VecStoreLane0Pat<am_indexed64,         store, v2f64, f64, dsub, uimm12s8, STRDui>;
3828}
3829
3830//---
3831// (unscaled immediate)
3832defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
3833                         [(store GPR64z:$Rt,
3834                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
3835defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
3836                         [(store GPR32z:$Rt,
3837                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
3838defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
3839                         [(store FPR8Op:$Rt,
3840                                 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
3841defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
3842                         [(store (f16 FPR16Op:$Rt),
3843                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
3844defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
3845                         [(store (f32 FPR32Op:$Rt),
3846                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
3847defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
3848                         [(store (f64 FPR64Op:$Rt),
3849                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
3850defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
3851                         [(store (f128 FPR128Op:$Rt),
3852                                 (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
3853defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
3854                         [(truncstorei16 GPR32z:$Rt,
3855                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
3856defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
3857                         [(truncstorei8 GPR32z:$Rt,
3858                                  (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
3859
3860// Armv8.4 Weaker Release Consistency enhancements
3861//         LDAPR & STLR with Immediate Offset instructions
3862let Predicates = [HasRCPC_IMMO] in {
3863defm STLURB     : BaseStoreUnscaleV84<"stlurb",  0b00, 0b00, GPR32>;
3864defm STLURH     : BaseStoreUnscaleV84<"stlurh",  0b01, 0b00, GPR32>;
3865defm STLURW     : BaseStoreUnscaleV84<"stlur",   0b10, 0b00, GPR32>;
3866defm STLURX     : BaseStoreUnscaleV84<"stlur",   0b11, 0b00, GPR64>;
3867defm LDAPURB    : BaseLoadUnscaleV84<"ldapurb",  0b00, 0b01, GPR32>;
3868defm LDAPURSBW  : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
3869defm LDAPURSBX  : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
3870defm LDAPURH    : BaseLoadUnscaleV84<"ldapurh",  0b01, 0b01, GPR32>;
3871defm LDAPURSHW  : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
3872defm LDAPURSHX  : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
3873defm LDAPUR     : BaseLoadUnscaleV84<"ldapur",   0b10, 0b01, GPR32>;
3874defm LDAPURSW   : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>;
3875defm LDAPURX    : BaseLoadUnscaleV84<"ldapur",   0b11, 0b01, GPR64>;
3876}
3877
3878// Match all store 64 bits width whose type is compatible with FPR64
3879def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
3880          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3881def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
3882          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3883
3884let AddedComplexity = 10 in {
3885
3886let Predicates = [IsLE] in {
3887  // We must use ST1 to store vectors in big-endian.
3888  def : Pat<(store (v2f32 FPR64:$Rt),
3889                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
3890            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3891  def : Pat<(store (v8i8 FPR64:$Rt),
3892                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
3893            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3894  def : Pat<(store (v4i16 FPR64:$Rt),
3895                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
3896            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3897  def : Pat<(store (v2i32 FPR64:$Rt),
3898                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
3899            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3900  def : Pat<(store (v4f16 FPR64:$Rt),
3901                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
3902            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3903  def : Pat<(store (v4bf16 FPR64:$Rt),
3904                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
3905            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3906}
3907
3908// Match all store 128 bits width whose type is compatible with FPR128
3909def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
3910          (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3911
3912let Predicates = [IsLE] in {
3913  // We must use ST1 to store vectors in big-endian.
3914  def : Pat<(store (v4f32 FPR128:$Rt),
3915                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
3916            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3917  def : Pat<(store (v2f64 FPR128:$Rt),
3918                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
3919            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3920  def : Pat<(store (v16i8 FPR128:$Rt),
3921                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
3922            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3923  def : Pat<(store (v8i16 FPR128:$Rt),
3924                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
3925            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3926  def : Pat<(store (v4i32 FPR128:$Rt),
3927                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
3928            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3929  def : Pat<(store (v2i64 FPR128:$Rt),
3930                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
3931            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3932  def : Pat<(store (v2f64 FPR128:$Rt),
3933                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
3934            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3935  def : Pat<(store (v8f16 FPR128:$Rt),
3936                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
3937            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3938  def : Pat<(store (v8bf16 FPR128:$Rt),
3939                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
3940            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
3941}
3942
3943} // AddedComplexity = 10
3944
3945// unscaled i64 truncating stores
3946def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
3947  (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
3948def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
3949  (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
3950def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
3951  (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
3952
3953// Match stores from lane 0 to the appropriate subreg's store.
3954multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
3955                             ValueType VTy, ValueType STy,
3956                             SubRegIndex SubRegIdx, Instruction STR> {
3957  defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegIdx, simm9, STR>;
3958}
3959
3960let AddedComplexity = 19 in {
3961  defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, hsub, STURHi>;
3962  defm : VecStoreULane0Pat<store,         v8f16, f16, hsub, STURHi>;
3963  defm : VecStoreULane0Pat<store,         v4i32, i32, ssub, STURSi>;
3964  defm : VecStoreULane0Pat<store,         v4f32, f32, ssub, STURSi>;
3965  defm : VecStoreULane0Pat<store,         v2i64, i64, dsub, STURDi>;
3966  defm : VecStoreULane0Pat<store,         v2f64, f64, dsub, STURDi>;
3967}
3968
3969//---
3970// STR mnemonics fall back to STUR for negative or unaligned offsets.
3971def : InstAlias<"str $Rt, [$Rn, $offset]",
3972                (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
3973def : InstAlias<"str $Rt, [$Rn, $offset]",
3974                (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
3975def : InstAlias<"str $Rt, [$Rn, $offset]",
3976                (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
3977def : InstAlias<"str $Rt, [$Rn, $offset]",
3978                (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
3979def : InstAlias<"str $Rt, [$Rn, $offset]",
3980                (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
3981def : InstAlias<"str $Rt, [$Rn, $offset]",
3982                (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
3983def : InstAlias<"str $Rt, [$Rn, $offset]",
3984                (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
3985
3986def : InstAlias<"strb $Rt, [$Rn, $offset]",
3987                (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
3988def : InstAlias<"strh $Rt, [$Rn, $offset]",
3989                (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
3990
3991//---
3992// (unscaled immediate, unprivileged)
3993defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
3994defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;
3995
3996defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
3997defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
3998
3999//---
4000// (immediate pre-indexed)
4001def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str",  pre_store, i32>;
4002def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str",  pre_store, i64>;
4003def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op,  "str",  pre_store, i8>;
4004def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str",  pre_store, f16>;
4005def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str",  pre_store, f32>;
4006def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str",  pre_store, f64>;
4007def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;
4008
4009def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8,  i32>;
4010def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;
4011
4012// truncstore i64
4013def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4014  (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4015           simm9:$off)>;
4016def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4017  (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4018            simm9:$off)>;
4019def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4020  (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4021            simm9:$off)>;
4022
4023def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4024          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4025def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4026          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4027def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4028          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4029def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4030          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4031def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4032          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4033def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4034          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4035def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4036          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4037
4038def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4039          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4040def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4041          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4042def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4043          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4044def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4045          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4046def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4047          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4048def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4049          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4050def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4051          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4052
4053//---
4054// (immediate post-indexed)
4055def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z,  "str", post_store, i32>;
4056def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z,  "str", post_store, i64>;
4057def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op,   "str", post_store, i8>;
4058def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op,  "str", post_store, f16>;
4059def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op,  "str", post_store, f32>;
4060def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op,  "str", post_store, f64>;
4061def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;
4062
4063def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>;
4064def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;
4065
4066// truncstore i64
4067def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4068  (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4069            simm9:$off)>;
4070def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4071  (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4072             simm9:$off)>;
4073def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4074  (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4075             simm9:$off)>;
4076
4077def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
4078          (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;
4079
4080def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4081          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4082def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4083          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4084def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4085          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4086def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4087          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4088def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4089          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4090def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4091          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4092def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4093          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4094def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4095          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4096
4097def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4098          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4099def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4100          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4101def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4102          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4103def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4104          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4105def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4106          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4107def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4108          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4109def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4110          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4111def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4112          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4113
4114//===----------------------------------------------------------------------===//
4115// Load/store exclusive instructions.
4116//===----------------------------------------------------------------------===//
4117
4118def LDARW  : LoadAcquire   <0b10, 1, 1, 0, 1, GPR32, "ldar">;
4119def LDARX  : LoadAcquire   <0b11, 1, 1, 0, 1, GPR64, "ldar">;
4120def LDARB  : LoadAcquire   <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
4121def LDARH  : LoadAcquire   <0b01, 1, 1, 0, 1, GPR32, "ldarh">;
4122
4123def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
4124def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
4125def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
4126def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;
4127
4128def LDXRW  : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
4129def LDXRX  : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
4130def LDXRB  : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
4131def LDXRH  : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;
4132
4133def STLRW  : StoreRelease  <0b10, 1, 0, 0, 1, GPR32, "stlr">;
4134def STLRX  : StoreRelease  <0b11, 1, 0, 0, 1, GPR64, "stlr">;
4135def STLRB  : StoreRelease  <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
4136def STLRH  : StoreRelease  <0b01, 1, 0, 0, 1, GPR32, "stlrh">;
4137
4138/*
4139Aliases for when offset=0. Note that in contrast to LoadAcquire which has a $Rn
4140of type GPR64sp0, we deliberately choose to make $Rn of type GPR64sp and add an
4141alias for the case of immediate #0. This is because new STLR versions (from
4142LRCPC3 extension) do have a non-zero immediate value, so GPR64sp0 is not
4143appropriate anymore (it parses and discards the optional zero). This is not the
4144case for LoadAcquire because the new LRCPC3 LDAR instructions are post-indexed,
4145and the immediate values are not inside the [] brackets and thus not accepted
4146by GPR64sp0 parser.
4147*/
4148def STLRW0  : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRW   GPR32: $Rt, GPR64sp:$Rn)>;
4149def STLRX0  : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRX   GPR64: $Rt, GPR64sp:$Rn)>;
4150def STLRB0  : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB   GPR32: $Rt, GPR64sp:$Rn)>;
4151def STLRH0  : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH   GPR32: $Rt, GPR64sp:$Rn)>;
4152
4153def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
4154def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
4155def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
4156def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;
4157
4158def STXRW  : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
4159def STXRX  : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
4160def STXRB  : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
4161def STXRH  : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;
4162
4163def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
4164def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;
4165
4166def LDXPW  : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
4167def LDXPX  : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;
4168
4169def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
4170def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
4171
4172def STXPW  : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
4173def STXPX  : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
4174
4175let Predicates = [HasLOR] in {
4176  // v8.1a "Limited Order Region" extension load-acquire instructions
4177  def LDLARW  : LoadAcquire   <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
4178  def LDLARX  : LoadAcquire   <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
4179  def LDLARB  : LoadAcquire   <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
4180  def LDLARH  : LoadAcquire   <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;
4181
4182  // v8.1a "Limited Order Region" extension store-release instructions
4183  def STLLRW  : StoreRelease   <0b10, 1, 0, 0, 0, GPR32, "stllr">;
4184  def STLLRX  : StoreRelease   <0b11, 1, 0, 0, 0, GPR64, "stllr">;
4185  def STLLRB  : StoreRelease   <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
4186  def STLLRH  : StoreRelease   <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
4187
4188  // Aliases for when offset=0
4189  def STLLRW0 : InstAlias<"stllr\t$Rt,  [$Rn, #0]",  (STLLRW   GPR32: $Rt, GPR64sp:$Rn)>;
4190  def STLLRX0 : InstAlias<"stllr\t$Rt,  [$Rn, #0]",  (STLLRX   GPR64: $Rt, GPR64sp:$Rn)>;
4191  def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]",  (STLLRB   GPR32: $Rt, GPR64sp:$Rn)>;
4192  def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]",  (STLLRH   GPR32: $Rt, GPR64sp:$Rn)>;
4193}
4194
4195//===----------------------------------------------------------------------===//
4196// Scaled floating point to integer conversion instructions.
4197//===----------------------------------------------------------------------===//
4198
4199defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
4200defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
4201defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
4202defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
4203defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
4204defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
4205defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
4206defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
4207defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
4208defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
4209defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
4210defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
4211
4212// AArch64's FCVT instructions saturate when out of range.
4213multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
4214  let Predicates = [HasFullFP16] in {
4215  def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
4216            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
4217  def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
4218            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
4219  }
4220  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
4221            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
4222  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
4223            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
4224  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
4225            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
4226  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
4227            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
4228
4229  let Predicates = [HasFullFP16] in {
4230  def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
4231            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
4232  def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
4233            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
4234  }
4235  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
4236            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
4237  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
4238            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
4239  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
4240            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
4241  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
4242            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
4243}
4244
4245defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
4246defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
4247
4248multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
4249  let Predicates = [HasFullFP16] in {
4250  def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
4251  def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
4252  }
4253  def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
4254  def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
4255  def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
4256  def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
4257
4258  let Predicates = [HasFullFP16] in {
4259  def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
4260            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
4261  def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
4262            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
4263  }
4264  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
4265            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
4266  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
4267            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
4268  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
4269            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
4270  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
4271            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
4272}
4273
4274defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
4275defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
4276
4277multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
4278  def : Pat<(i32 (to_int (round f32:$Rn))),
4279            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
4280  def : Pat<(i64 (to_int (round f32:$Rn))),
4281            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
4282  def : Pat<(i32 (to_int (round f64:$Rn))),
4283            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
4284  def : Pat<(i64 (to_int (round f64:$Rn))),
4285            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
4286
4287  // These instructions saturate like fp_to_[su]int_sat.
4288  let Predicates = [HasFullFP16] in {
4289  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
4290            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
4291  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
4292            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
4293  }
4294  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
4295            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
4296  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
4297            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
4298  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
4299            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
4300  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
4301            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
4302}
4303
4304defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
4305defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil,  "FCVTPU">;
4306defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
4307defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
4308defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
4309defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
4310defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
4311defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
4312
4313
4314
4315let Predicates = [HasFullFP16] in {
4316  def : Pat<(i32 (any_lround f16:$Rn)),
4317            (FCVTASUWHr f16:$Rn)>;
4318  def : Pat<(i64 (any_lround f16:$Rn)),
4319            (FCVTASUXHr f16:$Rn)>;
4320  def : Pat<(i64 (any_llround f16:$Rn)),
4321            (FCVTASUXHr f16:$Rn)>;
4322}
4323def : Pat<(i32 (any_lround f32:$Rn)),
4324          (FCVTASUWSr f32:$Rn)>;
4325def : Pat<(i32 (any_lround f64:$Rn)),
4326          (FCVTASUWDr f64:$Rn)>;
4327def : Pat<(i64 (any_lround f32:$Rn)),
4328          (FCVTASUXSr f32:$Rn)>;
4329def : Pat<(i64 (any_lround f64:$Rn)),
4330          (FCVTASUXDr f64:$Rn)>;
4331def : Pat<(i64 (any_llround f32:$Rn)),
4332          (FCVTASUXSr f32:$Rn)>;
4333def : Pat<(i64 (any_llround f64:$Rn)),
4334          (FCVTASUXDr f64:$Rn)>;
4335
4336//===----------------------------------------------------------------------===//
4337// Scaled integer to floating point conversion instructions.
4338//===----------------------------------------------------------------------===//
4339
4340defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
4341defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
4342
4343//===----------------------------------------------------------------------===//
4344// Unscaled integer to floating point conversion instruction.
4345//===----------------------------------------------------------------------===//
4346
4347defm FMOV : UnscaledConversion<"fmov">;
4348
4349// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
4350let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
4351def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
4352    Sched<[WriteF]>;
4353def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
4354    Sched<[WriteF]>;
4355def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
4356    Sched<[WriteF]>;
4357}
4358// Similarly add aliases
4359def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
4360    Requires<[HasFullFP16]>;
4361def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
4362def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
4363
4364// Pattern for FP16 immediates
4365let Predicates = [HasFullFP16] in {
4366  def : Pat<(f16 fpimm:$in),
4367    (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>;
4368}
4369
4370//===----------------------------------------------------------------------===//
4371// Floating point conversion instruction.
4372//===----------------------------------------------------------------------===//
4373
4374defm FCVT : FPConversion<"fcvt">;
4375
4376//===----------------------------------------------------------------------===//
4377// Floating point single operand instructions.
4378//===----------------------------------------------------------------------===//
4379
4380defm FABS   : SingleOperandFPDataNoException<0b0001, "fabs", fabs>;
4381defm FMOV   : SingleOperandFPDataNoException<0b0000, "fmov">;
4382defm FNEG   : SingleOperandFPDataNoException<0b0010, "fneg", fneg>;
4383defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>;
4384defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>;
4385defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>;
4386defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>;
4387defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>;
4388
4389defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>;
4390defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>;
4391
4392let SchedRW = [WriteFDiv] in {
4393defm FSQRT  : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>;
4394}
4395
4396let Predicates = [HasFRInt3264] in {
4397  defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>;
4398  defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>;
4399  defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>;
4400  defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
4401} // HasFRInt3264
4402
4403// Emitting strict_lrint as two instructions is valid as any exceptions that
4404// occur will happen in exactly one of the instructions (e.g. if the input is
4405// not an integer the inexact exception will happen in the FRINTX but not then
4406// in the FCVTZS as the output of FRINTX is an integer).
4407let Predicates = [HasFullFP16] in {
4408  def : Pat<(i32 (any_lrint f16:$Rn)),
4409            (FCVTZSUWHr (FRINTXHr f16:$Rn))>;
4410  def : Pat<(i64 (any_lrint f16:$Rn)),
4411            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
4412  def : Pat<(i64 (any_llrint f16:$Rn)),
4413            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
4414}
4415def : Pat<(i32 (any_lrint f32:$Rn)),
4416          (FCVTZSUWSr (FRINTXSr f32:$Rn))>;
4417def : Pat<(i32 (any_lrint f64:$Rn)),
4418          (FCVTZSUWDr (FRINTXDr f64:$Rn))>;
4419def : Pat<(i64 (any_lrint f32:$Rn)),
4420          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
4421def : Pat<(i64 (any_lrint f64:$Rn)),
4422          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
4423def : Pat<(i64 (any_llrint f32:$Rn)),
4424          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
4425def : Pat<(i64 (any_llrint f64:$Rn)),
4426          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
4427
4428//===----------------------------------------------------------------------===//
4429// Floating point two operand instructions.
4430//===----------------------------------------------------------------------===//
4431
4432defm FADD   : TwoOperandFPData<0b0010, "fadd", any_fadd>;
4433let SchedRW = [WriteFDiv] in {
4434defm FDIV   : TwoOperandFPData<0b0001, "fdiv", any_fdiv>;
4435}
4436defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>;
4437defm FMAX   : TwoOperandFPData<0b0100, "fmax", any_fmaximum>;
4438defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>;
4439defm FMIN   : TwoOperandFPData<0b0101, "fmin", any_fminimum>;
4440let SchedRW = [WriteFMul] in {
4441defm FMUL   : TwoOperandFPData<0b0000, "fmul", any_fmul>;
4442defm FNMUL  : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>;
4443}
4444defm FSUB   : TwoOperandFPData<0b0011, "fsub", any_fsub>;
4445
4446multiclass FMULScalarFromIndexedLane0Patterns<string inst,
4447                                              string inst_f16_suffix,
4448                                              string inst_f32_suffix,
4449                                              string inst_f64_suffix,
4450                                              SDPatternOperator OpNode,
4451                                              list<Predicate> preds = []> {
4452  let Predicates = !listconcat(preds, [HasFullFP16]) in {
4453  def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
4454                         (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
4455            (!cast<Instruction>(inst # inst_f16_suffix)
4456              FPR16:$Rn, (EXTRACT_SUBREG V128:$Rm, hsub))>;
4457  }
4458  let Predicates = preds in {
4459  def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
4460                         (f32 (vector_extract (v4f32 V128:$Rm), (i64 0))))),
4461            (!cast<Instruction>(inst # inst_f32_suffix)
4462              FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub))>;
4463  def : Pat<(f64 (OpNode (f64 FPR64:$Rn),
4464                         (f64 (vector_extract (v2f64 V128:$Rm), (i64 0))))),
4465            (!cast<Instruction>(inst # inst_f64_suffix)
4466              FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub))>;
4467  }
4468}
4469
4470defm : FMULScalarFromIndexedLane0Patterns<"FMUL", "Hrr", "Srr", "Drr",
4471                                          any_fmul>;
4472
4473// Match reassociated forms of FNMUL.
4474def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)),
4475          (FNMULHrr FPR16:$a, FPR16:$b)>,
4476          Requires<[HasFullFP16]>;
4477def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)),
4478          (FNMULSrr FPR32:$a, FPR32:$b)>;
4479def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)),
4480          (FNMULDrr FPR64:$a, FPR64:$b)>;
4481
4482def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4483          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
4484def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4485          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
4486def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4487          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
4488def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
4489          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
4490
4491//===----------------------------------------------------------------------===//
4492// Floating point three operand instructions.
4493//===----------------------------------------------------------------------===//
4494
4495defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", any_fma>;
4496defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
4497     TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
4498defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
4499     TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >;
4500defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
4501     TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;
4502
4503// The following def pats catch the case where the LHS of an FMA is negated.
4504// The TriOpFrag above catches the case where the middle operand is negated.
4505
4506// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike
4507// the NEON variant.
4508
4509// Here we handle first -(a + b*c) for FNMADD:
4510
4511let Predicates = [HasNEON, HasFullFP16] in
4512def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
4513          (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
4514
4515def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
4516          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
4517
4518def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
4519          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
4520
4521// Now it's time for "(-a) + (-b)*c"
4522
4523let Predicates = [HasNEON, HasFullFP16] in
4524def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
4525          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
4526
4527def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
4528          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
4529
4530def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
4531          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
4532
4533//===----------------------------------------------------------------------===//
4534// Floating point comparison instructions.
4535//===----------------------------------------------------------------------===//
4536
4537defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
4538defm FCMP  : FPComparison<0, "fcmp", AArch64any_fcmp>;
4539
4540//===----------------------------------------------------------------------===//
4541// Floating point conditional comparison instructions.
4542//===----------------------------------------------------------------------===//
4543
4544defm FCCMPE : FPCondComparison<1, "fccmpe">;
4545defm FCCMP  : FPCondComparison<0, "fccmp", AArch64fccmp>;
4546
4547//===----------------------------------------------------------------------===//
4548// Floating point conditional select instruction.
4549//===----------------------------------------------------------------------===//
4550
4551defm FCSEL : FPCondSelect<"fcsel">;
4552
4553let Predicates = [HasFullFP16] in
4554def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)),
4555          (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>;
4556
4557// CSEL instructions providing f128 types need to be handled by a
4558// pseudo-instruction since the eventual code will need to introduce basic
4559// blocks and control flow.
4560def F128CSEL : Pseudo<(outs FPR128:$Rd),
4561                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
4562                      [(set (f128 FPR128:$Rd),
4563                            (AArch64csel FPR128:$Rn, FPR128:$Rm,
4564                                       (i32 imm:$cond), NZCV))]> {
4565  let Uses = [NZCV];
4566  let usesCustomInserter = 1;
4567  let hasNoSchedulingInfo = 1;
4568}
4569
4570//===----------------------------------------------------------------------===//
4571// Instructions used for emitting unwind opcodes on ARM64 Windows.
4572//===----------------------------------------------------------------------===//
4573let isPseudo = 1 in {
4574  def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
4575  def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
4576  def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
4577  def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4578  def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4579  def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4580  def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4581  def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4582  def SEH_SaveFReg_X :  Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
4583  def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4584  def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
4585  def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
4586  def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
4587  def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
4588  def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
4589  def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
4590  def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
4591  def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>;
4592}
4593
4594// Pseudo instructions for Windows EH
4595//===----------------------------------------------------------------------===//
4596let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
4597    isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
4598   def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;
4599   let usesCustomInserter = 1 in
4600     def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
4601                    Sched<[]>;
4602}
4603
4604// Pseudo instructions for homogeneous prolog/epilog
4605let isPseudo = 1 in {
4606  // Save CSRs in order, {FPOffset}
4607  def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
4608  // Restore CSRs in order
4609  def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
4610}
4611
4612//===----------------------------------------------------------------------===//
4613// Floating point immediate move.
4614//===----------------------------------------------------------------------===//
4615
4616let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
4617defm FMOV : FPMoveImmediate<"fmov">;
4618}
4619
4620//===----------------------------------------------------------------------===//
4621// Advanced SIMD two vector instructions.
4622//===----------------------------------------------------------------------===//
4623
4624defm UABDL   : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
4625                                          AArch64uabd>;
4626// Match UABDL in log2-shuffle patterns.
4627def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
4628                           (zext (v8i8 V64:$opB))))),
4629          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
4630def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
4631               (v8i16 (add (sub (zext (v8i8 V64:$opA)),
4632                                (zext (v8i8 V64:$opB))),
4633                           (AArch64vashr v8i16:$src, (i32 15))))),
4634          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
4635def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
4636                           (zext (extract_high_v16i8 (v16i8 V128:$opB)))))),
4637          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
4638def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))),
4639               (v8i16 (add (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
4640                                (zext (extract_high_v16i8 (v16i8 V128:$opB)))),
4641                           (AArch64vashr v8i16:$src, (i32 15))))),
4642          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
4643def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
4644                           (zext (v4i16 V64:$opB))))),
4645          (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
4646def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))),
4647                           (zext (extract_high_v8i16 (v8i16 V128:$opB)))))),
4648          (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
4649def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
4650                           (zext (v2i32 V64:$opB))))),
4651          (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
4652def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))),
4653                           (zext (extract_high_v4i32 (v4i32 V128:$opB)))))),
4654          (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
4655
4656defm ABS    : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
4657defm CLS    : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
4658defm CLZ    : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
4659defm CMEQ   : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
4660defm CMGE   : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
4661defm CMGT   : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
4662defm CMLE   : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
4663defm CMLT   : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
4664defm CNT    : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
4665defm FABS   : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>;
4666
4667def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
4668          (CMLTv8i8rz V64:$Rn)>;
4669def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
4670          (CMLTv4i16rz V64:$Rn)>;
4671def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
4672          (CMLTv2i32rz V64:$Rn)>;
4673def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))),
4674          (CMLTv16i8rz V128:$Rn)>;
4675def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
4676          (CMLTv8i16rz V128:$Rn)>;
4677def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
4678          (CMLTv4i32rz V128:$Rn)>;
4679def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
4680          (CMLTv2i64rz V128:$Rn)>;
4681
4682defm FCMEQ  : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
4683defm FCMGE  : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
4684defm FCMGT  : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
4685defm FCMLE  : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
4686defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
4687defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
4688defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
4689defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
4690def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
4691          (FCVTLv4i16 V64:$Rn)>;
4692def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
4693                                                                (i64 4)))),
4694          (FCVTLv8i16 V128:$Rn)>;
4695def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))),
4696          (FCVTLv2i32 V64:$Rn)>;
4697def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
4698          (FCVTLv4i32 V128:$Rn)>;
4699def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))),
4700          (FCVTLv4i16 V64:$Rn)>;
4701def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))),
4702          (FCVTLv8i16 V128:$Rn)>;
4703
4704defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
4705defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
4706defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
4707defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
4708defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
4709def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
4710          (FCVTNv4i16 V128:$Rn)>;
4711def : Pat<(concat_vectors V64:$Rd,
4712                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
4713          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
4714def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))),
4715          (FCVTNv2i32 V128:$Rn)>;
4716def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))),
4717          (FCVTNv4i16 V128:$Rn)>;
4718def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))),
4719          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
4720def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))),
4721          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
4722defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
4723defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
4724defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
4725                                        int_aarch64_neon_fcvtxn>;
4726defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
4727defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
4728
4729// AArch64's FCVT instructions saturate when out of range.
4730multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
4731  let Predicates = [HasFullFP16] in {
4732  def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
4733            (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
4734  def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
4735            (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
4736  }
4737  def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
4738            (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
4739  def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
4740            (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
4741  def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
4742            (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
4743}
4744defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
4745defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;
4746
4747def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
4748def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
4749def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
4750def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
4751def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;
4752
4753def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
4754def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
4755def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
4756def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
4757def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;
4758
4759defm FNEG   : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>;
4760defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
4761defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>;
4762defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>;
4763defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>;
4764defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>;
4765defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>;
4766defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>;
4767defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>;
4768
4769let Predicates = [HasFRInt3264] in {
4770  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
4771  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
4772  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
4773  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
4774} // HasFRInt3264
4775
4776defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
4777defm FSQRT  : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>;
4778defm NEG    : SIMDTwoVectorBHSD<1, 0b01011, "neg",
4779                               UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
4780defm NOT    : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
4781// Aliases for MVN -> NOT.
4782def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
4783                (NOTv8i8 V64:$Vd, V64:$Vn)>;
4784def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
4785                (NOTv16i8 V128:$Vd, V128:$Vn)>;
4786
4787def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
4788def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
4789def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
4790def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
4791def : Pat<(vnot (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
4792def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
4793
4794defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
4795defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
4796defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
4797defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
4798defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
4799       BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >;
4800defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>;
4801defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>;
4802defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
4803defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
4804defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
4805defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
4806defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
4807defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
4808defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
4809       BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
4810defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
4811defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
4812defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
4813defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
4814defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
4815defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
4816defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
4817
4818def : Pat<(v4f16  (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
4819def : Pat<(v4f16  (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
4820def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
4821def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
4822def : Pat<(v8f16  (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
4823def : Pat<(v8f16  (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
4824def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
4825def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
4826def : Pat<(v2f32  (AArch64rev64 V64:$Rn)),  (REV64v2i32 V64:$Rn)>;
4827def : Pat<(v4f32  (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
4828
4829// Patterns for vector long shift (by element width). These need to match all
4830// three of zext, sext and anyext so it's easier to pull the patterns out of the
4831// definition.
4832multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
4833  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
4834            (SHLLv8i8 V64:$Rn)>;
4835  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)),
4836            (SHLLv16i8 V128:$Rn)>;
4837  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
4838            (SHLLv4i16 V64:$Rn)>;
4839  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)),
4840            (SHLLv8i16 V128:$Rn)>;
4841  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
4842            (SHLLv2i32 V64:$Rn)>;
4843  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)),
4844            (SHLLv4i32 V128:$Rn)>;
4845}
4846
4847defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
4848defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
4849defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
4850
4851// Constant vector values, used in the S/UQXTN patterns below.
4852def VImmFF:   PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
4853def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
4854def VImm7F:   PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
4855def VImm80:   PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
4856def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
4857def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
4858
4859// trunc(umin(X, 255)) -> UQXTRN v8i8
4860def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
4861          (UQXTNv8i8 V128:$Vn)>;
4862// trunc(umin(X, 65535)) -> UQXTRN v4i16
4863def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
4864          (UQXTNv4i16 V128:$Vn)>;
4865// trunc(smin(smax(X, -128), 128)) -> SQXTRN
4866//  with reversed min/max
4867def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
4868                             (v8i16 VImm7F)))),
4869          (SQXTNv8i8 V128:$Vn)>;
4870def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
4871                             (v8i16 VImm80)))),
4872          (SQXTNv8i8 V128:$Vn)>;
4873// trunc(smin(smax(X, -32768), 32767)) -> SQXTRN
4874//  with reversed min/max
4875def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
4876                              (v4i32 VImm7FFF)))),
4877          (SQXTNv4i16 V128:$Vn)>;
4878def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
4879                              (v4i32 VImm8000)))),
4880          (SQXTNv4i16 V128:$Vn)>;
4881
4882// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn)
4883// with reversed min/max
4884def : Pat<(v16i8 (concat_vectors
4885                 (v8i8 V64:$Vd),
4886                 (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
4887                                          (v8i16 VImm7F)))))),
4888          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
4889def : Pat<(v16i8 (concat_vectors
4890                 (v8i8 V64:$Vd),
4891                 (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
4892                                          (v8i16 VImm80)))))),
4893          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
4894
4895// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn)
4896// with reversed min/max
4897def : Pat<(v8i16 (concat_vectors
4898                 (v4i16 V64:$Vd),
4899                 (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
4900                                           (v4i32 VImm7FFF)))))),
4901          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
4902def : Pat<(v8i16 (concat_vectors
4903                 (v4i16 V64:$Vd),
4904                 (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
4905                                           (v4i32 VImm8000)))))),
4906          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
4907
4908//===----------------------------------------------------------------------===//
4909// Advanced SIMD three vector instructions.
4910//===----------------------------------------------------------------------===//
4911
4912defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
4913defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>;
4914defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
4915defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
4916defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
4917defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
4918defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
4919defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
4920foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in {
4921def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
4922}
4923defm FABD    : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
4924let Predicates = [HasNEON] in {
4925foreach VT = [ v2f32, v4f32, v2f64 ] in
4926def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
4927}
4928let Predicates = [HasNEON, HasFullFP16] in {
4929foreach VT = [ v4f16, v8f16 ] in
4930def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
4931}
4932defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",AArch64facge>;
4933defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",AArch64facgt>;
4934defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>;
4935defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>;
4936defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
4937defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
4938defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
4939defm FDIV    : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>;
4940defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
4941defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>;
4942defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
4943defm FMAX    : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>;
4944defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
4945defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
4946defm FMINP   : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
4947defm FMIN    : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;
4948
4949// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
4950// instruction expects the addend first, while the fma intrinsic puts it last.
4951defm FMLA     : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
4952            TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
4953defm FMLS     : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
4954            TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
4955
4956defm FMULX    : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
4957defm FMUL     : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>;
4958defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
4959defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
4960defm FSUB     : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>;
4961
4962// MLA and MLS are generated in MachineCombine
4963defm MLA      : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
4964defm MLS      : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
4965
4966defm MUL      : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
4967defm PMUL     : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
4968defm SABA     : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
4969      TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
4970defm SABD     : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
4971defm SHADD    : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>;
4972defm SHSUB    : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
4973defm SMAXP    : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
4974defm SMAX     : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
4975defm SMINP    : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
4976defm SMIN     : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
4977defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
4978defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
4979defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
4980defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
4981defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
4982defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
4983defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
4984defm SRSHL    : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
4985defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
4986defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
4987defm UABA     : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
4988      TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
4989defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
4990defm UHADD    : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
4991defm UHSUB    : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
4992defm UMAXP    : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
4993defm UMAX     : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
4994defm UMINP    : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
4995defm UMIN     : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
4996defm UQADD    : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
4997defm UQRSHL   : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
4998defm UQSHL    : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
4999defm UQSUB    : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
5000defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
5001defm URSHL    : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
5002defm USHL     : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
5003defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
5004                                                  int_aarch64_neon_sqrdmlah>;
5005defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
5006                                                    int_aarch64_neon_sqrdmlsh>;
5007
5008// Extra saturate patterns, other than the intrinsics matches above
5009defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
5010defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
5011defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
5012defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;
5013
5014defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
5015defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
5016                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
5017defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
5018defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
5019                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
5020defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
5021
5022// Pseudo bitwise select pattern BSP.
5023// It is expanded into BSL/BIT/BIF after register allocation.
5024defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
5025                                                      (and (vnot node:$LHS), node:$RHS))>>;
5026defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
5027defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
5028defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;
5029
5030def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
5031          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5032def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
5033          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5034def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
5035          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5036def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
5037          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5038
5039def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
5040          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5041def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
5042          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5043def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
5044          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5045def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
5046          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5047
5048def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
5049                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
5050def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
5051                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5052def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
5053                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5054def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
5055                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5056
5057def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
5058                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
5059def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
5060                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5061def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
5062                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5063def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
5064                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5065
5066def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
5067                "|cmls.8b\t$dst, $src1, $src2}",
5068                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5069def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
5070                "|cmls.16b\t$dst, $src1, $src2}",
5071                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5072def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
5073                "|cmls.4h\t$dst, $src1, $src2}",
5074                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5075def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
5076                "|cmls.8h\t$dst, $src1, $src2}",
5077                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5078def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
5079                "|cmls.2s\t$dst, $src1, $src2}",
5080                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5081def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
5082                "|cmls.4s\t$dst, $src1, $src2}",
5083                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5084def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
5085                "|cmls.2d\t$dst, $src1, $src2}",
5086                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5087
5088def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
5089                "|cmlo.8b\t$dst, $src1, $src2}",
5090                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5091def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
5092                "|cmlo.16b\t$dst, $src1, $src2}",
5093                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5094def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
5095                "|cmlo.4h\t$dst, $src1, $src2}",
5096                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5097def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
5098                "|cmlo.8h\t$dst, $src1, $src2}",
5099                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5100def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
5101                "|cmlo.2s\t$dst, $src1, $src2}",
5102                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5103def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
5104                "|cmlo.4s\t$dst, $src1, $src2}",
5105                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5106def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
5107                "|cmlo.2d\t$dst, $src1, $src2}",
5108                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5109
5110def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
5111                "|cmle.8b\t$dst, $src1, $src2}",
5112                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5113def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
5114                "|cmle.16b\t$dst, $src1, $src2}",
5115                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5116def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
5117                "|cmle.4h\t$dst, $src1, $src2}",
5118                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5119def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
5120                "|cmle.8h\t$dst, $src1, $src2}",
5121                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5122def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
5123                "|cmle.2s\t$dst, $src1, $src2}",
5124                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5125def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
5126                "|cmle.4s\t$dst, $src1, $src2}",
5127                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5128def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
5129                "|cmle.2d\t$dst, $src1, $src2}",
5130                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5131
5132def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
5133                "|cmlt.8b\t$dst, $src1, $src2}",
5134                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5135def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
5136                "|cmlt.16b\t$dst, $src1, $src2}",
5137                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5138def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
5139                "|cmlt.4h\t$dst, $src1, $src2}",
5140                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5141def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
5142                "|cmlt.8h\t$dst, $src1, $src2}",
5143                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5144def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
5145                "|cmlt.2s\t$dst, $src1, $src2}",
5146                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5147def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
5148                "|cmlt.4s\t$dst, $src1, $src2}",
5149                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5150def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
5151                "|cmlt.2d\t$dst, $src1, $src2}",
5152                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5153
5154let Predicates = [HasNEON, HasFullFP16] in {
5155def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
5156                "|fcmle.4h\t$dst, $src1, $src2}",
5157                (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5158def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
5159                "|fcmle.8h\t$dst, $src1, $src2}",
5160                (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5161}
5162def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
5163                "|fcmle.2s\t$dst, $src1, $src2}",
5164                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5165def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
5166                "|fcmle.4s\t$dst, $src1, $src2}",
5167                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5168def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
5169                "|fcmle.2d\t$dst, $src1, $src2}",
5170                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5171
5172let Predicates = [HasNEON, HasFullFP16] in {
5173def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
5174                "|fcmlt.4h\t$dst, $src1, $src2}",
5175                (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5176def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
5177                "|fcmlt.8h\t$dst, $src1, $src2}",
5178                (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5179}
5180def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
5181                "|fcmlt.2s\t$dst, $src1, $src2}",
5182                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5183def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
5184                "|fcmlt.4s\t$dst, $src1, $src2}",
5185                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5186def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
5187                "|fcmlt.2d\t$dst, $src1, $src2}",
5188                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5189
5190let Predicates = [HasNEON, HasFullFP16] in {
5191def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
5192                "|facle.4h\t$dst, $src1, $src2}",
5193                (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5194def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
5195                "|facle.8h\t$dst, $src1, $src2}",
5196                (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5197}
5198def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
5199                "|facle.2s\t$dst, $src1, $src2}",
5200                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5201def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
5202                "|facle.4s\t$dst, $src1, $src2}",
5203                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5204def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
5205                "|facle.2d\t$dst, $src1, $src2}",
5206                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5207
5208let Predicates = [HasNEON, HasFullFP16] in {
5209def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
5210                "|faclt.4h\t$dst, $src1, $src2}",
5211                (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5212def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
5213                "|faclt.8h\t$dst, $src1, $src2}",
5214                (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5215}
5216def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
5217                "|faclt.2s\t$dst, $src1, $src2}",
5218                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5219def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
5220                "|faclt.4s\t$dst, $src1, $src2}",
5221                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5222def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
5223                "|faclt.2d\t$dst, $src1, $src2}",
5224                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5225
5226//===----------------------------------------------------------------------===//
5227// Advanced SIMD three scalar instructions.
5228//===----------------------------------------------------------------------===//
5229
5230defm ADD      : SIMDThreeScalarD<0, 0b10000, "add", add>;
5231defm CMEQ     : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
5232defm CMGE     : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
5233defm CMGT     : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
5234defm CMHI     : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
5235defm CMHS     : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
5236defm CMTST    : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
5237defm FABD     : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
5238def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5239          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
5240let Predicates = [HasNEON, HasFullFP16] in {
5241def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
5242}
5243let Predicates = [HasNEON] in {
5244def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
5245def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
5246}
5247defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
5248                                     int_aarch64_neon_facge>;
5249defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
5250                                     int_aarch64_neon_facgt>;
5251defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
5252defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
5253defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
5254defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorSME>;
5255defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorSME>;
5256defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorSME>;
5257defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
5258defm SQDMULH  : SIMDThreeScalarHS<  0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
5259defm SQRDMULH : SIMDThreeScalarHS<  1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
5260defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
5261defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
5262defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
5263defm SRSHL    : SIMDThreeScalarD<   0, 0b01010, "srshl", int_aarch64_neon_srshl>;
5264defm SSHL     : SIMDThreeScalarD<   0, 0b01000, "sshl", int_aarch64_neon_sshl>;
5265defm SUB      : SIMDThreeScalarD<   1, 0b10000, "sub", sub>;
5266defm UQADD    : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
5267defm UQRSHL   : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
5268defm UQSHL    : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
5269defm UQSUB    : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
5270defm URSHL    : SIMDThreeScalarD<   1, 0b01010, "urshl", int_aarch64_neon_urshl>;
5271defm USHL     : SIMDThreeScalarD<   1, 0b01000, "ushl", int_aarch64_neon_ushl>;
5272let Predicates = [HasRDM] in {
5273  defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
5274  defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
5275  def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn),
5276                                            (i32 FPR32:$Rm))),
5277            (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5278  def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn),
5279                                            (i32 FPR32:$Rm))),
5280            (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5281}
5282
5283defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64",
5284                                          int_aarch64_neon_fmulx,
5285                                          [HasNEONorSME]>;
5286
5287def : InstAlias<"cmls $dst, $src1, $src2",
5288                (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5289def : InstAlias<"cmle $dst, $src1, $src2",
5290                (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5291def : InstAlias<"cmlo $dst, $src1, $src2",
5292                (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5293def : InstAlias<"cmlt $dst, $src1, $src2",
5294                (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5295def : InstAlias<"fcmle $dst, $src1, $src2",
5296                (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5297def : InstAlias<"fcmle $dst, $src1, $src2",
5298                (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5299def : InstAlias<"fcmlt $dst, $src1, $src2",
5300                (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5301def : InstAlias<"fcmlt $dst, $src1, $src2",
5302                (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5303def : InstAlias<"facle $dst, $src1, $src2",
5304                (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5305def : InstAlias<"facle $dst, $src1, $src2",
5306                (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5307def : InstAlias<"faclt $dst, $src1, $src2",
5308                (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
5309def : InstAlias<"faclt $dst, $src1, $src2",
5310                (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5311
5312//===----------------------------------------------------------------------===//
5313// Advanced SIMD three scalar instructions (mixed operands).
5314//===----------------------------------------------------------------------===//
5315defm SQDMULL  : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
5316                                       int_aarch64_neon_sqdmulls_scalar>;
5317defm SQDMLAL  : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
5318defm SQDMLSL  : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;
5319
5320def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
5321                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
5322                                                        (i32 FPR32:$Rm))))),
5323          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5324def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
5325                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
5326                                                        (i32 FPR32:$Rm))))),
5327          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5328
5329//===----------------------------------------------------------------------===//
5330// Advanced SIMD two scalar instructions.
5331//===----------------------------------------------------------------------===//
5332
5333defm ABS    : SIMDTwoScalarD<    0, 0b01011, "abs", abs, [HasNoCSSC]>;
5334defm CMEQ   : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
5335defm CMGE   : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
5336defm CMGT   : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
5337defm CMLE   : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
5338defm CMLT   : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
5339defm FCMEQ  : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
5340defm FCMGE  : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
5341defm FCMGT  : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
5342defm FCMLE  : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
5343defm FCMLT  : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
5344defm FCVTAS : SIMDFPTwoScalar<   0, 0, 0b11100, "fcvtas">;
5345defm FCVTAU : SIMDFPTwoScalar<   1, 0, 0b11100, "fcvtau">;
5346defm FCVTMS : SIMDFPTwoScalar<   0, 0, 0b11011, "fcvtms">;
5347defm FCVTMU : SIMDFPTwoScalar<   1, 0, 0b11011, "fcvtmu">;
5348defm FCVTNS : SIMDFPTwoScalar<   0, 0, 0b11010, "fcvtns">;
5349defm FCVTNU : SIMDFPTwoScalar<   1, 0, 0b11010, "fcvtnu">;
5350defm FCVTPS : SIMDFPTwoScalar<   0, 1, 0b11010, "fcvtps">;
5351defm FCVTPU : SIMDFPTwoScalar<   1, 1, 0b11010, "fcvtpu">;
5352def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
5353defm FCVTZS : SIMDFPTwoScalar<   0, 1, 0b11011, "fcvtzs">;
5354defm FCVTZU : SIMDFPTwoScalar<   1, 1, 0b11011, "fcvtzu">;
5355defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe", HasNEONorSME>;
5356defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx", HasNEONorSME>;
5357defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte", HasNEONorSME>;
5358defm NEG    : SIMDTwoScalarD<    1, 0b01011, "neg",
5359                                 UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
5360defm SCVTF  : SIMDFPTwoScalarCVT<   0, 0, 0b11101, "scvtf", AArch64sitof>;
5361defm SQABS  : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
5362defm SQNEG  : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
5363defm SQXTN  : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
5364defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
5365defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
5366                                     int_aarch64_neon_suqadd>;
5367defm UCVTF  : SIMDFPTwoScalarCVT<   1, 0, 0b11101, "ucvtf", AArch64uitof>;
5368defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
5369defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
5370                                    int_aarch64_neon_usqadd>;
5371
5372def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
5373          (CMLTv1i64rz V64:$Rn)>;
5374
5375def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
5376          (FCVTASv1i64 FPR64:$Rn)>;
5377def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
5378          (FCVTAUv1i64 FPR64:$Rn)>;
5379def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
5380          (FCVTMSv1i64 FPR64:$Rn)>;
5381def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
5382          (FCVTMUv1i64 FPR64:$Rn)>;
5383def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
5384          (FCVTNSv1i64 FPR64:$Rn)>;
5385def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
5386          (FCVTNUv1i64 FPR64:$Rn)>;
5387def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
5388          (FCVTPSv1i64 FPR64:$Rn)>;
5389def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
5390          (FCVTPUv1i64 FPR64:$Rn)>;
5391def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
5392          (FCVTZSv1i64 FPR64:$Rn)>;
5393def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
5394          (FCVTZUv1i64 FPR64:$Rn)>;
5395
5396def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
5397          (FRECPEv1f16 FPR16:$Rn)>;
5398def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
5399          (FRECPEv1i32 FPR32:$Rn)>;
5400def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
5401          (FRECPEv1i64 FPR64:$Rn)>;
5402def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
5403          (FRECPEv1i64 FPR64:$Rn)>;
5404
5405def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
5406          (FRECPEv1i32 FPR32:$Rn)>;
5407def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
5408          (FRECPEv2f32 V64:$Rn)>;
5409def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
5410          (FRECPEv4f32 FPR128:$Rn)>;
5411def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
5412          (FRECPEv1i64 FPR64:$Rn)>;
5413def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
5414          (FRECPEv1i64 FPR64:$Rn)>;
5415def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
5416          (FRECPEv2f64 FPR128:$Rn)>;
5417
5418def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
5419          (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
5420def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
5421          (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
5422def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
5423          (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
5424def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
5425          (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
5426def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
5427          (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;
5428
5429def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
5430          (FRECPXv1f16 FPR16:$Rn)>;
5431def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
5432          (FRECPXv1i32 FPR32:$Rn)>;
5433def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
5434          (FRECPXv1i64 FPR64:$Rn)>;
5435
5436def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
5437          (FRSQRTEv1f16 FPR16:$Rn)>;
5438def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
5439          (FRSQRTEv1i32 FPR32:$Rn)>;
5440def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
5441          (FRSQRTEv1i64 FPR64:$Rn)>;
5442def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
5443          (FRSQRTEv1i64 FPR64:$Rn)>;
5444
5445def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
5446          (FRSQRTEv1i32 FPR32:$Rn)>;
5447def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
5448          (FRSQRTEv2f32 V64:$Rn)>;
5449def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
5450          (FRSQRTEv4f32 FPR128:$Rn)>;
5451def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
5452          (FRSQRTEv1i64 FPR64:$Rn)>;
5453def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
5454          (FRSQRTEv1i64 FPR64:$Rn)>;
5455def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
5456          (FRSQRTEv2f64 FPR128:$Rn)>;
5457
5458def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
5459          (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
5460def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
5461          (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
5462def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
5463          (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
5464def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
5465          (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
5466def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
5467          (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;
5468
5469// Some float -> int -> float conversion patterns for which we want to keep the
5470// int values in FP registers using the corresponding NEON instructions to
5471// avoid more costly int <-> fp register transfers.
5472let Predicates = [HasNEON] in {
5473def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
5474          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
5475def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
5476          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
5477def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
5478          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
5479def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
5480          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
5481
5482let Predicates = [HasFullFP16] in {
5483def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
5484          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
5485def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
5486          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
5487}
5488// If an integer is about to be converted to a floating point value,
5489// just load it on the floating point unit.
5490// Here are the patterns for 8 and 16-bits to float.
5491// 8-bits -> float.
5492multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
5493                             SDPatternOperator loadop, Instruction UCVTF,
5494                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
5495                             SubRegIndex sub> {
5496  def : Pat<(DstTy (uint_to_fp (SrcTy
5497                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
5498                                      ro.Wext:$extend))))),
5499           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
5500                                 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
5501                                 sub))>;
5502
5503  def : Pat<(DstTy (uint_to_fp (SrcTy
5504                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
5505                                      ro.Wext:$extend))))),
5506           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
5507                                 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
5508                                 sub))>;
5509}
5510
5511defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
5512                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
5513def : Pat <(f32 (uint_to_fp (i32
5514               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
5515           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
5516                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
5517def : Pat <(f32 (uint_to_fp (i32
5518                     (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
5519           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
5520                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
5521// 16-bits -> float.
5522defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
5523                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
5524def : Pat <(f32 (uint_to_fp (i32
5525                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
5526           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
5527                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
5528def : Pat <(f32 (uint_to_fp (i32
5529                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
5530           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
5531                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
5532// 32-bits are handled in target specific dag combine:
5533// performIntToFpCombine.
5534// 64-bits integer to 32-bits floating point, not possible with
5535// UCVTF on floating point registers (both source and destination
5536// must have the same size).
5537
5538// Here are the patterns for 8, 16, 32, and 64-bits to double.
5539// 8-bits -> double.
5540defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
5541                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
5542def : Pat <(f64 (uint_to_fp (i32
5543                    (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
5544           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5545                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
5546def : Pat <(f64 (uint_to_fp (i32
5547                  (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
5548           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5549                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
5550// 16-bits -> double.
5551defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
5552                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
5553def : Pat <(f64 (uint_to_fp (i32
5554                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
5555           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5556                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
5557def : Pat <(f64 (uint_to_fp (i32
5558                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
5559           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5560                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
5561// 32-bits -> double.
5562defm : UIntToFPROLoadPat<f64, i32, load,
5563                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
5564def : Pat <(f64 (uint_to_fp (i32
5565                  (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
5566           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5567                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
5568def : Pat <(f64 (uint_to_fp (i32
5569                  (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
5570           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
5571                          (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
5572// 64-bits -> double are handled in target specific dag combine:
5573// performIntToFpCombine.
5574} // let Predicates = [HasNEON]
5575
5576//===----------------------------------------------------------------------===//
5577// Advanced SIMD three different-sized vector instructions.
5578//===----------------------------------------------------------------------===//
5579
5580defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
5581defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
5582defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
5583defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
5584defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
5585defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
5586                                             AArch64sabd>;
5587defm SABDL   : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
5588                                          AArch64sabd>;
5589defm SADDL   : SIMDLongThreeVectorBHS<   0, 0b0000, "saddl",
5590            BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
5591defm SADDW   : SIMDWideThreeVectorBHS<   0, 0b0001, "saddw",
5592                 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
5593defm SMLAL   : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
5594    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
5595defm SMLSL   : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
5596    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
5597defm SMULL   : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
5598defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
5599                                               int_aarch64_neon_sqadd>;
5600defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
5601                                               int_aarch64_neon_sqsub>;
5602defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
5603                                     int_aarch64_neon_sqdmull>;
5604defm SSUBL   : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
5605                 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
5606defm SSUBW   : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
5607                 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
5608defm UABAL   : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
5609                                              AArch64uabd>;
5610defm UADDL   : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
5611                 BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
5612defm UADDW   : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
5613                 BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
5614defm UMLAL   : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
5615    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
5616defm UMLSL   : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
5617    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
5618defm UMULL   : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
5619defm USUBL   : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
5620                 BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
5621defm USUBW   : SIMDWideThreeVectorBHS<   1, 0b0011, "usubw",
5622                 BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;
5623
5624// Additional patterns for [SU]ML[AS]L
5625multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
5626  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
5627  def : Pat<(v4i16 (opnode
5628                    V64:$Ra,
5629                    (v4i16 (extract_subvector
5630                            (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
5631                            (i64 0))))),
5632             (EXTRACT_SUBREG (v8i16 (INST8B
5633                                     (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
5634                                     V64:$Rn, V64:$Rm)), dsub)>;
5635  def : Pat<(v2i32 (opnode
5636                    V64:$Ra,
5637                    (v2i32 (extract_subvector
5638                            (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
5639                            (i64 0))))),
5640             (EXTRACT_SUBREG (v4i32 (INST4H
5641                                     (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
5642                                     V64:$Rn, V64:$Rm)), dsub)>;
5643  def : Pat<(v1i64 (opnode
5644                    V64:$Ra,
5645                    (v1i64 (extract_subvector
5646                            (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
5647                            (i64 0))))),
5648             (EXTRACT_SUBREG (v2i64 (INST2S
5649                                     (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
5650                                     V64:$Rn, V64:$Rm)), dsub)>;
5651}
5652
5653defm : Neon_mul_acc_widen_patterns<add, AArch64umull,
5654     UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
5655defm : Neon_mul_acc_widen_patterns<add, AArch64smull,
5656     SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
5657defm : Neon_mul_acc_widen_patterns<sub, AArch64umull,
5658     UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
5659defm : Neon_mul_acc_widen_patterns<sub, AArch64smull,
5660     SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
5661
5662
5663multiclass Neon_addl_extract_patterns<SDPatternOperator opnode, SDPatternOperator ext, string Inst> {
5664  def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)),
5665                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
5666            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Lv8i8_v8i16") V64:$Rn, V64:$Rm)), dsub)>;
5667  def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)),
5668                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
5669            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Lv4i16_v4i32") V64:$Rn, V64:$Rm)), dsub)>;
5670  def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)),
5671                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
5672            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Lv2i32_v2i64") V64:$Rn, V64:$Rm)), dsub)>;
5673
5674  def : Pat<(v4i16 (opnode (v4i16 V64:$Rn),
5675                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
5676            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Wv8i8_v8i16") (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
5677  def : Pat<(v2i32 (opnode (v2i32 V64:$Rn),
5678                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
5679            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Wv4i16_v4i32") (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
5680  def : Pat<(v1i64 (opnode (v1i64 V64:$Rn),
5681                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
5682            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Wv2i32_v2i64") (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
5683}
5684
5685defm : Neon_addl_extract_patterns<add, zanyext, "UADD">;
5686defm : Neon_addl_extract_patterns<add, sext, "SADD">;
5687defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">;
5688defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;
5689
5690// CodeGen patterns for addhn and subhn instructions, which can actually be
5691// written in LLVM IR without too much difficulty.
5692
5693// Prioritize ADDHN and SUBHN over UZP2.
5694let AddedComplexity = 10 in {
5695
5696// ADDHN
5697def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
5698          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
5699def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
5700                                           (i32 16))))),
5701          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
5702def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
5703                                           (i32 32))))),
5704          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
5705def : Pat<(concat_vectors (v8i8 V64:$Rd),
5706                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
5707                                                    (i32 8))))),
5708          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
5709                            V128:$Rn, V128:$Rm)>;
5710def : Pat<(concat_vectors (v4i16 V64:$Rd),
5711                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
5712                                                    (i32 16))))),
5713          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
5714                            V128:$Rn, V128:$Rm)>;
5715def : Pat<(concat_vectors (v2i32 V64:$Rd),
5716                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
5717                                                    (i32 32))))),
5718          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
5719                            V128:$Rn, V128:$Rm)>;
5720
5721// SUBHN
5722def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
5723          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
5724def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
5725                                           (i32 16))))),
5726          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
5727def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
5728                                           (i32 32))))),
5729          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
5730def : Pat<(concat_vectors (v8i8 V64:$Rd),
5731                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
5732                                                    (i32 8))))),
5733          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
5734                            V128:$Rn, V128:$Rm)>;
5735def : Pat<(concat_vectors (v4i16 V64:$Rd),
5736                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
5737                                                    (i32 16))))),
5738          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
5739                            V128:$Rn, V128:$Rm)>;
5740def : Pat<(concat_vectors (v2i32 V64:$Rd),
5741                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
5742                                                    (i32 32))))),
5743          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
5744                            V128:$Rn, V128:$Rm)>;
5745
5746} // AddedComplexity = 10
5747
5748//----------------------------------------------------------------------------
5749// AdvSIMD bitwise extract from vector instruction.
5750//----------------------------------------------------------------------------
5751
5752defm EXT : SIMDBitwiseExtract<"ext">;
5753
5754def AdjustExtImm : SDNodeXForm<imm, [{
5755  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
5756}]>;
5757multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
5758  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
5759            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
5760  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
5761            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
5762  // We use EXT to handle extract_subvector to copy the upper 64-bits of a
5763  // 128-bit vector.
5764  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
5765            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
5766  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
5767  // single 128-bit EXT.
5768  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
5769                              (extract_subvector V128:$Rn, (i64 N)),
5770                              (i32 imm:$imm))),
5771            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
5772  // A 64-bit EXT of the high half of a 128-bit register can be done using a
5773  // 128-bit EXT of the whole register with an adjustment to the immediate. The
5774  // top half of the other operand will be unset, but that doesn't matter as it
5775  // will not be used.
5776  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
5777                              V64:$Rm,
5778                              (i32 imm:$imm))),
5779            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
5780                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
5781                                      (AdjustExtImm imm:$imm)), dsub)>;
5782}
5783
5784defm : ExtPat<v8i8, v16i8, 8>;
5785defm : ExtPat<v4i16, v8i16, 4>;
5786defm : ExtPat<v4f16, v8f16, 4>;
5787defm : ExtPat<v4bf16, v8bf16, 4>;
5788defm : ExtPat<v2i32, v4i32, 2>;
5789defm : ExtPat<v2f32, v4f32, 2>;
5790defm : ExtPat<v1i64, v2i64, 1>;
5791defm : ExtPat<v1f64, v2f64, 1>;
5792
5793//----------------------------------------------------------------------------
5794// AdvSIMD zip vector
5795//----------------------------------------------------------------------------
5796
5797defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
5798defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
5799defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
5800defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
5801defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
5802defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
5803
5804def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
5805                                 (v8i8 (trunc (v8i16 V128:$Vm))))),
5806          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
5807def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
5808                                 (v4i16 (trunc (v4i32 V128:$Vm))))),
5809          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
5810def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
5811                                 (v2i32 (trunc (v2i64 V128:$Vm))))),
5812          (UZP1v4i32 V128:$Vn, V128:$Vm)>;
5813
5814def : Pat<(v16i8 (concat_vectors
5815                 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
5816                 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
5817          (UZP2v16i8 V128:$Vn, V128:$Vm)>;
5818def : Pat<(v8i16 (concat_vectors
5819                 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
5820                 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
5821          (UZP2v8i16 V128:$Vn, V128:$Vm)>;
5822def : Pat<(v4i32 (concat_vectors
5823                 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
5824                 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
5825          (UZP2v4i32 V128:$Vn, V128:$Vm)>;
5826
5827//----------------------------------------------------------------------------
5828// AdvSIMD TBL/TBX instructions
5829//----------------------------------------------------------------------------
5830
5831defm TBL : SIMDTableLookup<    0, "tbl">;
5832defm TBX : SIMDTableLookupTied<1, "tbx">;
5833
5834def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
5835          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
5836def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
5837          (TBLv16i8One V128:$Ri, V128:$Rn)>;
5838
5839def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
5840                  (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
5841          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
5842def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
5843                   (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
5844          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
5845
5846
5847//----------------------------------------------------------------------------
5848// AdvSIMD scalar DUP instruction
5849//----------------------------------------------------------------------------
5850
5851defm DUP : SIMDScalarDUP<"mov">;
5852
5853//----------------------------------------------------------------------------
5854// AdvSIMD scalar pairwise instructions
5855//----------------------------------------------------------------------------
5856
5857defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
5858defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
5859defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
5860defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
5861defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
5862defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
5863
5864// Only the lower half of the result of the inner FADDP is used in the patterns
5865// below, so the second operand does not matter. Re-use the first input
5866// operand, so no additional dependencies need to be introduced.
5867let Predicates = [HasFullFP16] in {
5868def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
5869            (FADDPv2i16p
5870              (EXTRACT_SUBREG
5871                 (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn),
5872               dsub))>;
5873def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
5874          (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>;
5875}
5876def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
5877          (FADDPv2i32p
5878            (EXTRACT_SUBREG
5879              (FADDPv4f32 V128:$Rn, V128:$Rn),
5880             dsub))>;
5881def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
5882          (FADDPv2i32p V64:$Rn)>;
5883def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
5884          (FADDPv2i64p V128:$Rn)>;
5885
5886def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
5887          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
5888def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
5889          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
5890def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
5891          (FADDPv2i32p V64:$Rn)>;
5892def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
5893          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
5894def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
5895          (FADDPv2i64p V128:$Rn)>;
5896def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))),
5897          (FMAXNMPv2i32p V64:$Rn)>;
5898def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))),
5899          (FMAXNMPv2i64p V128:$Rn)>;
5900def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))),
5901          (FMAXPv2i32p V64:$Rn)>;
5902def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))),
5903          (FMAXPv2i64p V128:$Rn)>;
5904def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))),
5905          (FMINNMPv2i32p V64:$Rn)>;
5906def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))),
5907          (FMINNMPv2i64p V128:$Rn)>;
5908def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))),
5909          (FMINPv2i32p V64:$Rn)>;
5910def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))),
5911          (FMINPv2i64p V128:$Rn)>;
5912
5913//----------------------------------------------------------------------------
5914// AdvSIMD INS/DUP instructions
5915//----------------------------------------------------------------------------
5916
5917def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
5918def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
5919def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
5920def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
5921def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
5922def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
5923def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;
5924
5925def DUPv2i64lane : SIMDDup64FromElement;
5926def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
5927def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
5928def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
5929def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
5930def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
5931def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;
5932
5933// DUP from a 64-bit register to a 64-bit register is just a copy
5934def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
5935          (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
5936def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
5937          (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;
5938
5939def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
5940          (v2f32 (DUPv2i32lane
5941            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
5942            (i64 0)))>;
5943def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
5944          (v4f32 (DUPv4i32lane
5945            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
5946            (i64 0)))>;
5947def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
5948          (v2f64 (DUPv2i64lane
5949            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
5950            (i64 0)))>;
5951def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
5952          (v4f16 (DUPv4i16lane
5953            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
5954            (i64 0)))>;
5955def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
5956          (v4bf16 (DUPv4i16lane
5957            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
5958            (i64 0)))>;
5959def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
5960          (v8f16 (DUPv8i16lane
5961            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
5962            (i64 0)))>;
5963def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
5964          (v8bf16 (DUPv8i16lane
5965            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
5966            (i64 0)))>;
5967
5968def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
5969          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
5970def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
5971          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
5972
5973def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
5974          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
5975def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
5976          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
5977
5978def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
5979          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
5980def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
5981         (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
5982def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
5983          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
5984
5985// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
5986// instruction even if the types don't match: we just have to remap the lane
5987// carefully. N.b. this trick only applies to truncations.
5988def VecIndex_x2 : SDNodeXForm<imm, [{
5989  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
5990}]>;
5991def VecIndex_x4 : SDNodeXForm<imm, [{
5992  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
5993}]>;
5994def VecIndex_x8 : SDNodeXForm<imm, [{
5995  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
5996}]>;
5997
5998multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
5999                            ValueType Src128VT, ValueType ScalVT,
6000                            Instruction DUP, SDNodeXForm IdxXFORM> {
6001  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
6002                                                     imm:$idx)))),
6003            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
6004
6005  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
6006                                                     imm:$idx)))),
6007            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
6008}
6009
6010defm : DUPWithTruncPats<v8i8,   v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
6011defm : DUPWithTruncPats<v8i8,   v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
6012defm : DUPWithTruncPats<v4i16,  v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;
6013
6014defm : DUPWithTruncPats<v16i8,  v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
6015defm : DUPWithTruncPats<v16i8,  v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
6016defm : DUPWithTruncPats<v8i16,  v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
6017
6018multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
6019                               SDNodeXForm IdxXFORM> {
6020  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
6021                                                         imm:$idx))))),
6022            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
6023
6024  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
6025                                                       imm:$idx))))),
6026            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
6027}
6028
6029defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,   VecIndex_x8>;
6030defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane,  VecIndex_x4>;
6031defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane,  VecIndex_x2>;
6032
6033defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
6034defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
6035defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;
6036
6037// SMOV and UMOV definitions, with some extra patterns for convenience
6038defm SMOV : SMov;
6039defm UMOV : UMov;
6040
6041def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
6042          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
6043def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
6044          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
6045def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
6046          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
6047def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
6048          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
6049def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
6050          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
6051def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
6052          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
6053
6054def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
6055            VectorIndexB:$idx)))), i8),
6056          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
6057def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
6058            VectorIndexH:$idx)))), i16),
6059          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
6060
6061// Extracting i8 or i16 elements will have the zero-extend transformed to
6062// an 'and' mask by type legalization since neither i8 nor i16 are legal types
6063// for AArch64. Match these patterns here since UMOV already zeroes out the high
6064// bits of the destination register.
6065def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
6066               (i32 0xff)),
6067          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
6068def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
6069               (i32 0xffff)),
6070          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;
6071
6072def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
6073            VectorIndexB:$idx)))), (i64 0xff))),
6074          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
6075def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
6076            VectorIndexH:$idx)))), (i64 0xffff))),
6077          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;
6078
6079defm INS : SIMDIns;
6080
6081def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
6082          (SUBREG_TO_REG (i32 0),
6083                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
6084def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
6085          (SUBREG_TO_REG (i32 0),
6086                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
6087
6088// The top bits will be zero from the FMOVWSr
6089def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
6090          (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;
6091
6092def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
6093          (SUBREG_TO_REG (i32 0),
6094                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
6095def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
6096          (SUBREG_TO_REG (i32 0),
6097                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
6098
6099def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
6100          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6101def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
6102          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6103
6104def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
6105          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6106def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
6107          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6108
6109def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
6110            (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
6111                                  (i32 FPR32:$Rn), ssub))>;
6112def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
6113            (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
6114                                  (i32 FPR32:$Rn), ssub))>;
6115
6116def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
6117            (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
6118                                  (i64 FPR64:$Rn), dsub))>;
6119
6120def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
6121          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6122def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
6123          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6124
6125def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
6126          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6127def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
6128          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6129
6130def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
6131          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
6132def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
6133          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
6134
6135def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
6136          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
6137
6138def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
6139            (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
6140          (EXTRACT_SUBREG
6141            (INSvi16lane
6142              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
6143              VectorIndexS:$imm,
6144              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
6145              (i64 0)),
6146            dsub)>;
6147
6148def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
6149          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
6150def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
6151          (EXTRACT_SUBREG (INSvi16gpr (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, WZR), dsub)>;
6152def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
6153          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
6154def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
6155          (EXTRACT_SUBREG (INSvi32gpr (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, WZR), dsub)>;
6156def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)),
6157          (INSvi64gpr V128:$Rn, VectorIndexS:$imm, XZR)>;
6158
6159def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
6160            (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
6161          (INSvi16lane
6162            V128:$Rn, VectorIndexH:$imm,
6163            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
6164            (i64 0))>;
6165
6166def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
6167            (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
6168          (EXTRACT_SUBREG
6169            (INSvi16lane
6170              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
6171              VectorIndexS:$imm,
6172              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
6173              (i64 0)),
6174            dsub)>;
6175
6176def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
6177            (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
6178          (INSvi16lane
6179            V128:$Rn, VectorIndexH:$imm,
6180            (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
6181            (i64 0))>;
6182
6183def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
6184            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
6185          (EXTRACT_SUBREG
6186            (INSvi32lane
6187              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
6188              VectorIndexS:$imm,
6189              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
6190              (i64 0)),
6191            dsub)>;
6192def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
6193            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
6194          (INSvi32lane
6195            V128:$Rn, VectorIndexS:$imm,
6196            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
6197            (i64 0))>;
6198def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
6199            (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
6200          (INSvi64lane
6201            V128:$Rn, VectorIndexD:$imm,
6202            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
6203            (i64 0))>;
6204
6205def : Pat<(v2i32 (vector_insert (v2i32 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexS:$imm))),
6206          (EXTRACT_SUBREG
6207            (INSvi32gpr (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
6208                        VectorIndexS:$imm, GPR32:$Rm),
6209            dsub)>;
6210def : Pat<(v4i16 (vector_insert (v4i16 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexH:$imm))),
6211          (EXTRACT_SUBREG
6212            (INSvi16gpr (v8i16 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
6213                        VectorIndexH:$imm, GPR32:$Rm),
6214            dsub)>;
6215def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexB:$imm))),
6216          (EXTRACT_SUBREG
6217            (INSvi8gpr (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
6218                       VectorIndexB:$imm, GPR32:$Rm),
6219            dsub)>;
6220
6221// Copy an element at a constant index in one vector into a constant indexed
6222// element of another.
6223// FIXME refactor to a shared class/dev parameterized on vector type, vector
6224// index type and INS extension
6225def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
6226                   (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
6227                   VectorIndexB:$idx2)),
6228          (v16i8 (INSvi8lane
6229                   V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
6230          )>;
6231def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
6232                   (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
6233                   VectorIndexH:$idx2)),
6234          (v8i16 (INSvi16lane
6235                   V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
6236          )>;
6237def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
6238                   (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
6239                   VectorIndexS:$idx2)),
6240          (v4i32 (INSvi32lane
6241                   V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
6242          )>;
6243def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
6244                   (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
6245                   VectorIndexD:$idx2)),
6246          (v2i64 (INSvi64lane
6247                   V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
6248          )>;
6249
6250multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
6251                                ValueType VTScal, Instruction INS> {
6252  def : Pat<(VT128 (vector_insert V128:$src,
6253                        (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
6254                        imm:$Immd)),
6255            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;
6256
6257  def : Pat<(VT128 (vector_insert V128:$src,
6258                        (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
6259                        imm:$Immd)),
6260            (INS V128:$src, imm:$Immd,
6261                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
6262
6263  def : Pat<(VT64 (vector_insert V64:$src,
6264                        (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
6265                        imm:$Immd)),
6266            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
6267                                 imm:$Immd, V128:$Rn, imm:$Immn),
6268                            dsub)>;
6269
6270  def : Pat<(VT64 (vector_insert V64:$src,
6271                        (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
6272                        imm:$Immd)),
6273            (EXTRACT_SUBREG
6274                (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
6275                     (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
6276                dsub)>;
6277}
6278
6279defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
6280defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
6281defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
6282defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
6283
6284defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
6285defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
6286defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
6287defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;
6288
6289// Insert from bitcast
6290// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
6291def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
6292          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>;
6293def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
6294          (EXTRACT_SUBREG
6295            (INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)),
6296                         imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0),
6297            dsub)>;
6298def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)),
6299          (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;
6300
6301// bitcast of an extract
6302// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
6303def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
6304          (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
6305def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, 0)))),
6306          (EXTRACT_SUBREG V128:$src, ssub)>;
6307def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
6308          (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
6309def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, 0)))),
6310          (EXTRACT_SUBREG V128:$src, dsub)>;
6311
6312// Floating point vector extractions are codegen'd as either a sequence of
6313// subregister extractions, or a MOV (aka DUP here) if
6314// the lane number is anything other than zero.
6315def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
6316          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
6317def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
6318          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
6319def : Pat<(vector_extract (v8f16 V128:$Rn), 0),
6320          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
6321def : Pat<(vector_extract (v8bf16 V128:$Rn), 0),
6322          (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
6323
6324
6325def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
6326          (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
6327def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
6328          (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
6329def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
6330          (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
6331def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
6332          (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
6333
6334// All concat_vectors operations are canonicalised to act on i64 vectors for
6335// AArch64. In the general case we need an instruction, which had just as well be
6336// INS.
6337class ConcatPat<ValueType DstTy, ValueType SrcTy>
6338  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
6339        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
6340                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
6341
6342def : ConcatPat<v2i64, v1i64>;
6343def : ConcatPat<v2f64, v1f64>;
6344def : ConcatPat<v4i32, v2i32>;
6345def : ConcatPat<v4f32, v2f32>;
6346def : ConcatPat<v8i16, v4i16>;
6347def : ConcatPat<v8f16, v4f16>;
6348def : ConcatPat<v8bf16, v4bf16>;
6349def : ConcatPat<v16i8, v8i8>;
6350
6351// If the high lanes are undef, though, we can just ignore them:
6352class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
6353  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
6354        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
6355
6356def : ConcatUndefPat<v2i64, v1i64>;
6357def : ConcatUndefPat<v2f64, v1f64>;
6358def : ConcatUndefPat<v4i32, v2i32>;
6359def : ConcatUndefPat<v4f32, v2f32>;
6360def : ConcatUndefPat<v8i16, v4i16>;
6361def : ConcatUndefPat<v16i8, v8i8>;
6362
6363//----------------------------------------------------------------------------
6364// AdvSIMD across lanes instructions
6365//----------------------------------------------------------------------------
6366
6367defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
6368defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
6369defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
6370defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
6371defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
6372defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
6373defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
6374defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>;
6375defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>;
6376defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
6377defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>;
6378
6379multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
6380  // Patterns for addv(addlp(x)) ==> addlv
6381  def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
6382              (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
6383              (i64 0))), (i64 0))),
6384            (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
6385              (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
6386  def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
6387            (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
6388              (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
6389  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
6390            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;
6391
6392  // Patterns for addp(addlp(x))) ==> addlv
6393  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
6394            (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
6395  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
6396            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
6397}
6398
6399defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
6400defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;
6401
6402// Patterns for uaddlv(uaddlp(x)) ==> uaddlv
6403def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
6404          (i64 (EXTRACT_SUBREG
6405            (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub)),
6406            dsub))>;
6407
6408def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
6409          (i32 (EXTRACT_SUBREG
6410            (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
6411            ssub))>;
6412
6413// Patterns for across-vector intrinsics, that have a node equivalent, that
6414// returns a vector (with only the low lane defined) instead of a scalar.
6415// In effect, opNode is the same as (scalar_to_vector (IntNode)).
6416multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
6417                                    SDPatternOperator opNode> {
6418// If a lane instruction caught the vector_extract around opNode, we can
6419// directly match the latter to the instruction.
6420def : Pat<(v8i8 (opNode V64:$Rn)),
6421          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
6422           (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
6423def : Pat<(v16i8 (opNode V128:$Rn)),
6424          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6425           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
6426def : Pat<(v4i16 (opNode V64:$Rn)),
6427          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
6428           (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
6429def : Pat<(v8i16 (opNode V128:$Rn)),
6430          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
6431           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
6432def : Pat<(v4i32 (opNode V128:$Rn)),
6433          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
6434           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;
6435
6436
6437// If none did, fallback to the explicit patterns, consuming the vector_extract.
6438def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
6439            (i64 0)), (i64 0))),
6440          (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
6441            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
6442            bsub), ssub)>;
6443def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
6444          (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6445            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
6446            bsub), ssub)>;
6447def : Pat<(i32 (vector_extract (insert_subvector undef,
6448            (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
6449          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
6450            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
6451            hsub), ssub)>;
6452def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
6453          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
6454            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
6455            hsub), ssub)>;
6456def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
6457          (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
6458            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
6459            ssub), ssub)>;
6460
6461}
6462
6463multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
6464                                          SDPatternOperator opNode>
6465    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
6466// If there is a sign extension after this intrinsic, consume it as smov already
6467// performed it
6468def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
6469            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
6470          (i32 (SMOVvi8to32
6471            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6472              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
6473            (i64 0)))>;
6474def : Pat<(i32 (sext_inreg (i32 (vector_extract
6475            (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
6476          (i32 (SMOVvi8to32
6477            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6478             (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
6479            (i64 0)))>;
6480def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
6481            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
6482          (i32 (SMOVvi16to32
6483           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6484            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
6485           (i64 0)))>;
6486def : Pat<(i32 (sext_inreg (i32 (vector_extract
6487            (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
6488          (i32 (SMOVvi16to32
6489            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6490             (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
6491            (i64 0)))>;
6492}
6493
6494multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
6495                                            SDPatternOperator opNode>
6496    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
6497// If there is a masking operation keeping only what has been actually
6498// generated, consume it.
6499def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
6500            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
6501      (i32 (EXTRACT_SUBREG
6502        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6503          (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
6504        ssub))>;
6505def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
6506            maski8_or_more)),
6507        (i32 (EXTRACT_SUBREG
6508          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6509            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
6510          ssub))>;
6511def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
6512            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
6513          (i32 (EXTRACT_SUBREG
6514            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6515              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
6516            ssub))>;
6517def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
6518            maski16_or_more)),
6519        (i32 (EXTRACT_SUBREG
6520          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6521            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
6522          ssub))>;
6523}
6524
6525defm : SIMDAcrossLanesSignedIntrinsic<"ADDV",  AArch64saddv>;
6526// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
6527def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
6528          (ADDPv2i32 V64:$Rn, V64:$Rn)>;
6529
6530defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
6531// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
6532def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
6533          (ADDPv2i32 V64:$Rn, V64:$Rn)>;
6534
6535defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
6536def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
6537          (SMAXPv2i32 V64:$Rn, V64:$Rn)>;
6538
6539defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
6540def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
6541          (SMINPv2i32 V64:$Rn, V64:$Rn)>;
6542
6543defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
6544def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
6545          (UMAXPv2i32 V64:$Rn, V64:$Rn)>;
6546
6547defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
6548def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
6549          (UMINPv2i32 V64:$Rn, V64:$Rn)>;
6550
6551multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
6552  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
6553        (i32 (SMOVvi16to32
6554          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6555            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
6556          (i64 0)))>;
6557def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
6558        (i32 (SMOVvi16to32
6559          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6560           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
6561          (i64 0)))>;
6562
6563def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
6564          (i32 (EXTRACT_SUBREG
6565           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6566            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
6567           ssub))>;
6568def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
6569        (i32 (EXTRACT_SUBREG
6570          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6571           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
6572          ssub))>;
6573
6574def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
6575        (i64 (EXTRACT_SUBREG
6576          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6577           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
6578          dsub))>;
6579}
6580
6581multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
6582                                                Intrinsic intOp> {
6583  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
6584        (i32 (EXTRACT_SUBREG
6585          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6586            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
6587          ssub))>;
6588def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
6589        (i32 (EXTRACT_SUBREG
6590          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6591            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
6592          ssub))>;
6593
6594def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
6595          (i32 (EXTRACT_SUBREG
6596            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6597              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
6598            ssub))>;
6599def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
6600        (i32 (EXTRACT_SUBREG
6601          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6602            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
6603          ssub))>;
6604
6605def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
6606        (i64 (EXTRACT_SUBREG
6607          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6608            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
6609          dsub))>;
6610}
6611
6612defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
6613defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
6614
6615// The vaddlv_s32 intrinsic gets mapped to SADDLP.
6616def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
6617          (i64 (EXTRACT_SUBREG
6618            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6619              (SADDLPv2i32_v1i64 V64:$Rn), dsub),
6620            dsub))>;
6621// The vaddlv_u32 intrinsic gets mapped to UADDLP.
6622def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
6623          (i64 (EXTRACT_SUBREG
6624            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6625              (UADDLPv2i32_v1i64 V64:$Rn), dsub),
6626            dsub))>;
6627
6628//------------------------------------------------------------------------------
6629// AdvSIMD modified immediate instructions
6630//------------------------------------------------------------------------------
6631
6632// AdvSIMD BIC
6633defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
6634// AdvSIMD ORR
6635defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;
6636
6637def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
6638def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
6639def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
6640def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
6641
6642def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
6643def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
6644def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
6645def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
6646
6647def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
6648def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
6649def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
6650def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
6651
6652def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
6653def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
6654def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
6655def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
6656
6657// AdvSIMD FMOV
6658def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
6659                                              "fmov", ".2d",
6660                       [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
6661def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64,  fpimm8,
6662                                              "fmov", ".2s",
6663                       [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
6664def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
6665                                              "fmov", ".4s",
6666                       [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
6667let Predicates = [HasNEON, HasFullFP16] in {
6668def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64,  fpimm8,
6669                                              "fmov", ".4h",
6670                       [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
6671def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
6672                                              "fmov", ".8h",
6673                       [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
6674} // Predicates = [HasNEON, HasFullFP16]
6675
6676// AdvSIMD MOVI
6677
6678// EDIT byte mask: scalar
6679let isReMaterializable = 1, isAsCheapAsAMove = 1 in
6680def MOVID      : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
6681                    [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
6682// The movi_edit node has the immediate value already encoded, so we use
6683// a plain imm0_255 here.
6684def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
6685          (MOVID imm0_255:$shift)>;
6686
6687// EDIT byte mask: 2d
6688
6689// The movi_edit node has the immediate value already encoded, so we use
6690// a plain imm0_255 in the pattern
6691let isReMaterializable = 1, isAsCheapAsAMove = 1 in
6692def MOVIv2d_ns   : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
6693                                                simdimmtype10,
6694                                                "movi", ".2d",
6695                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
6696
6697def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
6698def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
6699def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
6700def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;
6701def : Pat<(v2f64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
6702def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
6703def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
6704def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
6705
6706def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
6707def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
6708def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
6709def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;
6710
6711// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
6712// extract is free and this gives better MachineCSE results.
6713def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
6714def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
6715def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
6716def : Pat<(v8i8  immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
6717def : Pat<(v1f64 immAllZerosV), (MOVID (i32 0))>;
6718def : Pat<(v2f32 immAllZerosV), (MOVID (i32 0))>;
6719def : Pat<(v4f16 immAllZerosV), (MOVID (i32 0))>;
6720def : Pat<(v4bf16 immAllZerosV), (MOVID (i32 0))>;
6721
6722def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
6723def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
6724def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
6725def : Pat<(v8i8  immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
6726
6727// EDIT per word & halfword: 2s, 4h, 4s, & 8h
6728let isReMaterializable = 1, isAsCheapAsAMove = 1 in
6729defm MOVI      : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
6730
6731let Predicates = [HasNEON] in {
6732  // Using the MOVI to materialize fp constants.
6733  def : Pat<(f32 fpimm32SIMDModImmType4:$in),
6734            (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in),
6735                                       (i32 24)),
6736                            ssub)>;
6737}
6738
6739def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
6740def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
6741def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
6742def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
6743
6744def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
6745def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
6746def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
6747def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
6748
6749def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
6750          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
6751def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
6752          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
6753def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
6754          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
6755def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
6756          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;
6757
6758let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
6759// EDIT per word: 2s & 4s with MSL shifter
6760def MOVIv2s_msl  : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
6761                      [(set (v2i32 V64:$Rd),
6762                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
6763def MOVIv4s_msl  : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
6764                      [(set (v4i32 V128:$Rd),
6765                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
6766
6767// Per byte: 8b & 16b
6768def MOVIv8b_ns   : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64,  imm0_255,
6769                                                 "movi", ".8b",
6770                       [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
6771
6772def MOVIv16b_ns  : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
6773                                                 "movi", ".16b",
6774                       [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
6775}
6776
6777// AdvSIMD MVNI
6778
6779// EDIT per word & halfword: 2s, 4h, 4s, & 8h
6780let isReMaterializable = 1, isAsCheapAsAMove = 1 in
6781defm MVNI      : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;
6782
6783def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
6784def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
6785def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
6786def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
6787
6788def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
6789def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
6790def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
6791def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
6792
6793def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
6794          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
6795def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
6796          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
6797def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
6798          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
6799def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
6800          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
6801
6802// EDIT per word: 2s & 4s with MSL shifter
6803let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
6804def MVNIv2s_msl   : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
6805                      [(set (v2i32 V64:$Rd),
6806                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
6807def MVNIv4s_msl   : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
6808                      [(set (v4i32 V128:$Rd),
6809                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
6810}
6811
6812//----------------------------------------------------------------------------
6813// AdvSIMD indexed element
6814//----------------------------------------------------------------------------
6815
6816let hasSideEffects = 0 in {
6817  defm FMLA  : SIMDFPIndexedTied<0, 0b0001, "fmla">;
6818  defm FMLS  : SIMDFPIndexedTied<0, 0b0101, "fmls">;
6819}
6820
6821// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
6822// instruction expects the addend first, while the intrinsic expects it last.
6823
6824// On the other hand, there are quite a few valid combinatorial options due to
6825// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
6826defm : SIMDFPIndexedTiedPatterns<"FMLA",
6827           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
6828defm : SIMDFPIndexedTiedPatterns<"FMLA",
6829           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;
6830
6831defm : SIMDFPIndexedTiedPatterns<"FMLS",
6832           TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
6833defm : SIMDFPIndexedTiedPatterns<"FMLS",
6834           TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
6835defm : SIMDFPIndexedTiedPatterns<"FMLS",
6836           TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
6837defm : SIMDFPIndexedTiedPatterns<"FMLS",
6838           TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
6839
6840multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
6841  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
6842  // and DUP scalar.
6843  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
6844                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
6845                                           VectorIndexS:$idx))),
6846            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
6847  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
6848                           (v2f32 (AArch64duplane32
6849                                      (v4f32 (insert_subvector undef,
6850                                                 (v2f32 (fneg V64:$Rm)),
6851                                                 (i64 0))),
6852                                      VectorIndexS:$idx)))),
6853            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
6854                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
6855                               VectorIndexS:$idx)>;
6856  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
6857                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
6858            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
6859                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
6860
6861  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
6862  // and DUP scalar.
6863  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
6864                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
6865                                           VectorIndexS:$idx))),
6866            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
6867                               VectorIndexS:$idx)>;
6868  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
6869                           (v4f32 (AArch64duplane32
6870                                      (v4f32 (insert_subvector undef,
6871                                                 (v2f32 (fneg V64:$Rm)),
6872                                                 (i64 0))),
6873                                      VectorIndexS:$idx)))),
6874            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
6875                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
6876                               VectorIndexS:$idx)>;
6877  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
6878                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
6879            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
6880                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
6881
6882  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
6883  // (DUPLANE from 64-bit would be trivial).
6884  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
6885                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
6886                                           VectorIndexD:$idx))),
6887            (FMLSv2i64_indexed
6888                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
6889  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
6890                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
6891            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
6892                (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;
6893
6894  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
6895  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
6896                         (vector_extract (v4f32 (fneg V128:$Rm)),
6897                                         VectorIndexS:$idx))),
6898            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
6899                V128:$Rm, VectorIndexS:$idx)>;
6900  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
6901                         (vector_extract (v4f32 (insert_subvector undef,
6902                                                    (v2f32 (fneg V64:$Rm)),
6903                                                    (i64 0))),
6904                                         VectorIndexS:$idx))),
6905            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
6906                (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
6907
6908  // 1 variant for 64-bit scalar version: extract from .1d or from .2d
6909  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
6910                         (vector_extract (v2f64 (fneg V128:$Rm)),
6911                                         VectorIndexS:$idx))),
6912            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
6913                V128:$Rm, VectorIndexS:$idx)>;
6914}
6915
6916defm : FMLSIndexedAfterNegPatterns<
6917           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
6918defm : FMLSIndexedAfterNegPatterns<
6919           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >;
6920
6921defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
6922defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>;
6923
6924def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
6925          (FMULv2i32_indexed V64:$Rn,
6926            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
6927            (i64 0))>;
6928def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
6929          (FMULv4i32_indexed V128:$Rn,
6930            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
6931            (i64 0))>;
6932def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
6933          (FMULv2i64_indexed V128:$Rn,
6934            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
6935            (i64 0))>;
6936
6937defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
6938defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
6939
6940defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
6941                                     int_aarch64_neon_sqdmulh_laneq>;
6942defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
6943                                      int_aarch64_neon_sqrdmulh_laneq>;
6944
6945// Generated by MachineCombine
6946defm MLA   : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
6947defm MLS   : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;
6948
6949defm MUL   : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
6950defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
6951    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
6952defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
6953    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
6954defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
6955defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
6956                                           int_aarch64_neon_sqadd>;
6957defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
6958                                           int_aarch64_neon_sqsub>;
6959defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
6960                                          int_aarch64_neon_sqrdmlah>;
6961defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
6962                                          int_aarch64_neon_sqrdmlsh>;
6963defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
6964defm UMLAL   : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
6965    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
6966defm UMLSL   : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
6967    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
6968defm UMULL   : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>;
6969
6970// A scalar sqdmull with the second operand being a vector lane can be
6971// handled directly with the indexed instruction encoding.
6972def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
6973                                          (vector_extract (v4i32 V128:$Vm),
6974                                                           VectorIndexS:$idx)),
6975          (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
6976
6977//----------------------------------------------------------------------------
6978// AdvSIMD scalar shift instructions
6979//----------------------------------------------------------------------------
6980defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
6981defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
6982defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
6983defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
6984// Codegen patterns for the above. We don't put these directly on the
6985// instructions because TableGen's type inference can't handle the truth.
6986// Having the same base pattern for fp <--> int totally freaks it out.
6987def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
6988          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
6989def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
6990          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
6991def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
6992          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
6993def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
6994          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
6995def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
6996                                            vecshiftR64:$imm)),
6997          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
6998def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
6999                                            vecshiftR64:$imm)),
7000          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
7001def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
7002          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
7003def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
7004          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
7005def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
7006                                            vecshiftR64:$imm)),
7007          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
7008def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
7009          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
7010def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
7011                                            vecshiftR64:$imm)),
7012          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
7013def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
7014          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
7015
7016// Patterns for FP16 Intrinsics - requires reg copy to/from as i16s not supported.
7017
7018def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
7019          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
7020def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
7021          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
7022def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
7023          (SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
7024def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
7025            (and FPR32:$Rn, (i32 65535)),
7026            vecshiftR16:$imm)),
7027          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
7028def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
7029          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
7030def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
7031          (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
7032def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
7033          (i32 (INSERT_SUBREG
7034            (i32 (IMPLICIT_DEF)),
7035            (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
7036            hsub))>;
7037def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
7038          (i64 (INSERT_SUBREG
7039            (i64 (IMPLICIT_DEF)),
7040            (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
7041            hsub))>;
7042def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
7043          (i32 (INSERT_SUBREG
7044            (i32 (IMPLICIT_DEF)),
7045            (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
7046            hsub))>;
7047def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
7048          (i64 (INSERT_SUBREG
7049            (i64 (IMPLICIT_DEF)),
7050            (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
7051            hsub))>;
7052def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
7053          (i32 (INSERT_SUBREG
7054            (i32 (IMPLICIT_DEF)),
7055            (FACGE16 FPR16:$Rn, FPR16:$Rm),
7056            hsub))>;
7057def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
7058          (i32 (INSERT_SUBREG
7059            (i32 (IMPLICIT_DEF)),
7060            (FACGT16 FPR16:$Rn, FPR16:$Rm),
7061            hsub))>;
7062
7063defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
7064defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
7065defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
7066                                     int_aarch64_neon_sqrshrn>;
7067defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
7068                                     int_aarch64_neon_sqrshrun>;
7069defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
7070defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
7071defm SQSHRN   : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
7072                                     int_aarch64_neon_sqshrn>;
7073defm SQSHRUN  : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
7074                                     int_aarch64_neon_sqshrun>;
7075defm SRI      : SIMDScalarRShiftDTied<   1, 0b01000, "sri">;
7076defm SRSHR    : SIMDScalarRShiftD<   0, 0b00100, "srshr", AArch64srshri>;
7077defm SRSRA    : SIMDScalarRShiftDTied<   0, 0b00110, "srsra",
7078    TriOpFrag<(add node:$LHS,
7079                   (AArch64srshri node:$MHS, node:$RHS))>>;
7080defm SSHR     : SIMDScalarRShiftD<   0, 0b00000, "sshr", AArch64vashr>;
7081defm SSRA     : SIMDScalarRShiftDTied<   0, 0b00010, "ssra",
7082    TriOpFrag<(add_and_or_is_add node:$LHS,
7083                   (AArch64vashr node:$MHS, node:$RHS))>>;
7084defm UQRSHRN  : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
7085                                     int_aarch64_neon_uqrshrn>;
7086defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
7087defm UQSHRN   : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
7088                                     int_aarch64_neon_uqshrn>;
7089defm URSHR    : SIMDScalarRShiftD<   1, 0b00100, "urshr", AArch64urshri>;
7090defm URSRA    : SIMDScalarRShiftDTied<   1, 0b00110, "ursra",
7091    TriOpFrag<(add node:$LHS,
7092                   (AArch64urshri node:$MHS, node:$RHS))>>;
7093defm USHR     : SIMDScalarRShiftD<   1, 0b00000, "ushr", AArch64vlshr>;
7094defm USRA     : SIMDScalarRShiftDTied<   1, 0b00010, "usra",
7095    TriOpFrag<(add_and_or_is_add node:$LHS,
7096                   (AArch64vlshr node:$MHS, node:$RHS))>>;
7097
7098//----------------------------------------------------------------------------
7099// AdvSIMD vector shift instructions
7100//----------------------------------------------------------------------------
7101defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
7102defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
7103defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
7104                                   int_aarch64_neon_vcvtfxs2fp>;
7105defm RSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
7106                          BinOpFrag<(trunc (AArch64roundingvlshr node:$LHS, node:$RHS))>>;
7107defm SHL     : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
7108
7109// X << 1 ==> X + X
7110class SHLToADDPat<ValueType ty, RegisterClass regtype>
7111  : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
7112            (!cast<Instruction>("ADD"#ty) regtype:$Rn, regtype:$Rn)>;
7113
7114def : SHLToADDPat<v16i8, FPR128>;
7115def : SHLToADDPat<v8i16, FPR128>;
7116def : SHLToADDPat<v4i32, FPR128>;
7117def : SHLToADDPat<v2i64, FPR128>;
7118def : SHLToADDPat<v8i8,  FPR64>;
7119def : SHLToADDPat<v4i16, FPR64>;
7120def : SHLToADDPat<v2i32, FPR64>;
7121
7122defm SHRN    : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
7123                          BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
7124defm SLI     : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
7125def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
7126                                      (i32 vecshiftL64:$imm))),
7127          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
7128defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
7129                                         int_aarch64_neon_sqrshrn>;
7130defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
7131                                         int_aarch64_neon_sqrshrun>;
7132defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
7133defm SQSHL  : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
7134defm SQSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
7135                                         int_aarch64_neon_sqshrn>;
7136defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
7137                                         int_aarch64_neon_sqshrun>;
7138defm SRI     : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
7139def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
7140                                      (i32 vecshiftR64:$imm))),
7141          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
7142defm SRSHR   : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
7143defm SRSRA   : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
7144                 TriOpFrag<(add node:$LHS,
7145                                (AArch64srshri node:$MHS, node:$RHS))> >;
7146defm SSHLL   : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
7147                BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;
7148
7149defm SSHR    : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
7150defm SSRA    : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
7151                TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
7152defm UCVTF   : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
7153                        int_aarch64_neon_vcvtfxu2fp>;
7154defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
7155                                         int_aarch64_neon_uqrshrn>;
7156defm UQSHL   : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
7157defm UQSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
7158                                         int_aarch64_neon_uqshrn>;
7159defm URSHR   : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
7160defm URSRA   : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
7161                TriOpFrag<(add node:$LHS,
7162                               (AArch64urshri node:$MHS, node:$RHS))> >;
7163defm USHLL   : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
7164                BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
7165defm USHR    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
7166defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
7167                TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
7168
7169// RADDHN patterns for when RSHRN shifts by half the size of the vector element
7170def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))),
7171          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
7172def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))),
7173          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
7174let AddedComplexity = 5 in
7175def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))),
7176          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
7177
7178// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
7179def : Pat<(v16i8 (concat_vectors
7180                 (v8i8 V64:$Vd),
7181                 (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))))),
7182          (RADDHNv8i16_v16i8
7183                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
7184                 (v8i16 (MOVIv2d_ns (i32 0))))>;
7185def : Pat<(v8i16 (concat_vectors
7186                 (v4i16 V64:$Vd),
7187                 (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))))),
7188          (RADDHNv4i32_v8i16
7189                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
7190                 (v4i32 (MOVIv2d_ns (i32 0))))>;
7191let AddedComplexity = 5 in
7192def : Pat<(v4i32 (concat_vectors
7193                 (v2i32 V64:$Vd),
7194                 (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))))),
7195          (RADDHNv2i64_v4i32
7196                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
7197                 (v2i64 (MOVIv2d_ns (i32 0))))>;
7198
7199// SHRN patterns for when a logical right shift was used instead of arithmetic
7200// (the immediate guarantees no sign bits actually end up in the result so it
7201// doesn't matter).
7202def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
7203          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
7204def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
7205          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
7206def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
7207          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
7208
7209def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
7210                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
7211                                                    vecshiftR16Narrow:$imm)))),
7212          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
7213                           V128:$Rn, vecshiftR16Narrow:$imm)>;
7214def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
7215                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
7216                                                    vecshiftR32Narrow:$imm)))),
7217          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
7218                           V128:$Rn, vecshiftR32Narrow:$imm)>;
7219def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
7220                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
7221                                                    vecshiftR64Narrow:$imm)))),
7222          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
7223                           V128:$Rn, vecshiftR32Narrow:$imm)>;
7224
7225// Vector sign and zero extensions are implemented with SSHLL and USSHLL.
7226// Anyexts are implemented as zexts.
7227def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
7228def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
7229def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
7230def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
7231def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
7232def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
7233def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
7234def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
7235def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
7236// Also match an extend from the upper half of a 128 bit source register.
7237def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
7238          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
7239def : Pat<(v8i16 (zext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
7240          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
7241def : Pat<(v8i16 (sext   (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
7242          (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
7243def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
7244          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
7245def : Pat<(v4i32 (zext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
7246          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
7247def : Pat<(v4i32 (sext   (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
7248          (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
7249def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
7250          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
7251def : Pat<(v2i64 (zext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
7252          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
7253def : Pat<(v2i64 (sext   (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
7254          (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
7255
7256// Vector shift sxtl aliases
7257def : InstAlias<"sxtl.8h $dst, $src1",
7258                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
7259def : InstAlias<"sxtl $dst.8h, $src1.8b",
7260                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
7261def : InstAlias<"sxtl.4s $dst, $src1",
7262                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
7263def : InstAlias<"sxtl $dst.4s, $src1.4h",
7264                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
7265def : InstAlias<"sxtl.2d $dst, $src1",
7266                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
7267def : InstAlias<"sxtl $dst.2d, $src1.2s",
7268                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
7269
7270// Vector shift sxtl2 aliases
7271def : InstAlias<"sxtl2.8h $dst, $src1",
7272                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
7273def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
7274                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
7275def : InstAlias<"sxtl2.4s $dst, $src1",
7276                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
7277def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
7278                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
7279def : InstAlias<"sxtl2.2d $dst, $src1",
7280                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
7281def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
7282                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
7283
7284// Vector shift uxtl aliases
7285def : InstAlias<"uxtl.8h $dst, $src1",
7286                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
7287def : InstAlias<"uxtl $dst.8h, $src1.8b",
7288                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
7289def : InstAlias<"uxtl.4s $dst, $src1",
7290                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
7291def : InstAlias<"uxtl $dst.4s, $src1.4h",
7292                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
7293def : InstAlias<"uxtl.2d $dst, $src1",
7294                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
7295def : InstAlias<"uxtl $dst.2d, $src1.2s",
7296                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
7297
7298// Vector shift uxtl2 aliases
7299def : InstAlias<"uxtl2.8h $dst, $src1",
7300                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
7301def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
7302                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
7303def : InstAlias<"uxtl2.4s $dst, $src1",
7304                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
7305def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
7306                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
7307def : InstAlias<"uxtl2.2d $dst, $src1",
7308                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
7309def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
7310                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
7311
7312// If an integer is about to be converted to a floating point value,
7313// just load it on the floating point unit.
7314// These patterns are more complex because floating point loads do not
7315// support sign extension.
7316// The sign extension has to be explicitly added and is only supported for
7317// one step: byte-to-half, half-to-word, word-to-doubleword.
7318// SCVTF GPR -> FPR is 9 cycles.
7319// SCVTF FPR -> FPR is 4 cyclces.
7320// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
7321// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
7322// and still being faster.
7323// However, this is not good for code size.
7324// 8-bits -> float. 2 sizes step-up.
7325class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
7326  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
7327        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
7328                            (SSHLLv4i16_shift
7329                              (f64
7330                                (EXTRACT_SUBREG
7331                                  (SSHLLv8i8_shift
7332                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
7333                                        INST,
7334                                        bsub),
7335                                    0),
7336                                  dsub)),
7337                               0),
7338                             ssub)))>,
7339    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
7340
7341def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
7342                          (LDRBroW  GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
7343def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
7344                          (LDRBroX  GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
7345def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
7346                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
7347def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
7348                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;
7349
7350// 16-bits -> float. 1 size step-up.
7351class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
7352  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
7353        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
7354                            (SSHLLv4i16_shift
7355                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
7356                                  INST,
7357                                  hsub),
7358                                0),
7359                            ssub)))>,
7360    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
7361
7362def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
7363                           (LDRHroW   GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
7364def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
7365                           (LDRHroX   GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
7366def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
7367                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
7368def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
7369                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
7370
7371// 32-bits to 32-bits are handled in target specific dag combine:
7372// performIntToFpCombine.
7373// 64-bits integer to 32-bits floating point, not possible with
7374// SCVTF on floating point registers (both source and destination
7375// must have the same size).
7376
7377// Here are the patterns for 8, 16, 32, and 64-bits to double.
7378// 8-bits -> double. 3 size step-up: give up.
7379// 16-bits -> double. 2 size step.
7380class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
7381  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
7382           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
7383                              (SSHLLv2i32_shift
7384                                 (f64
7385                                  (EXTRACT_SUBREG
7386                                    (SSHLLv4i16_shift
7387                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
7388                                        INST,
7389                                        hsub),
7390                                     0),
7391                                   dsub)),
7392                               0),
7393                             dsub)))>,
7394    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
7395
7396def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
7397                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
7398def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
7399                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
7400def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
7401                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
7402def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
7403                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
7404// 32-bits -> double. 1 size step-up.
7405class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
7406  : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
7407           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
7408                              (SSHLLv2i32_shift
7409                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
7410                                  INST,
7411                                  ssub),
7412                               0),
7413                             dsub)))>,
7414    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
7415
7416def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
7417                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
7418def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
7419                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
7420def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
7421                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
7422def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
7423                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;
7424
7425// 64-bits -> double are handled in target specific dag combine:
7426// performIntToFpCombine.
7427
7428
7429//----------------------------------------------------------------------------
7430// AdvSIMD Load-Store Structure
7431//----------------------------------------------------------------------------
7432defm LD1 : SIMDLd1Multiple<"ld1">;
7433defm LD2 : SIMDLd2Multiple<"ld2">;
7434defm LD3 : SIMDLd3Multiple<"ld3">;
7435defm LD4 : SIMDLd4Multiple<"ld4">;
7436
7437defm ST1 : SIMDSt1Multiple<"st1">;
7438defm ST2 : SIMDSt2Multiple<"st2">;
7439defm ST3 : SIMDSt3Multiple<"st3">;
7440defm ST4 : SIMDSt4Multiple<"st4">;
7441
7442class Ld1Pat<ValueType ty, Instruction INST>
7443  : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;
7444
7445def : Ld1Pat<v16i8, LD1Onev16b>;
7446def : Ld1Pat<v8i16, LD1Onev8h>;
7447def : Ld1Pat<v4i32, LD1Onev4s>;
7448def : Ld1Pat<v2i64, LD1Onev2d>;
7449def : Ld1Pat<v8i8,  LD1Onev8b>;
7450def : Ld1Pat<v4i16, LD1Onev4h>;
7451def : Ld1Pat<v2i32, LD1Onev2s>;
7452def : Ld1Pat<v1i64, LD1Onev1d>;
7453
7454class St1Pat<ValueType ty, Instruction INST>
7455  : Pat<(store ty:$Vt, GPR64sp:$Rn),
7456        (INST ty:$Vt, GPR64sp:$Rn)>;
7457
7458def : St1Pat<v16i8, ST1Onev16b>;
7459def : St1Pat<v8i16, ST1Onev8h>;
7460def : St1Pat<v4i32, ST1Onev4s>;
7461def : St1Pat<v2i64, ST1Onev2d>;
7462def : St1Pat<v8i8,  ST1Onev8b>;
7463def : St1Pat<v4i16, ST1Onev4h>;
7464def : St1Pat<v2i32, ST1Onev2s>;
7465def : St1Pat<v1i64, ST1Onev1d>;
7466
7467//---
7468// Single-element
7469//---
7470
7471defm LD1R          : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
7472defm LD2R          : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
7473defm LD3R          : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
7474defm LD4R          : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
7475let mayLoad = 1, hasSideEffects = 0 in {
7476defm LD1 : SIMDLdSingleBTied<0, 0b000,       "ld1", VecListOneb,   GPR64pi1>;
7477defm LD1 : SIMDLdSingleHTied<0, 0b010, 0,    "ld1", VecListOneh,   GPR64pi2>;
7478defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes,   GPR64pi4>;
7479defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned,   GPR64pi8>;
7480defm LD2 : SIMDLdSingleBTied<1, 0b000,       "ld2", VecListTwob,   GPR64pi2>;
7481defm LD2 : SIMDLdSingleHTied<1, 0b010, 0,    "ld2", VecListTwoh,   GPR64pi4>;
7482defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos,   GPR64pi8>;
7483defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod,   GPR64pi16>;
7484defm LD3 : SIMDLdSingleBTied<0, 0b001,       "ld3", VecListThreeb, GPR64pi3>;
7485defm LD3 : SIMDLdSingleHTied<0, 0b011, 0,    "ld3", VecListThreeh, GPR64pi6>;
7486defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
7487defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
7488defm LD4 : SIMDLdSingleBTied<1, 0b001,       "ld4", VecListFourb,  GPR64pi4>;
7489defm LD4 : SIMDLdSingleHTied<1, 0b011, 0,    "ld4", VecListFourh,  GPR64pi8>;
7490defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours,  GPR64pi16>;
7491defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd,  GPR64pi32>;
7492}
7493
7494def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
7495          (LD1Rv8b GPR64sp:$Rn)>;
7496def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
7497          (LD1Rv16b GPR64sp:$Rn)>;
7498def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
7499          (LD1Rv4h GPR64sp:$Rn)>;
7500def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
7501          (LD1Rv8h GPR64sp:$Rn)>;
7502def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
7503          (LD1Rv2s GPR64sp:$Rn)>;
7504def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
7505          (LD1Rv4s GPR64sp:$Rn)>;
7506def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
7507          (LD1Rv2d GPR64sp:$Rn)>;
7508def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
7509          (LD1Rv1d GPR64sp:$Rn)>;
7510
7511def : Pat<(v8i8 (AArch64duplane8 (v16i8 (insert_subvector undef, (v8i8 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
7512          (LD1Rv8b GPR64sp:$Rn)>;
7513def : Pat<(v16i8 (AArch64duplane8 (v16i8 (load GPR64sp:$Rn)), (i64 0))),
7514          (LD1Rv16b GPR64sp:$Rn)>;
7515def : Pat<(v4i16 (AArch64duplane16 (v8i16 (insert_subvector undef, (v4i16 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
7516          (LD1Rv4h GPR64sp:$Rn)>;
7517def : Pat<(v8i16 (AArch64duplane16 (v8i16 (load GPR64sp:$Rn)), (i64 0))),
7518          (LD1Rv8h GPR64sp:$Rn)>;
7519def : Pat<(v2i32 (AArch64duplane32 (v4i32 (insert_subvector undef, (v2i32 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
7520          (LD1Rv2s GPR64sp:$Rn)>;
7521def : Pat<(v4i32 (AArch64duplane32 (v4i32 (load GPR64sp:$Rn)), (i64 0))),
7522          (LD1Rv4s GPR64sp:$Rn)>;
7523def : Pat<(v2i64 (AArch64duplane64 (v2i64 (load GPR64sp:$Rn)), (i64 0))),
7524          (LD1Rv2d GPR64sp:$Rn)>;
7525
7526// Grab the floating point version too
7527def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
7528          (LD1Rv2s GPR64sp:$Rn)>;
7529def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
7530          (LD1Rv4s GPR64sp:$Rn)>;
7531def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
7532          (LD1Rv2d GPR64sp:$Rn)>;
7533def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
7534          (LD1Rv1d GPR64sp:$Rn)>;
7535def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
7536          (LD1Rv4h GPR64sp:$Rn)>;
7537def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
7538          (LD1Rv8h GPR64sp:$Rn)>;
7539def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
7540          (LD1Rv4h GPR64sp:$Rn)>;
7541def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
7542          (LD1Rv8h GPR64sp:$Rn)>;
7543
7544class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
7545                    ValueType VTy, ValueType STy, Instruction LD1>
7546  : Pat<(vector_insert (VTy VecListOne128:$Rd),
7547           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
7548        (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;
7549
7550def : Ld1Lane128Pat<extloadi8,  VectorIndexB, v16i8, i32, LD1i8>;
7551def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
7552def : Ld1Lane128Pat<load,       VectorIndexS, v4i32, i32, LD1i32>;
7553def : Ld1Lane128Pat<load,       VectorIndexS, v4f32, f32, LD1i32>;
7554def : Ld1Lane128Pat<load,       VectorIndexD, v2i64, i64, LD1i64>;
7555def : Ld1Lane128Pat<load,       VectorIndexD, v2f64, f64, LD1i64>;
7556def : Ld1Lane128Pat<load,       VectorIndexH, v8f16, f16, LD1i16>;
7557def : Ld1Lane128Pat<load,       VectorIndexH, v8bf16, bf16, LD1i16>;
7558
7559// Generate LD1 for extload if memory type does not match the
7560// destination type, for example:
7561//
7562//   (v4i32 (insert_vector_elt (load anyext from i8) idx))
7563//
7564// In this case, the index must be adjusted to match LD1 type.
7565//
7566class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand
7567                         VecIndex, ValueType VTy, ValueType STy,
7568                         Instruction LD1, SDNodeXForm IdxOp>
7569  : Pat<(vector_insert (VTy VecListOne128:$Rd),
7570                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
7571        (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>;
7572
7573class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex,
7574                        ValueType VTy, ValueType STy, Instruction LD1,
7575                        SDNodeXForm IdxOp>
7576  : Pat<(vector_insert (VTy VecListOne64:$Rd),
7577                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
7578        (EXTRACT_SUBREG
7579            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
7580                (IdxOp VecIndex:$idx), GPR64sp:$Rn),
7581            dsub)>;
7582
7583def VectorIndexStoH : SDNodeXForm<imm, [{
7584  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
7585}]>;
7586def VectorIndexStoB : SDNodeXForm<imm, [{
7587  return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
7588}]>;
7589def VectorIndexHtoB : SDNodeXForm<imm, [{
7590  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
7591}]>;
7592
7593def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>;
7594def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>;
7595def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>;
7596
7597def : Ld1Lane64IdxOpPat<extloadi16, VectorIndexS, v2i32, i32, LD1i16, VectorIndexStoH>;
7598def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexS, v2i32, i32, LD1i8, VectorIndexStoB>;
7599def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexHtoB>;
7600
7601// Same as above, but the first element is populated using
7602// scalar_to_vector + insert_subvector instead of insert_vector_elt.
7603let Predicates = [IsNeonAvailable] in {
7604  class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
7605                          SDPatternOperator ExtLoad, Instruction LD1>
7606    : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
7607            (ResultTy (EXTRACT_SUBREG
7608              (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
7609
7610  def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>;
7611  def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>;
7612  def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>;
7613}
7614class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
7615                   ValueType VTy, ValueType STy, Instruction LD1>
7616  : Pat<(vector_insert (VTy VecListOne64:$Rd),
7617           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
7618        (EXTRACT_SUBREG
7619            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
7620                          VecIndex:$idx, GPR64sp:$Rn),
7621            dsub)>;
7622
7623def : Ld1Lane64Pat<extloadi8,  VectorIndexB, v8i8,  i32, LD1i8>;
7624def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
7625def : Ld1Lane64Pat<load,       VectorIndexS, v2i32, i32, LD1i32>;
7626def : Ld1Lane64Pat<load,       VectorIndexS, v2f32, f32, LD1i32>;
7627def : Ld1Lane64Pat<load,       VectorIndexH, v4f16, f16, LD1i16>;
7628def : Ld1Lane64Pat<load,       VectorIndexH, v4bf16, bf16, LD1i16>;
7629
7630
7631defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
7632defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
7633defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
7634defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
7635
7636// Stores
7637defm ST1 : SIMDStSingleB<0, 0b000,       "st1", VecListOneb, GPR64pi1>;
7638defm ST1 : SIMDStSingleH<0, 0b010, 0,    "st1", VecListOneh, GPR64pi2>;
7639defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
7640defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
7641
7642let AddedComplexity = 19 in
7643class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
7644                    ValueType VTy, ValueType STy, Instruction ST1>
7645  : Pat<(scalar_store
7646             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
7647             GPR64sp:$Rn),
7648        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;
7649
7650def : St1Lane128Pat<truncstorei8,  VectorIndexB, v16i8, i32, ST1i8>;
7651def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
7652def : St1Lane128Pat<store,         VectorIndexS, v4i32, i32, ST1i32>;
7653def : St1Lane128Pat<store,         VectorIndexS, v4f32, f32, ST1i32>;
7654def : St1Lane128Pat<store,         VectorIndexD, v2i64, i64, ST1i64>;
7655def : St1Lane128Pat<store,         VectorIndexD, v2f64, f64, ST1i64>;
7656def : St1Lane128Pat<store,         VectorIndexH, v8f16, f16, ST1i16>;
7657def : St1Lane128Pat<store,         VectorIndexH, v8bf16, bf16, ST1i16>;
7658
7659let AddedComplexity = 19 in
7660class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
7661                   ValueType VTy, ValueType STy, Instruction ST1>
7662  : Pat<(scalar_store
7663             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
7664             GPR64sp:$Rn),
7665        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
7666             VecIndex:$idx, GPR64sp:$Rn)>;
7667
7668def : St1Lane64Pat<truncstorei8,  VectorIndexB, v8i8, i32, ST1i8>;
7669def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
7670def : St1Lane64Pat<store,         VectorIndexS, v2i32, i32, ST1i32>;
7671def : St1Lane64Pat<store,         VectorIndexS, v2f32, f32, ST1i32>;
7672def : St1Lane64Pat<store,         VectorIndexH, v4f16, f16, ST1i16>;
7673def : St1Lane64Pat<store,         VectorIndexH, v4bf16, bf16, ST1i16>;
7674
7675multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
7676                             ValueType VTy, ValueType STy, Instruction ST1,
7677                             int offset> {
7678  def : Pat<(scalar_store
7679              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
7680              GPR64sp:$Rn, offset),
7681        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
7682             VecIndex:$idx, GPR64sp:$Rn, XZR)>;
7683
7684  def : Pat<(scalar_store
7685              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
7686              GPR64sp:$Rn, GPR64:$Rm),
7687        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
7688             VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
7689}
7690
7691defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
7692defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
7693                        2>;
7694defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
7695defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
7696defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
7697defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
7698defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
7699defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>;
7700
7701multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
7702                             ValueType VTy, ValueType STy, Instruction ST1,
7703                             int offset> {
7704  def : Pat<(scalar_store
7705              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
7706              GPR64sp:$Rn, offset),
7707        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;
7708
7709  def : Pat<(scalar_store
7710              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
7711              GPR64sp:$Rn, GPR64:$Rm),
7712        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
7713}
7714
7715defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
7716                         1>;
7717defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
7718                         2>;
7719defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
7720defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
7721defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
7722defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
7723defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
7724defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>;
7725
7726let mayStore = 1, hasSideEffects = 0 in {
7727defm ST2 : SIMDStSingleB<1, 0b000,       "st2", VecListTwob,   GPR64pi2>;
7728defm ST2 : SIMDStSingleH<1, 0b010, 0,    "st2", VecListTwoh,   GPR64pi4>;
7729defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos,   GPR64pi8>;
7730defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod,   GPR64pi16>;
7731defm ST3 : SIMDStSingleB<0, 0b001,       "st3", VecListThreeb, GPR64pi3>;
7732defm ST3 : SIMDStSingleH<0, 0b011, 0,    "st3", VecListThreeh, GPR64pi6>;
7733defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
7734defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
7735defm ST4 : SIMDStSingleB<1, 0b001,       "st4", VecListFourb,  GPR64pi4>;
7736defm ST4 : SIMDStSingleH<1, 0b011, 0,    "st4", VecListFourh,  GPR64pi8>;
7737defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours,  GPR64pi16>;
7738defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd,  GPR64pi32>;
7739}
7740
7741defm ST1 : SIMDLdSt1SingleAliases<"st1">;
7742defm ST2 : SIMDLdSt2SingleAliases<"st2">;
7743defm ST3 : SIMDLdSt3SingleAliases<"st3">;
7744defm ST4 : SIMDLdSt4SingleAliases<"st4">;
7745
7746//----------------------------------------------------------------------------
7747// Crypto extensions
7748//----------------------------------------------------------------------------
7749
7750let Predicates = [HasAES] in {
7751def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
7752def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
7753def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
7754def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
7755}
7756
7757// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
7758// for AES fusion on some CPUs.
7759let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
7760def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
7761                        Sched<[WriteVq]>;
7762def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
7763                         Sched<[WriteVq]>;
7764}
7765
7766// Only use constrained versions of AES(I)MC instructions if they are paired with
7767// AESE/AESD.
7768def : Pat<(v16i8 (int_aarch64_crypto_aesmc
7769            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
7770                                            (v16i8 V128:$src2))))),
7771          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
7772                                             (v16i8 V128:$src2)))))>,
7773          Requires<[HasFuseAES]>;
7774
7775def : Pat<(v16i8 (int_aarch64_crypto_aesimc
7776            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
7777                                            (v16i8 V128:$src2))))),
7778          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
7779                                              (v16i8 V128:$src2)))))>,
7780          Requires<[HasFuseAES]>;
7781
7782let Predicates = [HasSHA2] in {
7783def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
7784def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
7785def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
7786def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
7787def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
7788def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>;
7789def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>;
7790
7791def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
7792def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
7793def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>;
7794}
7795
7796//----------------------------------------------------------------------------
7797// Compiler-pseudos
7798//----------------------------------------------------------------------------
7799// FIXME: Like for X86, these should go in their own separate .td file.
7800
7801// For an anyext, we don't care what the high bits are, so we can perform an
7802// INSERT_SUBREF into an IMPLICIT_DEF.
7803def : Pat<(i64 (anyext GPR32:$src)),
7804          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
7805
7806// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
7807// then assert the extension has happened.
7808def : Pat<(i64 (zext GPR32:$src)),
7809          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
7810
7811// To sign extend, we use a signed bitfield move instruction (SBFM) on the
7812// containing super-reg.
7813def : Pat<(i64 (sext GPR32:$src)),
7814   (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
7815def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
7816def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
7817def : Pat<(i64 (sext_inreg GPR64:$src, i8)),  (SBFMXri GPR64:$src, 0, 7)>;
7818def : Pat<(i64 (sext_inreg GPR64:$src, i1)),  (SBFMXri GPR64:$src, 0, 0)>;
7819def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
7820def : Pat<(i32 (sext_inreg GPR32:$src, i8)),  (SBFMWri GPR32:$src, 0, 7)>;
7821def : Pat<(i32 (sext_inreg GPR32:$src, i1)),  (SBFMWri GPR32:$src, 0, 0)>;
7822
7823def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
7824          (SBFMWri GPR32:$Rn, (i64 (i32shift_a       imm0_31:$imm)),
7825                              (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
7826def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
7827          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
7828                              (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
7829
7830def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
7831          (SBFMWri GPR32:$Rn, (i64 (i32shift_a        imm0_31:$imm)),
7832                              (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
7833def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
7834          (SBFMXri GPR64:$Rn, (i64 (i64shift_a        imm0_63:$imm)),
7835                              (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
7836
7837def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
7838          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
7839                   (i64 (i64shift_a        imm0_63:$imm)),
7840                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
7841
7842def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)),
7843          (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
7844                   (i64 (i64shift_a        imm0_63:$imm)),
7845                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
7846
7847// sra patterns have an AddedComplexity of 10, so make sure we have a higher
7848// AddedComplexity for the following patterns since we want to match sext + sra
7849// patterns before we attempt to match a single sra node.
7850let AddedComplexity = 20 in {
7851// We support all sext + sra combinations which preserve at least one bit of the
7852// original value which is to be sign extended. E.g. we support shifts up to
7853// bitwidth-1 bits.
7854def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
7855          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
7856def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
7857          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;
7858
7859def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
7860          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
7861def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
7862          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;
7863
7864def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
7865          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
7866                   (i64 imm0_31:$imm), 31)>;
7867} // AddedComplexity = 20
7868
7869// To truncate, we can simply extract from a subregister.
7870def : Pat<(i32 (trunc GPR64sp:$src)),
7871          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;
7872
7873// __builtin_trap() uses the BRK instruction on AArch64.
7874def : Pat<(trap), (BRK 1)>;
7875def : Pat<(debugtrap), (BRK 0xF000)>;
7876
7877def ubsan_trap_xform : SDNodeXForm<timm, [{
7878  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
7879}]>;
7880
7881def ubsan_trap_imm : TImmLeaf<i32, [{
7882  return isUInt<8>(Imm);
7883}], ubsan_trap_xform>;
7884
7885def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;
7886
7887// Multiply high patterns which multiply the lower subvector using smull/umull
7888// and the upper subvector with smull2/umull2. Then shuffle the high the high
7889// part of both results together.
7890def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
7891          (UZP2v16i8
7892           (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
7893                            (EXTRACT_SUBREG V128:$Rm, dsub)),
7894           (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
7895def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
7896          (UZP2v8i16
7897           (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
7898                             (EXTRACT_SUBREG V128:$Rm, dsub)),
7899           (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
7900def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
7901          (UZP2v4i32
7902           (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
7903                             (EXTRACT_SUBREG V128:$Rm, dsub)),
7904           (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
7905
7906def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
7907          (UZP2v16i8
7908           (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
7909                            (EXTRACT_SUBREG V128:$Rm, dsub)),
7910           (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
7911def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
7912          (UZP2v8i16
7913           (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
7914                             (EXTRACT_SUBREG V128:$Rm, dsub)),
7915           (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
7916def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
7917          (UZP2v4i32
7918           (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
7919                             (EXTRACT_SUBREG V128:$Rm, dsub)),
7920           (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
7921
7922// Conversions within AdvSIMD types in the same register size are free.
7923// But because we need a consistent lane ordering, in big endian many
7924// conversions require one or more REV instructions.
7925//
7926// Consider a simple memory load followed by a bitconvert then a store.
7927//   v0 = load v2i32
7928//   v1 = BITCAST v2i32 v0 to v4i16
7929//        store v4i16 v2
7930//
7931// In big endian mode every memory access has an implicit byte swap. LDR and
7932// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
7933// is, they treat the vector as a sequence of elements to be byte-swapped.
7934// The two pairs of instructions are fundamentally incompatible. We've decided
7935// to use LD1/ST1 only to simplify compiler implementation.
7936//
7937// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
7938// the original code sequence:
7939//   v0 = load v2i32
7940//   v1 = REV v2i32                  (implicit)
7941//   v2 = BITCAST v2i32 v1 to v4i16
7942//   v3 = REV v4i16 v2               (implicit)
7943//        store v4i16 v3
7944//
7945// But this is now broken - the value stored is different to the value loaded
7946// due to lane reordering. To fix this, on every BITCAST we must perform two
7947// other REVs:
7948//   v0 = load v2i32
7949//   v1 = REV v2i32                  (implicit)
7950//   v2 = REV v2i32
7951//   v3 = BITCAST v2i32 v2 to v4i16
7952//   v4 = REV v4i16
7953//   v5 = REV v4i16 v4               (implicit)
7954//        store v4i16 v5
7955//
7956// This means an extra two instructions, but actually in most cases the two REV
7957// instructions can be combined into one. For example:
7958//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
7959//
7960// There is also no 128-bit REV instruction. This must be synthesized with an
7961// EXT instruction.
7962//
7963// Most bitconverts require some sort of conversion. The only exceptions are:
7964//   a) Identity conversions -  vNfX <-> vNiX
7965//   b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
7966//
7967
7968// Natural vector casts (64 bit)
7969foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
7970  foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
7971    def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))),
7972              (VT FPR64:$src)>;
7973
7974// Natural vector casts (128 bit)
7975foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
7976  foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
7977    def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))),
7978              (VT FPR128:$src)>;
7979
7980let Predicates = [IsLE] in {
7981def : Pat<(v8i8  (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
7982def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
7983def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
7984def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
7985def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
7986def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
7987
7988def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
7989          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
7990def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
7991          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
7992def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
7993          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
7994def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
7995          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
7996def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
7997          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
7998def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
7999          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8000def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
8001          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8002}
8003let Predicates = [IsBE] in {
8004def : Pat<(v8i8  (bitconvert GPR64:$Xn)),
8005                 (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8006def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
8007                 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8008def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
8009                 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8010def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
8011                 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8012def : Pat<(v4bf16 (bitconvert GPR64:$Xn)),
8013                  (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8014def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
8015                 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8016
8017def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
8018          (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8019def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
8020          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8021def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
8022          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8023def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
8024          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8025def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
8026          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8027def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
8028          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8029}
8030def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8031def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8032def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
8033          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8034def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
8035          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8036def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
8037          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8038def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
8039
8040def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
8041          (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
8042def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
8043          (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
8044def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
8045          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8046def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
8047          (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
8048def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
8049          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8050
8051def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>;
8052def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>;
8053
8054let Predicates = [IsLE] in {
8055def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
8056def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
8057def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))), (v1i64 FPR64:$src)>;
8058def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
8059def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>;
8060def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
8061}
8062let Predicates = [IsBE] in {
8063def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
8064                             (v1i64 (REV64v2i32 FPR64:$src))>;
8065def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
8066                             (v1i64 (REV64v4i16 FPR64:$src))>;
8067def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))),
8068                             (v1i64 (REV64v8i8 FPR64:$src))>;
8069def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
8070                             (v1i64 (REV64v4i16 FPR64:$src))>;
8071def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))),
8072                             (v1i64 (REV64v4i16 FPR64:$src))>;
8073def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
8074                             (v1i64 (REV64v2i32 FPR64:$src))>;
8075}
8076def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
8077def : Pat<(v1i64 (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;
8078
8079let Predicates = [IsLE] in {
8080def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
8081def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
8082def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))), (v2i32 FPR64:$src)>;
8083def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
8084def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
8085def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
8086def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>;
8087}
8088let Predicates = [IsBE] in {
8089def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
8090                             (v2i32 (REV64v2i32 FPR64:$src))>;
8091def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
8092                             (v2i32 (REV32v4i16 FPR64:$src))>;
8093def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))),
8094                             (v2i32 (REV32v8i8 FPR64:$src))>;
8095def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))),
8096                             (v2i32 (REV64v2i32 FPR64:$src))>;
8097def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
8098                             (v2i32 (REV64v2i32 FPR64:$src))>;
8099def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
8100                             (v2i32 (REV32v4i16 FPR64:$src))>;
8101def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))),
8102                             (v2i32 (REV32v4i16 FPR64:$src))>;
8103}
8104def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
8105
8106let Predicates = [IsLE] in {
8107def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
8108def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
8109def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))), (v4i16 FPR64:$src)>;
8110def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
8111def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
8112def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
8113}
8114let Predicates = [IsBE] in {
8115def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
8116                             (v4i16 (REV64v4i16 FPR64:$src))>;
8117def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
8118                             (v4i16 (REV32v4i16 FPR64:$src))>;
8119def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))),
8120                             (v4i16 (REV16v8i8 FPR64:$src))>;
8121def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))),
8122                             (v4i16 (REV64v4i16 FPR64:$src))>;
8123def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
8124                             (v4i16 (REV32v4i16 FPR64:$src))>;
8125def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
8126                             (v4i16 (REV64v4i16 FPR64:$src))>;
8127}
8128def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
8129def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>;
8130
8131let Predicates = [IsLE] in {
8132def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
8133def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
8134def : Pat<(v4f16 (bitconvert (v8i8  FPR64:$src))), (v4f16 FPR64:$src)>;
8135def : Pat<(v4f16 (bitconvert (f64   FPR64:$src))), (v4f16 FPR64:$src)>;
8136def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
8137def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;
8138
8139def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>;
8140def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
8141def : Pat<(v4bf16 (bitconvert (v8i8  FPR64:$src))), (v4bf16 FPR64:$src)>;
8142def : Pat<(v4bf16 (bitconvert (f64   FPR64:$src))), (v4bf16 FPR64:$src)>;
8143def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>;
8144def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
8145}
8146let Predicates = [IsBE] in {
8147def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
8148                             (v4f16 (REV64v4i16 FPR64:$src))>;
8149def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
8150                             (v4f16 (REV32v4i16 FPR64:$src))>;
8151def : Pat<(v4f16 (bitconvert (v8i8  FPR64:$src))),
8152                             (v4f16 (REV16v8i8 FPR64:$src))>;
8153def : Pat<(v4f16 (bitconvert (f64   FPR64:$src))),
8154                             (v4f16 (REV64v4i16 FPR64:$src))>;
8155def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
8156                             (v4f16 (REV32v4i16 FPR64:$src))>;
8157def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
8158                             (v4f16 (REV64v4i16 FPR64:$src))>;
8159
8160def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))),
8161                             (v4bf16 (REV64v4i16 FPR64:$src))>;
8162def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))),
8163                             (v4bf16 (REV32v4i16 FPR64:$src))>;
8164def : Pat<(v4bf16 (bitconvert (v8i8  FPR64:$src))),
8165                             (v4bf16 (REV16v8i8 FPR64:$src))>;
8166def : Pat<(v4bf16 (bitconvert (f64   FPR64:$src))),
8167                             (v4bf16 (REV64v4i16 FPR64:$src))>;
8168def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))),
8169                             (v4bf16 (REV32v4i16 FPR64:$src))>;
8170def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))),
8171                             (v4bf16 (REV64v4i16 FPR64:$src))>;
8172}
8173def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
8174def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;
8175
8176let Predicates = [IsLE] in {
8177def : Pat<(v8i8  (bitconvert (v1i64 FPR64:$src))), (v8i8  FPR64:$src)>;
8178def : Pat<(v8i8  (bitconvert (v2i32 FPR64:$src))), (v8i8  FPR64:$src)>;
8179def : Pat<(v8i8  (bitconvert (v4i16 FPR64:$src))), (v8i8  FPR64:$src)>;
8180def : Pat<(v8i8  (bitconvert (f64   FPR64:$src))), (v8i8  FPR64:$src)>;
8181def : Pat<(v8i8  (bitconvert (v2f32 FPR64:$src))), (v8i8  FPR64:$src)>;
8182def : Pat<(v8i8  (bitconvert (v1f64 FPR64:$src))), (v8i8  FPR64:$src)>;
8183def : Pat<(v8i8  (bitconvert (v4f16 FPR64:$src))), (v8i8  FPR64:$src)>;
8184def : Pat<(v8i8  (bitconvert (v4bf16 FPR64:$src))), (v8i8  FPR64:$src)>;
8185}
8186let Predicates = [IsBE] in {
8187def : Pat<(v8i8  (bitconvert (v1i64 FPR64:$src))),
8188                             (v8i8 (REV64v8i8 FPR64:$src))>;
8189def : Pat<(v8i8  (bitconvert (v2i32 FPR64:$src))),
8190                             (v8i8 (REV32v8i8 FPR64:$src))>;
8191def : Pat<(v8i8  (bitconvert (v4i16 FPR64:$src))),
8192                             (v8i8 (REV16v8i8 FPR64:$src))>;
8193def : Pat<(v8i8  (bitconvert (f64   FPR64:$src))),
8194                             (v8i8 (REV64v8i8 FPR64:$src))>;
8195def : Pat<(v8i8  (bitconvert (v2f32 FPR64:$src))),
8196                             (v8i8 (REV32v8i8 FPR64:$src))>;
8197def : Pat<(v8i8  (bitconvert (v1f64 FPR64:$src))),
8198                             (v8i8 (REV64v8i8 FPR64:$src))>;
8199def : Pat<(v8i8  (bitconvert (v4f16 FPR64:$src))),
8200                             (v8i8 (REV16v8i8 FPR64:$src))>;
8201def : Pat<(v8i8  (bitconvert (v4bf16 FPR64:$src))),
8202                             (v8i8 (REV16v8i8 FPR64:$src))>;
8203}
8204
8205let Predicates = [IsLE] in {
8206def : Pat<(f64   (bitconvert (v2i32 FPR64:$src))), (f64   FPR64:$src)>;
8207def : Pat<(f64   (bitconvert (v4i16 FPR64:$src))), (f64   FPR64:$src)>;
8208def : Pat<(f64   (bitconvert (v2f32 FPR64:$src))), (f64   FPR64:$src)>;
8209def : Pat<(f64   (bitconvert (v8i8  FPR64:$src))), (f64   FPR64:$src)>;
8210def : Pat<(f64   (bitconvert (v4f16 FPR64:$src))), (f64   FPR64:$src)>;
8211def : Pat<(f64   (bitconvert (v4bf16 FPR64:$src))), (f64   FPR64:$src)>;
8212}
8213let Predicates = [IsBE] in {
8214def : Pat<(f64   (bitconvert (v2i32 FPR64:$src))),
8215                             (f64 (REV64v2i32 FPR64:$src))>;
8216def : Pat<(f64   (bitconvert (v4i16 FPR64:$src))),
8217                             (f64 (REV64v4i16 FPR64:$src))>;
8218def : Pat<(f64   (bitconvert (v2f32 FPR64:$src))),
8219                             (f64 (REV64v2i32 FPR64:$src))>;
8220def : Pat<(f64   (bitconvert (v8i8  FPR64:$src))),
8221                             (f64 (REV64v8i8 FPR64:$src))>;
8222def : Pat<(f64   (bitconvert (v4f16 FPR64:$src))),
8223                             (f64 (REV64v4i16 FPR64:$src))>;
8224def : Pat<(f64   (bitconvert (v4bf16 FPR64:$src))),
8225                             (f64 (REV64v4i16 FPR64:$src))>;
8226}
8227def : Pat<(f64   (bitconvert (v1i64 FPR64:$src))), (f64   FPR64:$src)>;
8228def : Pat<(f64   (bitconvert (v1f64 FPR64:$src))), (f64   FPR64:$src)>;
8229
8230let Predicates = [IsLE] in {
8231def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
8232def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
8233def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))), (v1f64 FPR64:$src)>;
8234def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
8235def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
8236def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>;
8237}
8238let Predicates = [IsBE] in {
8239def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
8240                             (v1f64 (REV64v2i32 FPR64:$src))>;
8241def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
8242                             (v1f64 (REV64v4i16 FPR64:$src))>;
8243def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))),
8244                             (v1f64 (REV64v8i8 FPR64:$src))>;
8245def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
8246                             (v1f64 (REV64v2i32 FPR64:$src))>;
8247def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
8248                             (v1f64 (REV64v4i16 FPR64:$src))>;
8249def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))),
8250                             (v1f64 (REV64v4i16 FPR64:$src))>;
8251}
8252def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
8253def : Pat<(v1f64 (bitconvert (f64   FPR64:$src))), (v1f64 FPR64:$src)>;
8254
8255let Predicates = [IsLE] in {
8256def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
8257def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
8258def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))), (v2f32 FPR64:$src)>;
8259def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
8260def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
8261def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
8262def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>;
8263}
8264let Predicates = [IsBE] in {
8265def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
8266                             (v2f32 (REV64v2i32 FPR64:$src))>;
8267def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
8268                             (v2f32 (REV32v4i16 FPR64:$src))>;
8269def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))),
8270                             (v2f32 (REV32v8i8 FPR64:$src))>;
8271def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
8272                             (v2f32 (REV64v2i32 FPR64:$src))>;
8273def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))),
8274                             (v2f32 (REV64v2i32 FPR64:$src))>;
8275def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
8276                             (v2f32 (REV32v4i16 FPR64:$src))>;
8277def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))),
8278                             (v2f32 (REV32v4i16 FPR64:$src))>;
8279}
8280def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
8281
8282let Predicates = [IsLE] in {
8283def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
8284def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
8285def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
8286def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
8287def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
8288def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
8289def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>;
8290def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
8291}
8292let Predicates = [IsBE] in {
8293def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
8294                            (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
8295def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
8296                            (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
8297                                            (REV64v4i32 FPR128:$src), (i32 8)))>;
8298def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
8299                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
8300                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
8301def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
8302                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
8303                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
8304def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))),
8305                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
8306                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
8307def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
8308                            (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
8309def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
8310                            (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
8311                                            (REV64v4i32 FPR128:$src), (i32 8)))>;
8312def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
8313                            (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
8314                                            (REV64v16i8 FPR128:$src), (i32 8)))>;
8315}
8316
8317let Predicates = [IsLE] in {
8318def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))), (v2f64 FPR128:$src)>;
8319def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
8320def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
8321def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
8322def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>;
8323def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
8324def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
8325}
8326let Predicates = [IsBE] in {
8327def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))),
8328                             (v2f64 (EXTv16i8 FPR128:$src,
8329                                              FPR128:$src, (i32 8)))>;
8330def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
8331                             (v2f64 (REV64v4i32 FPR128:$src))>;
8332def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
8333                             (v2f64 (REV64v8i16 FPR128:$src))>;
8334def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
8335                             (v2f64 (REV64v8i16 FPR128:$src))>;
8336def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))),
8337                             (v2f64 (REV64v8i16 FPR128:$src))>;
8338def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
8339                             (v2f64 (REV64v16i8 FPR128:$src))>;
8340def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
8341                             (v2f64 (REV64v4i32 FPR128:$src))>;
8342}
8343def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
8344
8345let Predicates = [IsLE] in {
8346def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))), (v4f32 FPR128:$src)>;
8347def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
8348def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
8349def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>;
8350def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
8351def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
8352def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
8353}
8354let Predicates = [IsBE] in {
8355def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))),
8356                             (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
8357                                    (REV64v4i32 FPR128:$src), (i32 8)))>;
8358def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
8359                             (v4f32 (REV32v8i16 FPR128:$src))>;
8360def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
8361                             (v4f32 (REV32v8i16 FPR128:$src))>;
8362def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))),
8363                             (v4f32 (REV32v8i16 FPR128:$src))>;
8364def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
8365                             (v4f32 (REV32v16i8 FPR128:$src))>;
8366def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
8367                             (v4f32 (REV64v4i32 FPR128:$src))>;
8368def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
8369                             (v4f32 (REV64v4i32 FPR128:$src))>;
8370}
8371def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
8372
8373let Predicates = [IsLE] in {
8374def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))), (v2i64 FPR128:$src)>;
8375def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
8376def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
8377def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
8378def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
8379def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
8380def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>;
8381}
8382let Predicates = [IsBE] in {
8383def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))),
8384                             (v2i64 (EXTv16i8 FPR128:$src,
8385                                              FPR128:$src, (i32 8)))>;
8386def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
8387                             (v2i64 (REV64v4i32 FPR128:$src))>;
8388def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
8389                             (v2i64 (REV64v8i16 FPR128:$src))>;
8390def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
8391                             (v2i64 (REV64v16i8 FPR128:$src))>;
8392def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
8393                             (v2i64 (REV64v4i32 FPR128:$src))>;
8394def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
8395                             (v2i64 (REV64v8i16 FPR128:$src))>;
8396def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))),
8397                             (v2i64 (REV64v8i16 FPR128:$src))>;
8398}
8399def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
8400
8401let Predicates = [IsLE] in {
8402def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))), (v4i32 FPR128:$src)>;
8403def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
8404def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
8405def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
8406def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
8407def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
8408def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>;
8409}
8410let Predicates = [IsBE] in {
8411def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))),
8412                             (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
8413                                              (REV64v4i32 FPR128:$src),
8414                                              (i32 8)))>;
8415def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
8416                             (v4i32 (REV64v4i32 FPR128:$src))>;
8417def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
8418                             (v4i32 (REV32v8i16 FPR128:$src))>;
8419def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
8420                             (v4i32 (REV32v16i8 FPR128:$src))>;
8421def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
8422                             (v4i32 (REV64v4i32 FPR128:$src))>;
8423def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
8424                             (v4i32 (REV32v8i16 FPR128:$src))>;
8425def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))),
8426                             (v4i32 (REV32v8i16 FPR128:$src))>;
8427}
8428def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
8429
8430let Predicates = [IsLE] in {
8431def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))), (v8i16 FPR128:$src)>;
8432def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
8433def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
8434def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
8435def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
8436def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
8437}
8438let Predicates = [IsBE] in {
8439def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))),
8440                             (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
8441                                              (REV64v8i16 FPR128:$src),
8442                                              (i32 8)))>;
8443def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
8444                             (v8i16 (REV64v8i16 FPR128:$src))>;
8445def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
8446                             (v8i16 (REV32v8i16 FPR128:$src))>;
8447def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
8448                             (v8i16 (REV16v16i8 FPR128:$src))>;
8449def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
8450                             (v8i16 (REV64v8i16 FPR128:$src))>;
8451def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
8452                             (v8i16 (REV32v8i16 FPR128:$src))>;
8453}
8454def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
8455def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>;
8456
8457let Predicates = [IsLE] in {
8458def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))), (v8f16 FPR128:$src)>;
8459def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
8460def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
8461def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
8462def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
8463def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
8464
8465def : Pat<(v8bf16 (bitconvert (f128  FPR128:$src))), (v8bf16 FPR128:$src)>;
8466def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
8467def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
8468def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
8469def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
8470def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
8471}
8472let Predicates = [IsBE] in {
8473def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))),
8474                             (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
8475                                              (REV64v8i16 FPR128:$src),
8476                                              (i32 8)))>;
8477def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
8478                             (v8f16 (REV64v8i16 FPR128:$src))>;
8479def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
8480                             (v8f16 (REV32v8i16 FPR128:$src))>;
8481def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
8482                             (v8f16 (REV16v16i8 FPR128:$src))>;
8483def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
8484                             (v8f16 (REV64v8i16 FPR128:$src))>;
8485def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
8486                             (v8f16 (REV32v8i16 FPR128:$src))>;
8487
8488def : Pat<(v8bf16 (bitconvert (f128  FPR128:$src))),
8489                             (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src),
8490                                              (REV64v8i16 FPR128:$src),
8491                                              (i32 8)))>;
8492def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))),
8493                             (v8bf16 (REV64v8i16 FPR128:$src))>;
8494def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))),
8495                             (v8bf16 (REV32v8i16 FPR128:$src))>;
8496def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))),
8497                             (v8bf16 (REV16v16i8 FPR128:$src))>;
8498def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))),
8499                             (v8bf16 (REV64v8i16 FPR128:$src))>;
8500def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))),
8501                             (v8bf16 (REV32v8i16 FPR128:$src))>;
8502}
8503def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
8504def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;
8505
8506let Predicates = [IsLE] in {
8507def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))), (v16i8 FPR128:$src)>;
8508def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
8509def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
8510def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
8511def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
8512def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
8513def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
8514def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>;
8515}
8516let Predicates = [IsBE] in {
8517def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))),
8518                             (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
8519                                              (REV64v16i8 FPR128:$src),
8520                                              (i32 8)))>;
8521def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
8522                             (v16i8 (REV64v16i8 FPR128:$src))>;
8523def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
8524                             (v16i8 (REV32v16i8 FPR128:$src))>;
8525def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
8526                             (v16i8 (REV16v16i8 FPR128:$src))>;
8527def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
8528                             (v16i8 (REV64v16i8 FPR128:$src))>;
8529def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
8530                             (v16i8 (REV32v16i8 FPR128:$src))>;
8531def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
8532                             (v16i8 (REV16v16i8 FPR128:$src))>;
8533def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))),
8534                             (v16i8 (REV16v16i8 FPR128:$src))>;
8535}
8536
8537def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
8538           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8539def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
8540           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8541def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
8542           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8543def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
8544           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8545def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
8546           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8547def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
8548           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8549def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
8550           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8551def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
8552           (EXTRACT_SUBREG V128:$Rn, dsub)>;
8553
8554def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
8555          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
8556def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
8557          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
8558def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
8559          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
8560def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
8561          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
8562
8563// A 64-bit subvector insert to the first 128-bit vector position
8564// is a subregister copy that needs no instruction.
8565multiclass InsertSubvectorUndef<ValueType Ty> {
8566  def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
8567            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8568  def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
8569            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8570  def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
8571            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8572  def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
8573            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8574  def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
8575            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8576  def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
8577            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8578  def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
8579            (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8580  def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
8581            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
8582}
8583
8584defm : InsertSubvectorUndef<i32>;
8585defm : InsertSubvectorUndef<i64>;
8586
8587// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
8588// or v2f32.
8589def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
8590                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
8591           (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
8592def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
8593                         (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
8594           (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
8595    // vector_extract on 64-bit vectors gets promoted to a 128 bit vector,
8596    // so we match on v4f32 here, not v2f32. This will also catch adding
8597    // the low two lanes of a true v4f32 vector.
8598def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
8599                    (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
8600          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
8601def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
8602                    (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
8603          (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
8604
8605// Prefer using the bottom lanes of addp Rn, Rn compared to
8606// addp extractlow(Rn), extracthigh(Rn)
8607def : Pat<(AArch64addp (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 0))),
8608                       (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2)))),
8609          (v2i32 (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub))>;
8610def : Pat<(AArch64addp (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 0))),
8611                       (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4)))),
8612          (v4i16 (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub))>;
8613def : Pat<(AArch64addp (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 0))),
8614                       (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8)))),
8615          (v8i8 (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub))>;
8616
8617def : Pat<(AArch64faddp (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 0))),
8618                        (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 2)))),
8619          (v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>;
8620def : Pat<(AArch64faddp (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 0))),
8621                        (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 4)))),
8622          (v4f16 (EXTRACT_SUBREG (FADDPv8f16 $Rn, $Rn), dsub))>;
8623
8624// Scalar 64-bit shifts in FPR64 registers.
8625def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
8626          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
8627def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
8628          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
8629def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
8630          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
8631def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
8632          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
8633
8634// Patterns for nontemporal/no-allocate stores.
8635// We have to resort to tricks to turn a single-input store into a store pair,
8636// because there is no single-input nontemporal store, only STNP.
8637let Predicates = [IsLE] in {
8638let AddedComplexity = 15 in {
8639class NTStore128Pat<ValueType VT> :
8640  Pat<(nontemporalstore (VT FPR128:$Rt),
8641        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
8642      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
8643              (DUPi64 FPR128:$Rt, (i64 1)),
8644              GPR64sp:$Rn, simm7s8:$offset)>;
8645
8646def : NTStore128Pat<v2i64>;
8647def : NTStore128Pat<v4i32>;
8648def : NTStore128Pat<v8i16>;
8649def : NTStore128Pat<v16i8>;
8650
8651class NTStore64Pat<ValueType VT> :
8652  Pat<(nontemporalstore (VT FPR64:$Rt),
8653        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
8654      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
8655              (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
8656              GPR64sp:$Rn, simm7s4:$offset)>;
8657
8658// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
8659def : NTStore64Pat<v1f64>;
8660def : NTStore64Pat<v1i64>;
8661def : NTStore64Pat<v2i32>;
8662def : NTStore64Pat<v4i16>;
8663def : NTStore64Pat<v8i8>;
8664
8665def : Pat<(nontemporalstore GPR64:$Rt,
8666            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
8667          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
8668                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
8669                  GPR64sp:$Rn, simm7s4:$offset)>;
8670} // AddedComplexity=10
8671} // Predicates = [IsLE]
8672
8673// Tail call return handling. These are all compiler pseudo-instructions,
8674// so no encoding information or anything like that.
8675let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
8676  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
8677                   Sched<[WriteBrReg]>;
8678  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
8679                   Sched<[WriteBrReg]>;
8680  // Indirect tail-call with any register allowed, used by MachineOutliner when
8681  // this is proven safe.
8682  // FIXME: If we have to add any more hacks like this, we should instead relax
8683  // some verifier checks for outlined functions.
8684  def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
8685                      Sched<[WriteBrReg]>;
8686  // Indirect tail-call limited to only use registers (x16 and x17) which are
8687  // allowed to tail-call a "BTI c" instruction.
8688  def TCRETURNriBTI : Pseudo<(outs), (ins rtcGPR64:$dst, i32imm:$FPDiff), []>,
8689                      Sched<[WriteBrReg]>;
8690}
8691
8692def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
8693          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
8694      Requires<[NotUseBTI]>;
8695def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
8696          (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
8697      Requires<[UseBTI]>;
8698def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
8699          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
8700def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
8701          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
8702
8703def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
8704def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;
8705
8706// Extracting lane zero is a special case where we can just use a plain
8707// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
8708// rest of the compiler, especially the register allocator and copy propagation,
8709// to reason about, so is preferred when it's possible to use it.
8710let AddedComplexity = 10 in {
8711  def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
8712  def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
8713  def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
8714}
8715
8716// dot_v4i8
8717class mul_v4i8<SDPatternOperator ldop> :
8718  PatFrag<(ops node:$Rn, node:$Rm, node:$offset),
8719          (mul (ldop (add node:$Rn, node:$offset)),
8720               (ldop (add node:$Rm, node:$offset)))>;
8721class mulz_v4i8<SDPatternOperator ldop> :
8722  PatFrag<(ops node:$Rn, node:$Rm),
8723          (mul (ldop node:$Rn), (ldop node:$Rm))>;
8724
8725def load_v4i8 :
8726  OutPatFrag<(ops node:$R),
8727             (INSERT_SUBREG
8728              (v2i32 (IMPLICIT_DEF)),
8729               (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)),
8730              ssub)>;
8731
8732class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
8733  Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
8734           (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
8735           (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
8736                (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
8737      (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR),
8738                                (load_v4i8 GPR64sp:$Rn),
8739                                (load_v4i8 GPR64sp:$Rm))),
8740                      sub_32)>, Requires<[HasDotProd]>;
8741
8742// dot_v8i8
8743class ee_v8i8<SDPatternOperator extend> :
8744  PatFrag<(ops node:$V, node:$K),
8745          (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>;
8746
8747class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
8748  PatFrag<(ops node:$M, node:$N, node:$K),
8749          (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)),
8750                 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>;
8751
8752class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
8753  PatFrag<(ops node:$M, node:$N),
8754          (i32 (extractelt
8755           (v4i32 (AArch64uaddv
8756            (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)),
8757                 (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))),
8758           (i64 0)))>;
8759
8760// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
8761def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>;
8762
8763class odot_v8i8<Instruction DOT> :
8764  OutPatFrag<(ops node:$Vm, node:$Vn),
8765             (EXTRACT_SUBREG
8766              (VADDV_32
8767               (i64 (DOT (DUPv2i32gpr WZR),
8768                         (v8i8 node:$Vm),
8769                         (v8i8 node:$Vn)))),
8770              sub_32)>;
8771
8772class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
8773                    SDPatternOperator extend> :
8774  Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
8775      (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
8776  Requires<[HasDotProd]>;
8777
8778// dot_v16i8
8779class ee_v16i8<SDPatternOperator extend> :
8780  PatFrag<(ops node:$V, node:$K1, node:$K2),
8781          (v4i16 (extract_subvector
8782           (v8i16 (extend
8783            (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>;
8784
8785class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> :
8786  PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2),
8787          (v4i32
8788           (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)),
8789                  (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>;
8790
8791class idot_v16i8<SDPatternOperator m, SDPatternOperator x> :
8792  PatFrag<(ops node:$M, node:$N),
8793          (i32 (extractelt
8794           (v4i32 (AArch64uaddv
8795            (add
8796             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)),
8797                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))),
8798             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)),
8799                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))),
8800           (i64 0)))>;
8801
8802class odot_v16i8<Instruction DOT> :
8803  OutPatFrag<(ops node:$Vm, node:$Vn),
8804             (i32 (ADDVv4i32v
8805              (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>;
8806
8807class dot_v16i8<Instruction DOT, SDPatternOperator mulop,
8808                SDPatternOperator extend> :
8809  Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn),
8810      (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>,
8811  Requires<[HasDotProd]>;
8812
8813let AddedComplexity = 10 in {
8814  def : dot_v4i8<SDOTv8i8, sextloadi8>;
8815  def : dot_v4i8<UDOTv8i8, zextloadi8>;
8816  def : dot_v8i8<SDOTv8i8, AArch64smull, sext>;
8817  def : dot_v8i8<UDOTv8i8, AArch64umull, zext>;
8818  def : dot_v16i8<SDOTv16i8, AArch64smull, sext>;
8819  def : dot_v16i8<UDOTv16i8, AArch64umull, zext>;
8820
8821  // FIXME: add patterns to generate vector by element dot product.
8822  // FIXME: add SVE dot-product patterns.
8823}
8824
8825// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
8826// so that it can be used as input to inline asm, and vice versa.
8827def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
8828def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
8829def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
8830                             GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
8831          (REG_SEQUENCE GPR64x8Class,
8832              $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
8833              $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
8834foreach i = 0-7 in {
8835  def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
8836            (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
8837}
8838
8839let Predicates = [HasLS64] in {
8840  def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
8841                                          (outs GPR64x8:$Rt)>;
8842  def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
8843                                          (outs)>;
8844  def ST64BV:   Store64BV<0b011, "st64bv">;
8845  def ST64BV0:  Store64BV<0b010, "st64bv0">;
8846
8847  class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
8848    : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
8849          (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;
8850
8851  def : ST64BPattern<int_aarch64_st64b, ST64B>;
8852  def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
8853  def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
8854}
8855
8856let Predicates = [HasMOPS] in {
8857  let Defs = [NZCV] in {
8858    defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;
8859
8860    defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;
8861
8862    defm SETP : MOPSMemorySetInsns<0b00, "setp">;
8863  }
8864  let Uses = [NZCV] in {
8865    defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
8866    defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;
8867
8868    defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
8869    defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;
8870
8871    defm SETM : MOPSMemorySetInsns<0b01, "setm">;
8872    defm SETE : MOPSMemorySetInsns<0b10, "sete">;
8873  }
8874}
8875let Predicates = [HasMOPS, HasMTE] in {
8876  let Defs = [NZCV] in {
8877    defm SETGP     : MOPSMemorySetTaggingInsns<0b00, "setgp">;
8878  }
8879  let Uses = [NZCV] in {
8880    defm SETGM     : MOPSMemorySetTaggingInsns<0b01, "setgm">;
8881    // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
8882    defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
8883  }
8884}
8885
8886// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
8887// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
8888def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
8889def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
8890def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
8891def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
8892def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;
8893
8894// MOPS operations always contain three 4-byte instructions
8895let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
8896  let mayLoad = 1 in {
8897    def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
8898                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
8899                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
8900    def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
8901                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
8902                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
8903  }
8904  let mayLoad = 0 in {
8905    def MOPSMemorySetPseudo  : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
8906                                      (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
8907                                      [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
8908  }
8909}
8910let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
8911  def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
8912                                          (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
8913                                          [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
8914}
8915
8916//-----------------------------------------------------------------------------
8917// v8.3 Pointer Authentication late patterns
8918
8919let Predicates = [HasPAuth] in {
8920def : Pat<(int_ptrauth_blend GPR64:$Rd, imm64_0_65535:$imm),
8921          (MOVKXi GPR64:$Rd, (trunc_imm imm64_0_65535:$imm), 48)>;
8922def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
8923          (BFMXri GPR64:$Rd, GPR64:$Rn, 16, 15)>;
8924}
8925
8926//-----------------------------------------------------------------------------
8927
8928// This gets lowered into an instruction sequence of 20 bytes
8929let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
8930def StoreSwiftAsyncContext
8931      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
8932               []>, Sched<[]>;
8933
8934def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
8935def : Pat<(AArch64AssertZExtBool GPR32:$op),
8936          (i32 GPR32:$op)>;
8937
8938//===----------------------------===//
8939// 2022 Architecture Extensions:
8940//===----------------------------===//
8941
8942def : InstAlias<"clrbhb",  (HINT 22), 0>;
8943let Predicates = [HasCLRBHB] in {
8944  def : InstAlias<"clrbhb",  (HINT 22), 1>;
8945}
8946
8947//===----------------------------------------------------------------------===//
8948// Translation Hardening Extension (FEAT_THE)
8949//===----------------------------------------------------------------------===//
8950defm RCW     : ReadCheckWriteCompareAndSwap;
8951
8952defm RCWCLR  : ReadCheckWriteOperation<0b001, "clr">;
8953defm RCWSET  : ReadCheckWriteOperation<0b011, "set">;
8954defm RCWSWP  : ReadCheckWriteOperation<0b010, "swp">;
8955
8956//===----------------------------------------------------------------------===//
8957// General Data-Processing Instructions (FEAT_V94_DP)
8958//===----------------------------------------------------------------------===//
8959defm ABS : OneOperandData<0b001000, "abs", abs>, Requires<[HasCSSC]>;
8960defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
8961defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;
8962
8963defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>;
8964defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>;
8965defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>;
8966defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>;
8967
8968def RPRFM:
8969    I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn),
8970      "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>,
8971    Sched<[]> {
8972  bits<6> Rt;
8973  bits<5> Rn;
8974  bits<5> Rm;
8975  let Inst{2-0} = Rt{2-0};
8976  let Inst{4-3} = 0b11;
8977  let Inst{9-5} = Rn;
8978  let Inst{11-10} = 0b10;
8979  let Inst{13-12} = Rt{4-3};
8980  let Inst{14} = 0b1;
8981  let Inst{15} = Rt{5};
8982  let Inst{20-16} = Rm;
8983  let Inst{31-21} = 0b11111000101;
8984  let mayLoad = 0;
8985  let mayStore = 0;
8986  let hasSideEffects = 1;
8987  // RPRFM overlaps with PRFM (reg), when the decoder method of PRFM returns
8988  // Fail, the decoder should attempt to decode RPRFM. This requires setting
8989  // the decoder namespace to "Fallback".
8990  let DecoderNamespace = "Fallback";
8991}
8992
8993//===----------------------------------------------------------------------===//
8994// 128-bit Atomics (FEAT_LSE128)
8995//===----------------------------------------------------------------------===//
8996let Predicates = [HasLSE128] in {
8997  def SWPP     : LSE128Base<0b000, 0b00, 0b1, "swpp">;
8998  def SWPPA    : LSE128Base<0b000, 0b10, 0b1, "swppa">;
8999  def SWPPAL   : LSE128Base<0b000, 0b11, 0b1, "swppal">;
9000  def SWPPL    : LSE128Base<0b000, 0b01, 0b1, "swppl">;
9001  def LDCLRP   : LSE128Base<0b001, 0b00, 0b0, "ldclrp">;
9002  def LDCLRPA  : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">;
9003  def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">;
9004  def LDCLRPL  : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">;
9005  def LDSETP   : LSE128Base<0b011, 0b00, 0b0, "ldsetp">;
9006  def LDSETPA  : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">;
9007  def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">;
9008  def LDSETPL  : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">;
9009}
9010
9011//===----------------------------------------------------------------------===//
9012// RCPC Instructions (FEAT_LRCPC3)
9013//===----------------------------------------------------------------------===//
9014
9015let Predicates = [HasRCPC3] in {
9016  //                                             size   opc    opc2
9017  def STILPWpre:  BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
9018  def STILPXpre:  BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
9019  def STILPW:     BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
9020  def STILPX:     BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
9021  def LDIAPPWpre: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">;
9022  def LDIAPPXpre: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">;
9023  def LDIAPPW:    BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
9024  def LDIAPPX:    BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
9025
9026  def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>;
9027  def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;
9028
9029  // Aliases for when offset=0
9030  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32: $Rt, GPR32: $Rt2, GPR64sp:$Rn)>;
9031  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64: $Rt, GPR64: $Rt2, GPR64sp:$Rn)>;
9032
9033  //                                        size   opc
9034  def STLRWpre:  BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback),            (ins GPR32:$Rt, GPR64sp:$Rn), "stlr",  "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
9035  def STLRXpre:  BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback),            (ins GPR64:$Rt, GPR64sp:$Rn), "stlr",  "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
9036  def LDAPRWpre: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn),            "ldapr", "\t$Rt, [$Rn], #4",   "$Rn = $wback">;
9037  def LDAPRXpre: BaseLRCPC3IntegerLoadStore<0b11, 0b11, (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn),            "ldapr", "\t$Rt, [$Rn], #8",   "$Rn = $wback">;
9038}
9039
9040let Predicates = [HasRCPC3, HasNEON] in {
9041  //                                              size   opc regtype
9042  defm STLURb:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8  , (outs), (ins FPR8  :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9043  defm STLURh:  LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16 , (outs), (ins FPR16 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9044  defm STLURs:  LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32 , (outs), (ins FPR32 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9045  defm STLURd:  LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64 , (outs), (ins FPR64 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9046  defm STLURq:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
9047  defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8  , (outs FPR8  :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
9048  defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16 , (outs FPR16 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
9049  defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32 , (outs FPR32 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
9050  defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64 , (outs FPR64 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
9051  defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
9052
9053  //                                L
9054  def STL1:  LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn) , "stl1", "">;
9055  def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">;
9056
9057  // Aliases for when offset=0
9058  def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>;
9059}
9060
9061//===----------------------------------------------------------------------===//
9062// 128-bit System Instructions (FEAT_SYSINSTR128)
9063//===----------------------------------------------------------------------===//
9064let Predicates = [HasD128] in {
9065  def SYSPxt  : SystemPXtI<0, "sysp">;
9066
9067  def SYSPxt_XZR
9068    : BaseSystemI<0, (outs),
9069        (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair),
9070        "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">,
9071      Sched<[WriteSys]>
9072  {
9073    // Had to use a custom decoder because tablegen interprets this as having 4 fields (why?)
9074    // and therefore autogenerates a decoder that builds an MC representation that has 4 fields
9075    // (decodeToMCInst), but when printing we expect the MC representation to have 5 fields (one
9076    // extra for the XZR) because AArch64InstPrinter::printInstruction in AArch64GenAsmWriter.inc
9077    // is based off of the asm template (maybe) and therefore wants to print 5 operands.
9078    // I could add a bits<5> xzr_pair. But without a way to constrain it to 0b11111 here it would
9079    // overlap with the main SYSP instruction.
9080    let DecoderMethod = "DecodeSyspXzrInstruction";
9081    bits<3> op1;
9082    bits<4> Cn;
9083    bits<4> Cm;
9084    bits<3> op2;
9085    let Inst{22}    = 0b1; // override BaseSystemI
9086    let Inst{20-19} = 0b01;
9087    let Inst{18-16} = op1;
9088    let Inst{15-12} = Cn;
9089    let Inst{11-8}  = Cm;
9090    let Inst{7-5}   = op2;
9091    let Inst{4-0}   = 0b11111;
9092  }
9093
9094  def : InstAlias<"sysp $op1, $Cn, $Cm, $op2",
9095                  (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
9096}
9097
9098//---
9099// 128-bit System Registers (FEAT_SYSREG128)
9100//---
9101
9102// Instruction encoding:
9103//
9104//          31       22|21|20|19|18 16|15 12|11 8|7 5|4 0
9105// MRRS      1101010101| 1| 1|o0|  op1|   Cn|  Cm|op2| Rt
9106// MSRR      1101010101| 0| 1|o0|  op1|   Cn|  Cm|op2| Rt
9107
9108// Instruction syntax:
9109//
9110// MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>
9111// MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1>
9112//
9113// ...where t is even (X0, X2, etc).
9114
9115let Predicates = [HasD128] in {
9116  def MRRS : RtSystemI128<1,
9117    (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg),
9118    "mrrs", "\t$Rt, $systemreg">
9119  {
9120    bits<16> systemreg;
9121    let Inst{20-5} = systemreg;
9122  }
9123
9124  def MSRR : RtSystemI128<0,
9125    (outs),  (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt),
9126    "msrr", "\t$systemreg, $Rt">
9127  {
9128    bits<16> systemreg;
9129    let Inst{20-5} = systemreg;
9130  }
9131}
9132
9133
9134include "AArch64InstrAtomics.td"
9135include "AArch64SVEInstrInfo.td"
9136include "AArch64SMEInstrInfo.td"
9137include "AArch64InstrGISel.td"
9138