//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the ARM NEON instruction set.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//
def nModImm : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
}

def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
def nImmSplatI8 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI8AsmOperand;
}
def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
def nImmSplatI16 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI16AsmOperand;
}
def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
def nImmSplatI32 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI32AsmOperand;
}
def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; }
def nImmSplatNotI16 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI16AsmOperand;
}
def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; }
def nImmSplatNotI32 : Operand<i32> {
  let ParserMatchClass = nImmSplatNotI32AsmOperand;
}
def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
def nImmVMOVI32 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVI32AsmOperand;
}

class nImmVMOVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "vmovi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONmovReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONvmovi" # From.Size # "ReplicateOperands";
}

class nImmVINVIAsmOperandReplicate<ValueType From, ValueType To>
  : AsmOperandClass {
  let Name = "NEONi" # To.Size # "invi" # From.Size # "Replicate";
  let PredicateMethod = "isNEONinvReplicate<" # From.Size # ", " # To.Size # ">";
  let RenderMethod = "addNEONinvi" # From.Size # "ReplicateOperands";
}

class nImmVMOVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVIAsmOperandReplicate<From, To>;
}

class nImmVINVIReplicate<ValueType From, ValueType To> : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVINVIAsmOperandReplicate<From, To>;
}

def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmVMOVI32NegAsmOperand;
}
def nImmVMOVF32 : Operand<i32> {
  let PrintMethod = "printFPImmOperand";
  let ParserMatchClass = FPImmOperand;
}
def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
def nImmSplatI64 : Operand<i32> {
  let PrintMethod = "printVMOVModImmOperand";
  let ParserMatchClass = nImmSplatI64AsmOperand;
}

def VectorIndex8Operand  : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; }
def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 8;
}]> {
  let ParserMatchClass = VectorIndex8Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex16 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 4;
}]> {
  let ParserMatchClass = VectorIndex16Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 2;
}]> {
  let ParserMatchClass = VectorIndex32Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{
  return ((uint64_t)Imm) < 1;
}]> {
  let ParserMatchClass = VectorIndex64Operand;
  let PrintMethod = "printVectorIndex";
  let MIOperandInfo = (ops i32imm);
}
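// The index ranges above correspond to the number of elements of each size in
// a 64-bit D register: 8 bytes, 4 halfwords, 2 words, or 1 doubleword.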

// Register list of one D register.
def VecListOneDAsmOperand : AsmOperandClass {
  let Name = "VecListOneD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
  let ParserMatchClass = VecListOneDAsmOperand;
}
// Register list of two sequential D registers.
def VecListDPairAsmOperand : AsmOperandClass {
  let Name = "VecListDPair";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
  let ParserMatchClass = VecListDPairAsmOperand;
}
// Register list of three sequential D registers.
def VecListThreeDAsmOperand : AsmOperandClass {
  let Name = "VecListThreeD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
  let ParserMatchClass = VecListThreeDAsmOperand;
}
// Register list of four sequential D registers.
def VecListFourDAsmOperand : AsmOperandClass {
  let Name = "VecListFourD";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
  let ParserMatchClass = VecListFourDAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpaced";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpaced : RegisterOperand<DPairSpc, "printVectorListTwoSpaced"> {
  let ParserMatchClass = VecListDPairSpacedAsmOperand;
}
// Register list of three D registers spaced by 2 (three Q registers).
def VecListThreeQAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
  let ParserMatchClass = VecListThreeQAsmOperand;
}
// Register list of four D registers spaced by 2 (four Q registers).
def VecListFourQAsmOperand : AsmOperandClass {
  let Name = "VecListFourQ";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
  let ParserMatchClass = VecListFourQAsmOperand;
}
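// In assembly these lists are written as, e.g., {d0}, {d0, d1}, {d0, d1, d2},
// {d0, d1, d2, d3} for the sequential forms and {d0, d2}, {d0, d2, d4},
// {d0, d2, d4, d6} for the double-spaced forms.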

// Register list of one D register, with "all lanes" subscripting.
def VecListOneDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListOneDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
  let ParserMatchClass = VecListOneDAllLanesAsmOperand;
}
// Register list of two D registers, with "all lanes" subscripting.
def VecListDPairAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairAllLanes : RegisterOperand<DPair,
                                           "printVectorListTwoAllLanes"> {
  let ParserMatchClass = VecListDPairAllLanesAsmOperand;
}
// Register list of two D registers spaced by 2 (two sequential Q registers).
def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListDPairSpacedAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
                                         "printVectorListTwoSpacedAllLanes"> {
  let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
// Register list of three D registers, with "all lanes" subscripting.
def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeDAllLanes : RegisterOperand<DPR,
                                            "printVectorListThreeAllLanes"> {
  let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
}
// Register list of three D registers spaced by 2 (three sequential Q regs).
def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListThreeQAllLanes : RegisterOperand<DPR,
                                         "printVectorListThreeSpacedAllLanes"> {
  let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
}
// Register list of four D registers, with "all lanes" subscripting.
def VecListFourDAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourDAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
  let ParserMatchClass = VecListFourDAllLanesAsmOperand;
}
// Register list of four D registers spaced by 2 (four sequential Q regs).
def VecListFourQAllLanesAsmOperand : AsmOperandClass {
  let Name = "VecListFourQAllLanes";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListOperands";
}
def VecListFourQAllLanes : RegisterOperand<DPR,
                                         "printVectorListFourSpacedAllLanes"> {
  let ParserMatchClass = VecListFourQAllLanesAsmOperand;
}
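// The "all lanes" forms correspond to the duplicating loads, written with
// empty lane brackets, e.g. vld2.8 {d0[], d1[]}, [r0].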


// Register list of one D register, with byte lane subscripting.
def VecListOneDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListOneDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListOneDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListOneDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListOneDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of two D registers with byte lane subscripting.
def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of two Q registers with half-word lane subscripting.
def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListTwoQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListTwoQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}


// Register list of three D registers with byte lane subscripting.
def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of three Q registers with half-word lane subscripting.
def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListThreeQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListThreeQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}

// Register list of four D registers with byte lane subscripting.
def VecListFourDByteIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDByteIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDByteIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDByteIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with half-word lane subscripting.
def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourDWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourDWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourDWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourDWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// Register list of four Q registers with half-word lane subscripting.
def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQHWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQHWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
// ...with word lane subscripting.
def VecListFourQWordIndexAsmOperand : AsmOperandClass {
  let Name = "VecListFourQWordIndexed";
  let ParserMethod = "parseVectorList";
  let RenderMethod = "addVecListIndexedOperands";
}
def VecListFourQWordIndexed : Operand<i32> {
  let ParserMatchClass = VecListFourQWordIndexAsmOperand;
  let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
}
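// The lane-indexed forms are written with an explicit lane number, e.g.
// vld2.16 {d0[2], d1[2]}, [r0]; at the MI level the operand carries the first
// D register plus the lane index as an immediate.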

def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() >= 8;
}]>;
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 8;
}]>;
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 4;
}]>;
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 4;
}]>;
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 2;
}]>;
def hword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                 (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 2;
}]>;
def byte_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() == 1;
}]>;
def byte_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                             (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() == 1;
}]>;
def non_word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
                                    (store node:$val, node:$ptr), [{
  return cast<StoreSDNode>(N)->getAlignment() < 4;
}]>;
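// These fragments gate instruction selection on the alignment (in bytes)
// recorded on the memory SDNode, e.g. dword_alignedload only matches loads
// whose known alignment is at least 8 bytes.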

//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//

def SDTARMVTST    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>;
def NEONvtst      : SDNode<"ARMISD::VTST", SDTARMVTST>;

// Types for vector shift by immediates.  The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types.  The "SHINS" version is for shift and insert operations.
def SDTARMVSHIMM     : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                            SDTCisVT<2, i32>]>;
def SDTARMVSHXIMM    : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                            SDTCisVT<2, i32>]>;
def SDTARMVSHINSIMM  : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                            SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshrnImm     : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;

def NEONvrshrsImm    : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
def NEONvrshruImm    : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>;
def NEONvrshrnImm    : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>;

def NEONvqshlsImm    : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>;
def NEONvqshluImm    : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>;
def NEONvqshlsuImm   : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>;
def NEONvqshrnsImm   : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqshrnuImm   : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqshrnsuImm  : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvqrshrnsImm  : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>;
def NEONvqrshrnuImm  : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>;
def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;

def NEONvsliImm      : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
def NEONvsriImm      : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;

def NEONvbsp      : SDNode<"ARMISD::VBSP",
                           SDTypeProfile<1, 3, [SDTCisVec<0>,
                                                SDTCisSameAs<0, 1>,
                                                SDTCisSameAs<0, 2>,
                                                SDTCisSameAs<0, 3>]>>;

def SDTARMVEXT    : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
def NEONvext      : SDNode<"ARMISD::VEXT", SDTARMVEXT>;

def SDTARMVSHUF2  : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                         SDTCisSameAs<0, 2>,
                                         SDTCisSameAs<0, 3>]>;
def NEONzip       : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp       : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn       : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;

def SDTARMVTBL1   : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                         SDTCisVT<2, v8i8>]>;
def SDTARMVTBL2   : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>,
                                         SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>;
def NEONvtbl1     : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>;
def NEONvtbl2     : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>;


//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//

// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
  : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
                    IIC_fpLoad_m, "",
                   [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;

// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
  : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
                    IIC_fpStore_m, "",
                   [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;

// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;

class VLDQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset), itin,
                "$addr.addr = $wb">;
class VLDQQWBfixedPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr), itin,
                "$addr.addr = $wb">;
class VLDQQWBregisterPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, rGPR:$offset), itin,
                "$addr.addr = $wb">;


class VLDQQQQPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src), itin,
                "$src = $dst">;
class VLDQQQQWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
                "$addr.addr = $wb, $src = $dst">;

let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

//   VLD1     : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x2,
          "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}

def  VLD1d8   : VLD1D<{0,0,0,?}, "8",  addrmode6align64>;
def  VLD1d16  : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
def  VLD1d32  : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
def  VLD1d64  : VLD1D<{1,1,0,?}, "64", addrmode6align64>;

def  VLD1q8   : VLD1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
def  VLD1q16  : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
def  VLD1q32  : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
def  VLD1q64  : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;
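// In assembly these are, e.g., vld1.8 {d16}, [r0:64] for the one-register
// form and vld1.16 {d16, d17}, [r0:128] for the two-register form; the
// optional :64/:128 suffix is the address alignment hint.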

// ...with address register writeback:
multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), IIC_VLD1u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}
multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                    (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

defm VLD1d8wb  : VLD1DWB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
defm VLD1q8wb  : VLD1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
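// The _fixed variants are the post-increment-by-access-size form, e.g.
// vld1.8 {d16}, [r0]!, while the _register variants take an explicit
// increment register, e.g. vld1.8 {d16}, [r0], r2.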

// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                    (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
    let Inst{4} = Rn{4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8T      : VLD1D3<{0,0,0,?}, "8",  addrmode6align64>;
def VLD1d16T     : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
def VLD1d32T     : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
def VLD1d64T     : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;

defm VLD1d8Twb  : VLD1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;

def VLD1d8TPseudo  : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d16TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d32TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

def VLD1q8HighTPseudo     : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q8LowTPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q16LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q32LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64HighTPseudo    : VLDQQQQPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;
def VLD1q64LowTPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x3>, Sched<[WriteVLD3]>;

// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
  : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
          (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
          "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST1Instruction";
}
multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                    (ins AddrMode:$Rn), IIC_VLD1x2u,
                     "vld1", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
  def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
                        "vld1", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST1Instruction";
  }
}

def VLD1d8Q      : VLD1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
def VLD1d16Q     : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
def VLD1d32Q     : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VLD1d64Q     : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;

defm VLD1d8Qwb   : VLD1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
defm VLD1d16Qwb  : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
defm VLD1d32Qwb  : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
defm VLD1d64Qwb  : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;

def VLD1d8QPseudo  : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d16QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d32QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

def VLD1q8LowQPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q8HighQPseudo     : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q16HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q32HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64LowQPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;
def VLD1q64HighQPseudo    : VLDQQQQPseudo<IIC_VLD1x4>, Sched<[WriteVLD4]>;

//   VLD2     : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
           InstrItinClass itin, Operand AddrMode>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
          (ins AddrMode:$Rn), itin,
          "vld2", Dt, "$Vd, $Rn", "", []> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST2Instruction";
}

def  VLD2d8   : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
                     addrmode6align64or128>, Sched<[WriteVLD2]>;
def  VLD2d16  : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
                     addrmode6align64or128>, Sched<[WriteVLD2]>;
def  VLD2d32  : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
                     addrmode6align64or128>, Sched<[WriteVLD2]>;

def  VLD2q8   : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
                     addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def  VLD2q16  : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
                     addrmode6align64or128or256>, Sched<[WriteVLD4]>;
def  VLD2q32  : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
                     addrmode6align64or128or256>, Sched<[WriteVLD4]>;

def  VLD2q8Pseudo  : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
def  VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;
def  VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>, Sched<[WriteVLD4]>;

// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
                  RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
  def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                     (ins AddrMode:$Rn), itin,
                     "vld2", Dt, "$Vd, $Rn!",
                     "$Rn.addr = $wb", []> {
    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
  def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
                        (ins AddrMode:$Rn, rGPR:$Rm), itin,
                        "vld2", Dt, "$Vd, $Rn, $Rm",
                        "$Rn.addr = $wb", []> {
    let Inst{5-4} = Rn{5-4};
    let DecoderMethod = "DecodeVLDST2Instruction";
  }
}

defm VLD2d8wb  : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;

defm VLD2q8wb  : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;
defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
                        addrmode6align64or128or256>, Sched<[WriteVLD4]>;

def VLD2q8PseudoWB_fixed     : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q16PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q32PseudoWB_fixed    : VLDQQWBfixedPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q8PseudoWB_register  : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>, Sched<[WriteVLD4]>;

// ...with double-spaced registers
def  VLD2b8    : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
                      addrmode6align64or128>, Sched<[WriteVLD2]>;
def  VLD2b16   : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
                      addrmode6align64or128>, Sched<[WriteVLD2]>;
def  VLD2b32   : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
                      addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b8wb  : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
                        addrmode6align64or128>, Sched<[WriteVLD2]>;
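// The VLD2d* forms load an adjacent register pair, e.g.
// vld2.16 {d16, d17}, [r0], whereas the double-spaced VLD2b* forms load every
// other register, e.g. vld2.16 {d16, d18}, [r0].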

//   VLD3     : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn), IIC_VLD3,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> {
  let Rm = 0b1111;
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def  VLD3d8   : VLD3D<0b0100, {0,0,0,?}, "8">;
def  VLD3d16  : VLD3D<0b0100, {0,1,0,?}, "16">;
def  VLD3d32  : VLD3D<0b0100, {1,0,0,?}, "32">;

def  VLD3d8Pseudo  : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def  VLD3d16Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def  VLD3d32Pseudo : VLDQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;

// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u,
          "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> {
  let Inst{4} = Rn{4};
  let DecoderMethod = "DecodeVLDST3Instruction";
}

def VLD3d8_UPD  : VLD3DWB<0b0100, {0,0,0,?}, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">;

def VLD3d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

// ...with double-spaced registers:
def VLD3q8      : VLD3D<0b0101, {0,0,0,?}, "8">;
def VLD3q16     : VLD3D<0b0101, {0,1,0,?}, "16">;
def VLD3q32     : VLD3D<0b0101, {1,0,0,?}, "32">;
def VLD3q8_UPD  : VLD3DWB<0b0101, {0,0,0,?}, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">;

def VLD3q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;

// ...alternate versions to be allocated odd register numbers:
def VLD3q8oddPseudo   : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3q16oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;
def VLD3q32oddPseudo  : VLDQQQQPseudo<IIC_VLD3>, Sched<[WriteVLD3]>;

def VLD3q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD3u>, Sched<[WriteVLD3]>;
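// The VLD3d* forms read consecutive registers, e.g. vld3.8 {d16, d17, d18},
// [r0], and the double-spaced VLD3q* forms read every other register, e.g.
// vld3.16 {d16, d18, d20}, [r0].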

//   VLD4     : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
          (ins addrmode6:$Rn), IIC_VLD4,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>,
    Sched<[WriteVLD4]> {
  let Rm = 0b1111;
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def  VLD4d8   : VLD4D<0b0000, {0,0,?,?}, "8">;
def  VLD4d16  : VLD4D<0b0000, {0,1,?,?}, "16">;
def  VLD4d32  : VLD4D<0b0000, {1,0,?,?}, "32">;

def  VLD4d8Pseudo  : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def  VLD4d16Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def  VLD4d32Pseudo : VLDQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;

// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdSt<0, 0b10, op11_8, op7_4,
          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u,
          "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
          "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> {
  let Inst{5-4} = Rn{5-4};
  let DecoderMethod = "DecodeVLDST4Instruction";
}

def VLD4d8_UPD  : VLD4DWB<0b0000, {0,0,?,?}, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">;

def VLD4d8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4d16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4d32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

// ...with double-spaced registers:
def VLD4q8      : VLD4D<0b0001, {0,0,?,?}, "8">;
def VLD4q16     : VLD4D<0b0001, {0,1,?,?}, "16">;
def VLD4q32     : VLD4D<0b0001, {1,0,?,?}, "32">;
def VLD4q8_UPD  : VLD4DWB<0b0001, {0,0,?,?}, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">;

def VLD4q8Pseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

// ...alternate versions to be allocated odd register numbers:
def VLD4q8oddPseudo   : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4q16oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;
def VLD4q32oddPseudo  : VLDQQQQPseudo<IIC_VLD4>, Sched<[WriteVLD4]>;

def VLD4q8oddPseudo_UPD  : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;
def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>, Sched<[WriteVLD4]>;

} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1

// Classes for VLD*LN pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
class VLDQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst),
                (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst),
                (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;
class VLDQQQQLNPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst),
                (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
                itin, "$src = $dst">;
class VLDQQQQLNWBPseudo<InstrItinClass itin>
  : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
                 nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">;

//   VLD1LN   : Vector Load (single element to one lane)
class VLD1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
          (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane),
          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
          "$src = $Vd",
          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                         (i32 (LoadOp addrmode6:$Rn)),
                                         imm:$lane))]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
class VLD1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
             PatFrag LoadOp>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd),
          (ins addrmode6oneL32:$Rn, DPR:$src, nohash_imm:$lane),
          IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn",
          "$src = $Vd",
          [(set DPR:$Vd, (vector_insert (Ty DPR:$src),
                                         (i32 (LoadOp addrmode6oneL32:$Rn)),
                                         imm:$lane))]>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD1LN";
}
class VLD1QLNPseudo<ValueType Ty, PatFrag LoadOp> : VLDQLNPseudo<IIC_VLD1ln>,
                                                    Sched<[WriteVLD1]> {
  let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src),
                                               (i32 (LoadOp addrmode6:$addr)),
                                               imm:$lane))];
}

def VLD1LNd8  : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
  let Inst{7-6} = lane{1-0};
  let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
  let Inst{7} = lane{0};
  let Inst{5-4} = Rn{5-4};
}

def VLD1LNq8Pseudo  : VLD1QLNPseudo<v16i8, extloadi8>;
def VLD1LNq16Pseudo : VLD1QLNPseudo<v8i16, extloadi16>;
def VLD1LNq32Pseudo : VLD1QLNPseudo<v4i32, load>;
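// Lane loads are written with an explicit lane index, e.g.
// vld1.16 {d16[2]}, [r0]. The Q-register variants have no separate encoding;
// the pseudos above are expanded after register allocation to a VLD1LN on the
// D subregister that contains the requested lane.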

let Predicates = [HasNEON] in {
def : Pat<(vector_insert (v4f16 DPR:$src),
                         (f16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v8f16 QPR:$src),
                         (f16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4bf16 DPR:$src),
                         (bf16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v8bf16 QPR:$src),
                         (bf16 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v2f32 DPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
def : Pat<(vector_insert (v4f32 QPR:$src),
                         (f32 (load addrmode6:$addr)), imm:$lane),
          (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
}


let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {

// ...with address register writeback:
class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt,
          "\\{$Vd[$lane]\\}, $Rn$Rm",
          "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
  let DecoderMethod = "DecodeVLD1LN";
}

def VLD1LNd8_UPD  : VLD1LNWB<0b0000, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
  let Inst{4}   = Rn{4};
}
def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> {
  let Inst{7} = lane{0};
  let Inst{5} = Rn{4};
  let Inst{4} = Rn{4};
}

def VLD1LNq8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;
def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD1lnu>, Sched<[WriteVLD1]>;

//   VLD2LN   : Vector Load (single 2-element structure to one lane)
class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane),
          IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn",
          "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> {
  let Rm = 0b1111;
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8  : VLD2LN<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo  : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNd16Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNd32Pseudo : VLDQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with double-spaced registers:
def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;
def VLD2LNq32Pseudo : VLDQQLNPseudo<IIC_VLD2ln>, Sched<[WriteVLD1]>;

// ...with address register writeback:
class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
          (ins addrmode6:$Rn, am6offset:$Rm,
           DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt,
          "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm",
          "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> {
  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVLD2LN";
}

def VLD2LNd8_UPD  : VLD2LNWB<0b0001, {?,?,?,?}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNd8Pseudo_UPD  : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> {
  let Inst{7} = lane{0};
}

def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;
def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD2lnu>, Sched<[WriteVLD1]>;

//   VLD3LN   : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
  : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3,
          nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt,
          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn",
          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> {
  let Rm = 0b1111;
  let DecoderMethod = "DecodeVLD3LN";
}

def VLD3LNd8  : VLD3LN<0b0010, {?,?,?,0}, "8"> {
  let Inst{7-5} = lane{2-0};
}
def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VLD3LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNd16Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNd32Pseudo : VLDQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

// ...with double-spaced registers:
def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> {
  let Inst{7-6} = lane{1-0};
}
def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> {
  let Inst{7}   = lane{0};
}

def VLD3LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;
def VLD3LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD3ln>, Sched<[WriteVLD2]>;

1229// ...with address register writeback:
1230class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1231  : NLdStLn<1, 0b10, op11_8, op7_4,
1232          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1233          (ins addrmode6:$Rn, am6offset:$Rm,
1234           DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
1235          IIC_VLD3lnu, "vld3", Dt,
1236          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm",
1237          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb",
1238          []>, Sched<[WriteVLD2]> {
1239  let DecoderMethod = "DecodeVLD3LN";
1240}
1241
1242def VLD3LNd8_UPD  : VLD3LNWB<0b0010, {?,?,?,0}, "8"> {
1243  let Inst{7-5} = lane{2-0};
1244}
1245def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
1246  let Inst{7-6} = lane{1-0};
1247}
1248def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
1249  let Inst{7} = lane{0};
1250}
1251
1252def VLD3LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1253def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1254def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1255
1256def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
1257  let Inst{7-6} = lane{1-0};
1258}
1259def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
1260  let Inst{7} = lane{0};
1261}
1262
1263def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1264def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>, Sched<[WriteVLD2]>;
1265
1266//   VLD4LN   : Vector Load (single 4-element structure to one lane)
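// Example syntax (operands chosen for illustration):
//   vld4.16 {d0[2], d1[2], d2[2], d3[2]}, [r0]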
1267class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
1268  : NLdStLn<1, 0b10, op11_8, op7_4,
1269          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1270          (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
1271          nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt,
1272          "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
1273          "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>,
1274    Sched<[WriteVLD2]> {
1275  let Rm = 0b1111;
1276  let Inst{4} = Rn{4};
1277  let DecoderMethod = "DecodeVLD4LN";
1278}
1279
1280def VLD4LNd8  : VLD4LN<0b0011, {?,?,?,?}, "8"> {
1281  let Inst{7-5} = lane{2-0};
1282}
1283def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
1284  let Inst{7-6} = lane{1-0};
1285}
1286def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
1287  let Inst{7} = lane{0};
1288  let Inst{5} = Rn{5};
1289}
1290
1291def VLD4LNd8Pseudo  : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1292def VLD4LNd16Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1293def VLD4LNd32Pseudo : VLDQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1294
1295// ...with double-spaced registers:
1296def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
1297  let Inst{7-6} = lane{1-0};
1298}
1299def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
1300  let Inst{7} = lane{0};
1301  let Inst{5} = Rn{5};
1302}
1303
1304def VLD4LNq16Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1305def VLD4LNq32Pseudo : VLDQQQQLNPseudo<IIC_VLD4ln>, Sched<[WriteVLD2]>;
1306
1307// ...with address register writeback:
1308class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1309  : NLdStLn<1, 0b10, op11_8, op7_4,
1310          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1311          (ins addrmode6:$Rn, am6offset:$Rm,
1312           DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
1313          IIC_VLD4lnu, "vld4", Dt,
1314"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm",
1315"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb",
1316          []> {
1317  let Inst{4}   = Rn{4};
  let DecoderMethod = "DecodeVLD4LN";
1319}
1320
1321def VLD4LNd8_UPD  : VLD4LNWB<0b0011, {?,?,?,?}, "8"> {
1322  let Inst{7-5} = lane{2-0};
1323}
1324def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
1325  let Inst{7-6} = lane{1-0};
1326}
1327def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
1328  let Inst{7} = lane{0};
1329  let Inst{5} = Rn{5};
1330}
1331
1332def VLD4LNd8Pseudo_UPD  : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1333def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1334def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1335
1336def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
1337  let Inst{7-6} = lane{1-0};
1338}
1339def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
1340  let Inst{7} = lane{0};
1341  let Inst{5} = Rn{5};
1342}
1343
1344def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1345def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>, Sched<[WriteVLD2]>;
1346
1347} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1348
1349//   VLD1DUP  : Vector Load (single element to all lanes)
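// Example syntax (operands chosen for illustration):
//   vld1.8 {d0[]}, [r0]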
1350class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
1351              Operand AddrMode>
1352  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
1353          (ins AddrMode:$Rn),
1354          IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
1355          [(set VecListOneDAllLanes:$Vd,
1356                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]>,
1357   Sched<[WriteVLD2]> {
1358  let Rm = 0b1111;
1359  let Inst{4} = Rn{4};
1360  let DecoderMethod = "DecodeVLD1DupInstruction";
1361}
1362def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
1363                         addrmode6dupalignNone>;
1364def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
1365                         addrmode6dupalign16>;
1366def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
1367                         addrmode6dupalign32>;
1368
1369let Predicates = [HasNEON] in {
1370def : Pat<(v2f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
1371          (VLD1DUPd32 addrmode6:$addr)>;
1372}
1373
1374class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
1375               Operand AddrMode>
1376  : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
1377          (ins AddrMode:$Rn), IIC_VLD1dup,
1378          "vld1", Dt, "$Vd, $Rn", "",
1379          [(set VecListDPairAllLanes:$Vd,
1380                (Ty (ARMvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
1381  let Rm = 0b1111;
1382  let Inst{4} = Rn{4};
1383  let DecoderMethod = "DecodeVLD1DupInstruction";
1384}
1385
1386def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
1387                          addrmode6dupalignNone>;
1388def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
1389                          addrmode6dupalign16>;
1390def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
1391                          addrmode6dupalign32>;
1392
1393let Predicates = [HasNEON] in {
1394def : Pat<(v4f32 (ARMvdup (f32 (load addrmode6dup:$addr)))),
1395          (VLD1DUPq32 addrmode6:$addr)>;
1396}
1397
1398let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in {
1399// ...with address register writeback:
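// Each writeback multiclass below provides a "_fixed" form (Rm encoded as
// 0b1101, printed as "$Rn!", i.e. post-increment by the access size) and a
// "_register" form (explicit Rm operand, printed as "$Rn, $Rm").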
1400multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1401  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1402                     (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1403                     (ins AddrMode:$Rn), IIC_VLD1dupu,
1404                     "vld1", Dt, "$Vd, $Rn!",
1405                     "$Rn.addr = $wb", []> {
1406    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1407    let Inst{4} = Rn{4};
1408    let DecoderMethod = "DecodeVLD1DupInstruction";
1409  }
1410  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1411                        (outs VecListOneDAllLanes:$Vd, GPR:$wb),
1412                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1413                        "vld1", Dt, "$Vd, $Rn, $Rm",
1414                        "$Rn.addr = $wb", []> {
1415    let Inst{4} = Rn{4};
1416    let DecoderMethod = "DecodeVLD1DupInstruction";
1417  }
1418}
1419multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1420  def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
1421                     (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1422                     (ins AddrMode:$Rn), IIC_VLD1dupu,
1423                     "vld1", Dt, "$Vd, $Rn!",
1424                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1425    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1426    let Inst{4} = Rn{4};
1427    let DecoderMethod = "DecodeVLD1DupInstruction";
1428  }
1429  def _register : NLdSt<1, 0b10, 0b1100, op7_4,
1430                        (outs VecListDPairAllLanes:$Vd, GPR:$wb),
1431                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
1432                        "vld1", Dt, "$Vd, $Rn, $Rm",
1433                        "$Rn.addr = $wb", []> {
1434    let Inst{4} = Rn{4};
1435    let DecoderMethod = "DecodeVLD1DupInstruction";
1436  }
1437}
1438
1439defm VLD1DUPd8wb  : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
1440defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
1441defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;
1442
1443defm VLD1DUPq8wb  : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
1444defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
1445defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
1446
1447//   VLD2DUP  : Vector Load (single 2-element structure to all lanes)
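// Example syntax (operands chosen for illustration):
//   vld2.16 {d0[], d1[]}, [r0]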
1448class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
1449  : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
1450          (ins AddrMode:$Rn), IIC_VLD2dup,
1451          "vld2", Dt, "$Vd, $Rn", "", []> {
1452  let Rm = 0b1111;
1453  let Inst{4} = Rn{4};
1454  let DecoderMethod = "DecodeVLD2DupInstruction";
1455}
1456
1457def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8",  VecListDPairAllLanes,
1458                         addrmode6dupalign16>;
1459def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
1460                         addrmode6dupalign32>;
1461def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
1462                         addrmode6dupalign64>;
1463
// HACK: VLD2DUPd8x2 must be changed at the same time as VLD2b8, or
// "vld2.8 {d0[], d2[]}, [r4:32]" will be printed as "vld2.8 {d0, d2}, [r4:32]".
1466// ...with double-spaced registers
1467def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8",  VecListDPairSpacedAllLanes,
1468                           addrmode6dupalign16>;
1469def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1470                           addrmode6dupalign32>;
1471def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1472                           addrmode6dupalign64>;
1473
1474def VLD2DUPq8EvenPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1475def VLD2DUPq8OddPseudo   : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1476def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1477def VLD2DUPq16OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1478def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1479def VLD2DUPq32OddPseudo  : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
1480
1481// ...with address register writeback:
1482multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
1483                     Operand AddrMode> {
1484  def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
1485                     (outs VdTy:$Vd, GPR:$wb),
1486                     (ins AddrMode:$Rn), IIC_VLD2dupu,
1487                     "vld2", Dt, "$Vd, $Rn!",
1488                     "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1489    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1490    let Inst{4} = Rn{4};
1491    let DecoderMethod = "DecodeVLD2DupInstruction";
1492  }
1493  def _register : NLdSt<1, 0b10, 0b1101, op7_4,
1494                        (outs VdTy:$Vd, GPR:$wb),
1495                        (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
1496                        "vld2", Dt, "$Vd, $Rn, $Rm",
1497                        "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> {
1498    let Inst{4} = Rn{4};
1499    let DecoderMethod = "DecodeVLD2DupInstruction";
1500  }
1501}
1502
1503defm VLD2DUPd8wb    : VLD2DUPWB<{0,0,0,0}, "8",  VecListDPairAllLanes,
1504                                addrmode6dupalign16>;
1505defm VLD2DUPd16wb   : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
1506                                addrmode6dupalign32>;
1507defm VLD2DUPd32wb   : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
1508                                addrmode6dupalign64>;
1509
1510defm VLD2DUPd8x2wb  : VLD2DUPWB<{0,0,1,0}, "8",  VecListDPairSpacedAllLanes,
1511                                addrmode6dupalign16>;
1512defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
1513                                addrmode6dupalign32>;
1514defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
1515                                addrmode6dupalign64>;
1516
1517//   VLD3DUP  : Vector Load (single 3-element structure to all lanes)
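// Example syntax (operands chosen for illustration):
//   vld3.32 {d0[], d1[], d2[]}, [r0]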
1518class VLD3DUP<bits<4> op7_4, string Dt>
1519  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
1520          (ins addrmode6dup:$Rn), IIC_VLD3dup,
1521          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>,
1522    Sched<[WriteVLD2]> {
1523  let Rm = 0b1111;
1524  let Inst{4} = 0;
1525  let DecoderMethod = "DecodeVLD3DupInstruction";
1526}
1527
1528def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
1529def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
1530def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
1531
1532def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1533def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1534def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1535
1536// ...with double-spaced registers (not used for codegen):
1537def VLD3DUPq8  : VLD3DUP<{0,0,1,?}, "8">;
1538def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
1539def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
1540
1541def VLD3DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1542def VLD3DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1543def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1544def VLD3DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1545def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1546def VLD3DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
1547
1548// ...with address register writeback:
1549class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
1550  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
1551          (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
1552          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
1553          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
1554  let Inst{4} = 0;
1555  let DecoderMethod = "DecodeVLD3DupInstruction";
1556}
1557
1558def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8",  addrmode6dupalign64>;
1559def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
1560def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;
1561
1562def VLD3DUPq8_UPD  : VLD3DUPWB<{0,0,1,0}, "8",  addrmode6dupalign64>;
1563def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
1564def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;
1565
1566def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1567def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1568def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>, Sched<[WriteVLD2]>;
1569
1570//   VLD4DUP  : Vector Load (single 4-element structure to all lanes)
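// Example syntax (operands chosen for illustration):
//   vld4.8 {d0[], d1[], d2[], d3[]}, [r0]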
1571class VLD4DUP<bits<4> op7_4, string Dt>
1572  : NLdSt<1, 0b10, 0b1111, op7_4,
1573          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
1574          (ins addrmode6dup:$Rn), IIC_VLD4dup,
1575          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
1576  let Rm = 0b1111;
1577  let Inst{4} = Rn{4};
1578  let DecoderMethod = "DecodeVLD4DupInstruction";
1579}
1580
1581def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
1582def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
1583def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1584
1585def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1586def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1587def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1588
1589// ...with double-spaced registers (not used for codegen):
1590def VLD4DUPq8  : VLD4DUP<{0,0,1,?}, "8">;
1591def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
1592def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1593
1594def VLD4DUPq8EvenPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1595def VLD4DUPq8OddPseudo   : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1596def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1597def VLD4DUPq16OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1598def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1599def VLD4DUPq32OddPseudo  : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
1600
1601// ...with address register writeback:
1602class VLD4DUPWB<bits<4> op7_4, string Dt>
1603  : NLdSt<1, 0b10, 0b1111, op7_4,
1604          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
1605          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
1606          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
1607          "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> {
1608  let Inst{4} = Rn{4};
1609  let DecoderMethod = "DecodeVLD4DupInstruction";
1610}
1611
1612def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
1613def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
1614def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
1615
1616def VLD4DUPq8_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
1617def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
1618def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
1619
1620def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1621def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1622def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>, Sched<[WriteVLD2]>;
1623
1624} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1
1625
1626let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
1627
1628// Classes for VST* pseudo-instructions with multi-register operands.
1629// These are expanded to real instructions after register allocation.
1630class VSTQPseudo<InstrItinClass itin>
1631  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
1632class VSTQWBPseudo<InstrItinClass itin>
1633  : PseudoNLdSt<(outs GPR:$wb),
1634                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
1635                "$addr.addr = $wb">;
1636class VSTQWBfixedPseudo<InstrItinClass itin>
1637  : PseudoNLdSt<(outs GPR:$wb),
1638                (ins addrmode6:$addr, QPR:$src), itin,
1639                "$addr.addr = $wb">;
1640class VSTQWBregisterPseudo<InstrItinClass itin>
1641  : PseudoNLdSt<(outs GPR:$wb),
1642                (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
1643                "$addr.addr = $wb">;
1644class VSTQQPseudo<InstrItinClass itin>
1645  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
1646class VSTQQWBPseudo<InstrItinClass itin>
1647  : PseudoNLdSt<(outs GPR:$wb),
1648                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
1649                "$addr.addr = $wb">;
1650class VSTQQWBfixedPseudo<InstrItinClass itin>
1651  : PseudoNLdSt<(outs GPR:$wb),
1652                (ins addrmode6:$addr, QQPR:$src), itin,
1653                "$addr.addr = $wb">;
1654class VSTQQWBregisterPseudo<InstrItinClass itin>
1655  : PseudoNLdSt<(outs GPR:$wb),
1656                (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
1657                "$addr.addr = $wb">;
1658
1659class VSTQQQQPseudo<InstrItinClass itin>
1660  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
1661class VSTQQQQWBPseudo<InstrItinClass itin>
1662  : PseudoNLdSt<(outs GPR:$wb),
1663                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
1664                "$addr.addr = $wb">;
1665
1666//   VST1     : Vector Store (multiple single elements)
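// Example syntax (operands chosen for illustration):
//   vst1.32 {d0}, [r0]          @ one D register
//   vst1.8  {d0, d1}, [r0]      @ D-register pair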
1667class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
1668  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
1669          IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> {
1670  let Rm = 0b1111;
1671  let Inst{4} = Rn{4};
1672  let DecoderMethod = "DecodeVLDST1Instruction";
1673}
1674class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
1675  : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
1676          IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> {
1677  let Rm = 0b1111;
1678  let Inst{5-4} = Rn{5-4};
1679  let DecoderMethod = "DecodeVLDST1Instruction";
1680}
1681
1682def  VST1d8   : VST1D<{0,0,0,?}, "8",  addrmode6align64>;
1683def  VST1d16  : VST1D<{0,1,0,?}, "16", addrmode6align64>;
1684def  VST1d32  : VST1D<{1,0,0,?}, "32", addrmode6align64>;
1685def  VST1d64  : VST1D<{1,1,0,?}, "64", addrmode6align64>;
1686
1687def  VST1q8   : VST1Q<{0,0,?,?}, "8",  addrmode6align64or128>;
1688def  VST1q16  : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
1689def  VST1q32  : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
1690def  VST1q64  : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
1691
1692// ...with address register writeback:
1693multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
  def _fixed : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
1695                     (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
1696                     "vst1", Dt, "$Vd, $Rn!",
1697                     "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
1698    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1699    let Inst{4} = Rn{4};
1700    let DecoderMethod = "DecodeVLDST1Instruction";
1701  }
1702  def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
1703                        (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
1704                        IIC_VLD1u,
1705                        "vst1", Dt, "$Vd, $Rn, $Rm",
1706                        "$Rn.addr = $wb", []>, Sched<[WriteVST1]> {
1707    let Inst{4} = Rn{4};
1708    let DecoderMethod = "DecodeVLDST1Instruction";
1709  }
1710}
1711multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1712  def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1713                    (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
1714                     "vst1", Dt, "$Vd, $Rn!",
1715                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1716    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1717    let Inst{5-4} = Rn{5-4};
1718    let DecoderMethod = "DecodeVLDST1Instruction";
1719  }
1720  def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
1721                        (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
1722                        IIC_VLD1x2u,
1723                        "vst1", Dt, "$Vd, $Rn, $Rm",
1724                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1725    let Inst{5-4} = Rn{5-4};
1726    let DecoderMethod = "DecodeVLDST1Instruction";
1727  }
1728}
1729
1730defm VST1d8wb  : VST1DWB<{0,0,0,?}, "8",  addrmode6align64>;
1731defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
1732defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
1733defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;
1734
1735defm VST1q8wb  : VST1QWB<{0,0,?,?}, "8",  addrmode6align64or128>;
1736defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
1737defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
1738defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
1739
1740// ...with 3 registers
1741class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
1742  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
1743          (ins AddrMode:$Rn, VecListThreeD:$Vd),
1744          IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> {
1745  let Rm = 0b1111;
1746  let Inst{4} = Rn{4};
1747  let DecoderMethod = "DecodeVLDST1Instruction";
1748}
1749multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
1750  def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1751                    (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
1752                     "vst1", Dt, "$Vd, $Rn!",
1753                     "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1754    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1755    let Inst{5-4} = Rn{5-4};
1756    let DecoderMethod = "DecodeVLDST1Instruction";
1757  }
1758  def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
1759                        (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
1760                        IIC_VLD1x3u,
1761                        "vst1", Dt, "$Vd, $Rn, $Rm",
1762                        "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1763    let Inst{5-4} = Rn{5-4};
1764    let DecoderMethod = "DecodeVLDST1Instruction";
1765  }
1766}
1767
1768def VST1d8T     : VST1D3<{0,0,0,?}, "8",  addrmode6align64>;
1769def VST1d16T    : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
1770def VST1d32T    : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
1771def VST1d64T    : VST1D3<{1,1,0,?}, "64", addrmode6align64>;
1772
1773defm VST1d8Twb  : VST1D3WB<{0,0,0,?}, "8",  addrmode6align64>;
1774defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
1775defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
1776defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
1777
1778def VST1d8TPseudo             : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1779def VST1d8TPseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1780def VST1d8TPseudoWB_register  : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1781def VST1d16TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1782def VST1d16TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1783def VST1d16TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1784def VST1d32TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1785def VST1d32TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1786def VST1d32TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1787def VST1d64TPseudo            : VSTQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1788def VST1d64TPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1789def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>, Sched<[WriteVST3]>;
1790
1791def VST1q8HighTPseudo     : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1792def VST1q16HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1793def VST1q32HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1794def VST1q64HighTPseudo    : VSTQQQQPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1795
1796def VST1q8HighTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1797def VST1q16HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1798def VST1q32HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1799def VST1q64HighTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1800
1801def VST1q8LowTPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1802def VST1q16LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1803def VST1q32LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1804def VST1q64LowTPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x3>, Sched<[WriteVST3]>;
1805
1806// ...with 4 registers
1807class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
1808  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
1809          (ins AddrMode:$Rn, VecListFourD:$Vd),
1810          IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
1811          []>, Sched<[WriteVST4]> {
1812  let Rm = 0b1111;
1813  let Inst{5-4} = Rn{5-4};
1814  let DecoderMethod = "DecodeVLDST1Instruction";
1815}
1816multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
1817  def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1818                    (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
1819                     "vst1", Dt, "$Vd, $Rn!",
1820                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1821    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1822    let Inst{5-4} = Rn{5-4};
1823    let DecoderMethod = "DecodeVLDST1Instruction";
1824  }
1825  def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
1826                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1827                        IIC_VLD1x4u,
1828                        "vst1", Dt, "$Vd, $Rn, $Rm",
1829                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1830    let Inst{5-4} = Rn{5-4};
1831    let DecoderMethod = "DecodeVLDST1Instruction";
1832  }
1833}
1834
1835def VST1d8Q     : VST1D4<{0,0,?,?}, "8",  addrmode6align64or128or256>;
1836def VST1d16Q    : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
1837def VST1d32Q    : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
1838def VST1d64Q    : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
1839
1840defm VST1d8Qwb  : VST1D4WB<{0,0,?,?}, "8",  addrmode6align64or128or256>;
1841defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
1842defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
1843defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
1844
1845def VST1d8QPseudo             : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1846def VST1d8QPseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1847def VST1d8QPseudoWB_register  : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1848def VST1d16QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1849def VST1d16QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1850def VST1d16QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1851def VST1d32QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1852def VST1d32QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1853def VST1d32QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1854def VST1d64QPseudo            : VSTQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1855def VST1d64QPseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1856def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>, Sched<[WriteVST4]>;
1857
1858def VST1q8HighQPseudo     : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1859def VST1q16HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1860def VST1q32HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1861def VST1q64HighQPseudo    : VSTQQQQPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1862
1863def VST1q8HighQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1864def VST1q16HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1865def VST1q32HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1866def VST1q64HighQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1867
1868def VST1q8LowQPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1869def VST1q16LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1870def VST1q32LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1871def VST1q64LowQPseudo_UPD : VSTQQQQWBPseudo<IIC_VST1x4>, Sched<[WriteVST4]>;
1872
1873//   VST2     : Vector Store (multiple 2-element structures)
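// Example syntax (operands chosen for illustration):
//   vst2.8 {d0, d1}, [r0]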
1874class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
1875            InstrItinClass itin, Operand AddrMode>
1876  : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
1877          itin, "vst2", Dt, "$Vd, $Rn", "", []> {
1878  let Rm = 0b1111;
1879  let Inst{5-4} = Rn{5-4};
1880  let DecoderMethod = "DecodeVLDST2Instruction";
1881}
1882
1883def  VST2d8   : VST2<0b1000, {0,0,?,?}, "8",  VecListDPair, IIC_VST2,
1884                     addrmode6align64or128>, Sched<[WriteVST2]>;
1885def  VST2d16  : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
1886                     addrmode6align64or128>, Sched<[WriteVST2]>;
1887def  VST2d32  : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
1888                     addrmode6align64or128>, Sched<[WriteVST2]>;
1889
1890def  VST2q8   : VST2<0b0011, {0,0,?,?}, "8",  VecListFourD, IIC_VST2x2,
1891                     addrmode6align64or128or256>, Sched<[WriteVST4]>;
1892def  VST2q16  : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
1893                     addrmode6align64or128or256>, Sched<[WriteVST4]>;
1894def  VST2q32  : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
1895                     addrmode6align64or128or256>, Sched<[WriteVST4]>;
1896
1897def  VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1898def  VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1899def  VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>, Sched<[WriteVST4]>;
1900
1901// ...with address register writeback:
1902multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
1903                   RegisterOperand VdTy, Operand AddrMode> {
1904  def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1905                     (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
1906                     "vst2", Dt, "$Vd, $Rn!",
1907                     "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1908    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1909    let Inst{5-4} = Rn{5-4};
1910    let DecoderMethod = "DecodeVLDST2Instruction";
1911  }
1912  def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1913                        (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
1914                        "vst2", Dt, "$Vd, $Rn, $Rm",
1915                        "$Rn.addr = $wb", []>, Sched<[WriteVST2]> {
1916    let Inst{5-4} = Rn{5-4};
1917    let DecoderMethod = "DecodeVLDST2Instruction";
1918  }
1919}
1920multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
1921  def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1922                     (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
1923                     "vst2", Dt, "$Vd, $Rn!",
1924                     "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1925    let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
1926    let Inst{5-4} = Rn{5-4};
1927    let DecoderMethod = "DecodeVLDST2Instruction";
1928  }
1929  def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
1930                        (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
1931                        IIC_VLD1u,
1932                        "vst2", Dt, "$Vd, $Rn, $Rm",
1933                        "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
1934    let Inst{5-4} = Rn{5-4};
1935    let DecoderMethod = "DecodeVLDST2Instruction";
1936  }
1937}
1938
1939defm VST2d8wb    : VST2DWB<0b1000, {0,0,?,?}, "8",  VecListDPair,
1940                           addrmode6align64or128>;
1941defm VST2d16wb   : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
1942                           addrmode6align64or128>;
1943defm VST2d32wb   : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
1944                           addrmode6align64or128>;
1945
1946defm VST2q8wb    : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
1947defm VST2q16wb   : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
1948defm VST2q32wb   : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;
1949
1950def VST2q8PseudoWB_fixed     : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1951def VST2q16PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1952def VST2q32PseudoWB_fixed    : VSTQQWBfixedPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1953def VST2q8PseudoWB_register  : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1954def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1955def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>, Sched<[WriteVST4]>;
1956
1957// ...with double-spaced registers
1958def VST2b8      : VST2<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced, IIC_VST2,
1959                      addrmode6align64or128>;
1960def VST2b16     : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
1961                      addrmode6align64or128>;
1962def VST2b32     : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
1963                      addrmode6align64or128>;
1964defm VST2b8wb   : VST2DWB<0b1001, {0,0,?,?}, "8",  VecListDPairSpaced,
1965                          addrmode6align64or128>;
1966defm VST2b16wb  : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
1967                          addrmode6align64or128>;
1968defm VST2b32wb  : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
1969                          addrmode6align64or128>;
1970
1971//   VST3     : Vector Store (multiple 3-element structures)
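// Example syntax (operands chosen for illustration):
//   vst3.16 {d0, d1, d2}, [r0]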
1972class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
1973  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
1974          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
1975          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> {
1976  let Rm = 0b1111;
1977  let Inst{4} = Rn{4};
1978  let DecoderMethod = "DecodeVLDST3Instruction";
1979}
1980
1981def  VST3d8   : VST3D<0b0100, {0,0,0,?}, "8">;
1982def  VST3d16  : VST3D<0b0100, {0,1,0,?}, "16">;
1983def  VST3d32  : VST3D<0b0100, {1,0,0,?}, "32">;
1984
1985def  VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1986def  VST3d16Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1987def  VST3d32Pseudo : VSTQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
1988
1989// ...with address register writeback:
1990class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
1991  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
1992          (ins addrmode6:$Rn, am6offset:$Rm,
1993           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
1994          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
1995          "$Rn.addr = $wb", []>, Sched<[WriteVST3]> {
1996  let Inst{4} = Rn{4};
1997  let DecoderMethod = "DecodeVLDST3Instruction";
1998}
1999
2000def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
2001def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
2002def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
2003
2004def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2005def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2006def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2007
2008// ...with double-spaced registers:
2009def VST3q8      : VST3D<0b0101, {0,0,0,?}, "8">;
2010def VST3q16     : VST3D<0b0101, {0,1,0,?}, "16">;
2011def VST3q32     : VST3D<0b0101, {1,0,0,?}, "32">;
2012def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
2013def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
2014def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
2015
2016def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2017def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2018def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2019
2020// ...alternate versions to be allocated odd register numbers:
2021def VST3q8oddPseudo   : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2022def VST3q16oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2023def VST3q32oddPseudo  : VSTQQQQPseudo<IIC_VST3>, Sched<[WriteVST3]>;
2024
2025def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2026def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2027def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>, Sched<[WriteVST3]>;
2028
2029//   VST4     : Vector Store (multiple 4-element structures)
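// Example syntax (operands chosen for illustration):
//   vst4.32 {d0, d1, d2, d3}, [r0]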
2030class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
2031  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
2032          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
2033          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
2034          "", []>, Sched<[WriteVST4]> {
2035  let Rm = 0b1111;
2036  let Inst{5-4} = Rn{5-4};
2037  let DecoderMethod = "DecodeVLDST4Instruction";
2038}
2039
2040def  VST4d8   : VST4D<0b0000, {0,0,?,?}, "8">;
2041def  VST4d16  : VST4D<0b0000, {0,1,?,?}, "16">;
2042def  VST4d32  : VST4D<0b0000, {1,0,?,?}, "32">;
2043
2044def  VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2045def  VST4d16Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2046def  VST4d32Pseudo : VSTQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2047
2048// ...with address register writeback:
2049class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2050  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
2051          (ins addrmode6:$Rn, am6offset:$Rm,
2052           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
2053           "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
2054          "$Rn.addr = $wb", []>, Sched<[WriteVST4]> {
2055  let Inst{5-4} = Rn{5-4};
2056  let DecoderMethod = "DecodeVLDST4Instruction";
2057}
2058
2059def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
2060def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
2061def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
2062
2063def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2064def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2065def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2066
2067// ...with double-spaced registers:
2068def VST4q8      : VST4D<0b0001, {0,0,?,?}, "8">;
2069def VST4q16     : VST4D<0b0001, {0,1,?,?}, "16">;
2070def VST4q32     : VST4D<0b0001, {1,0,?,?}, "32">;
2071def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
2072def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
2073def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
2074
2075def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2076def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2077def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2078
2079// ...alternate versions to be allocated odd register numbers:
2080def VST4q8oddPseudo   : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2081def VST4q16oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2082def VST4q32oddPseudo  : VSTQQQQPseudo<IIC_VST4>, Sched<[WriteVST4]>;
2083
2084def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2085def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2086def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>, Sched<[WriteVST4]>;
2087
2088} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2089
2090// Classes for VST*LN pseudo-instructions with multi-register operands.
2091// These are expanded to real instructions after register allocation.
2092class VSTQLNPseudo<InstrItinClass itin>
2093  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
2094                itin, "">;
2095class VSTQLNWBPseudo<InstrItinClass itin>
2096  : PseudoNLdSt<(outs GPR:$wb),
2097                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
2098                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2099class VSTQQLNPseudo<InstrItinClass itin>
2100  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
2101                itin, "">;
2102class VSTQQLNWBPseudo<InstrItinClass itin>
2103  : PseudoNLdSt<(outs GPR:$wb),
2104                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
2105                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2106class VSTQQQQLNPseudo<InstrItinClass itin>
2107  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
2108                itin, "">;
2109class VSTQQQQLNWBPseudo<InstrItinClass itin>
2110  : PseudoNLdSt<(outs GPR:$wb),
2111                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
2112                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
2113
2114//   VST1LN   : Vector Store (single element from one lane)
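// Example syntax (operands chosen for illustration):
//   vst1.8 {d0[3]}, [r0]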
2115class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2116             PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
2117  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2118          (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
2119          IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
2120          [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>,
2121     Sched<[WriteVST1]> {
2122  let Rm = 0b1111;
2123  let DecoderMethod = "DecodeVST1LN";
2124}
2125class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2126  : VSTQLNPseudo<IIC_VST1ln>, Sched<[WriteVST1]> {
2127  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
2128                          addrmode6:$addr)];
2129}
2130
2131def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
2132                       ARMvgetlaneu, addrmode6> {
2133  let Inst{7-5} = lane{2-0};
2134}
2135def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
2136                       ARMvgetlaneu, addrmode6> {
2137  let Inst{7-6} = lane{1-0};
2138  let Inst{4}   = Rn{4};
2139}
2140
2141def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
2142                       addrmode6oneL32> {
2143  let Inst{7}   = lane{0};
2144  let Inst{5-4} = Rn{5-4};
2145}
2146
2147def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, ARMvgetlaneu>;
2148def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, ARMvgetlaneu>;
2149def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
2150
2151let Predicates = [HasNEON] in {
2152def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
2153          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
2154def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
2155          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2156
2157def : Pat<(store (extractelt (v4f16 DPR:$src), imm:$lane), addrmode6:$addr),
2158          (VST1LNd16 addrmode6:$addr, DPR:$src, imm:$lane)>;
2159def : Pat<(store (extractelt (v8f16 QPR:$src), imm:$lane), addrmode6:$addr),
2160          (VST1LNq16Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
2161}
2162
2163// ...with address register writeback:
2164class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
2165               PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
2166  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2167          (ins AdrMode:$Rn, am6offset:$Rm,
2168           DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
2169          "\\{$Vd[$lane]\\}, $Rn$Rm",
2170          "$Rn.addr = $wb",
2171          [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
2172                                  AdrMode:$Rn, am6offset:$Rm))]>,
2173    Sched<[WriteVST1]> {
2174  let DecoderMethod = "DecodeVST1LN";
2175}
2176class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
2177  : VSTQLNWBPseudo<IIC_VST1lnu>, Sched<[WriteVST1]> {
2178  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
2179                                        addrmode6:$addr, am6offset:$offset))];
2180}
2181
2182def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
2183                             ARMvgetlaneu, addrmode6> {
2184  let Inst{7-5} = lane{2-0};
2185}
2186def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
2187                             ARMvgetlaneu, addrmode6> {
2188  let Inst{7-6} = lane{1-0};
2189  let Inst{4}   = Rn{4};
2190}
2191def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
2192                             extractelt, addrmode6oneL32> {
2193  let Inst{7}   = lane{0};
2194  let Inst{5-4} = Rn{5-4};
2195}
2196
2197def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, ARMvgetlaneu>;
def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16, ARMvgetlaneu>;
2199def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
2200
2201let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in {
2202
2203//   VST2LN   : Vector Store (single 2-element structure from one lane)
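// Example syntax (operands chosen for illustration):
//   vst2.16 {d0[1], d1[1]}, [r0]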
2204class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2205  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2206          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
2207          IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
2208          "", []>, Sched<[WriteVST1]> {
2209  let Rm = 0b1111;
2210  let Inst{4}   = Rn{4};
2211  let DecoderMethod = "DecodeVST2LN";
2212}
2213
2214def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
2215  let Inst{7-5} = lane{2-0};
2216}
2217def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
2218  let Inst{7-6} = lane{1-0};
2219}
2220def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
2221  let Inst{7}   = lane{0};
2222}
2223
2224def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2225def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2226def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2227
2228// ...with double-spaced registers:
2229def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
2230  let Inst{7-6} = lane{1-0};
2231  let Inst{4}   = Rn{4};
2232}
2233def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
2234  let Inst{7}   = lane{0};
2235  let Inst{4}   = Rn{4};
2236}
2237
2238def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2239def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>, Sched<[WriteVST1]>;
2240
2241// ...with address register writeback:
2242class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2243  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2244          (ins addrmode6:$Rn, am6offset:$Rm,
2245           DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
2246          "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
2247          "$Rn.addr = $wb", []> {
2248  let Inst{4}   = Rn{4};
2249  let DecoderMethod = "DecodeVST2LN";
2250}
2251
2252def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
2253  let Inst{7-5} = lane{2-0};
2254}
2255def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
2256  let Inst{7-6} = lane{1-0};
2257}
2258def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
2259  let Inst{7}   = lane{0};
2260}
2261
2262def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2263def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2264def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2265
2266def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
2267  let Inst{7-6} = lane{1-0};
2268}
2269def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
2270  let Inst{7}   = lane{0};
2271}
2272
2273def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2274def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>, Sched<[WriteVST1]>;
2275
2276//   VST3LN   : Vector Store (single 3-element structure from one lane)
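// Example syntax (operands chosen for illustration):
//   vst3.32 {d0[0], d1[0], d2[0]}, [r0]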
2277class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2278  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2279          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
2280           nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
2281          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>,
2282    Sched<[WriteVST2]> {
2283  let Rm = 0b1111;
2284  let DecoderMethod = "DecodeVST3LN";
2285}
2286
2287def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
2288  let Inst{7-5} = lane{2-0};
2289}
2290def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
2291  let Inst{7-6} = lane{1-0};
2292}
2293def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
2294  let Inst{7}   = lane{0};
2295}
2296
2297def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2298def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2299def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>, Sched<[WriteVST2]>;
2300
2301// ...with double-spaced registers:
2302def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
2303  let Inst{7-6} = lane{1-0};
2304}
2305def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
2306  let Inst{7}   = lane{0};
2307}
2308
2309def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
2310def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
2311
2312// ...with address register writeback:
2313class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2314  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2315          (ins addrmode6:$Rn, am6offset:$Rm,
2316           DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
2317          IIC_VST3lnu, "vst3", Dt,
2318          "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
2319          "$Rn.addr = $wb", []> {
2320  let DecoderMethod = "DecodeVST3LN";
2321}
2322
2323def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
2324  let Inst{7-5} = lane{2-0};
2325}
2326def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
2327  let Inst{7-6} = lane{1-0};
2328}
2329def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
2330  let Inst{7}   = lane{0};
2331}
2332
2333def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2334def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2335def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2336
2337def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
2338  let Inst{7-6} = lane{1-0};
2339}
2340def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
2341  let Inst{7}   = lane{0};
2342}
2343
2344def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2345def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>, Sched<[WriteVST2]>;
2346
2347//   VST4LN   : Vector Store (single 4-element structure from one lane)
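// Example syntax (operands chosen for illustration):
//   vst4.8 {d0[2], d1[2], d2[2], d3[2]}, [r0]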
2348class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
2349  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
2350          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
2351           nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
2352          "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
2353          "", []>, Sched<[WriteVST2]> {
2354  let Rm = 0b1111;
2355  let Inst{4} = Rn{4};
2356  let DecoderMethod = "DecodeVST4LN";
2357}
2358
2359def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
2360  let Inst{7-5} = lane{2-0};
2361}
2362def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
2363  let Inst{7-6} = lane{1-0};
2364}
2365def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
2366  let Inst{7}   = lane{0};
2367  let Inst{5} = Rn{5};
2368}
2369
2370def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2371def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2372def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2373
2374// ...with double-spaced registers:
2375def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
2376  let Inst{7-6} = lane{1-0};
2377}
2378def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
2379  let Inst{7}   = lane{0};
2380  let Inst{5} = Rn{5};
2381}
2382
2383def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2384def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>, Sched<[WriteVST2]>;
2385
2386// ...with address register writeback:
2387class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
2388  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
2389          (ins addrmode6:$Rn, am6offset:$Rm,
2390           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
2391          IIC_VST4lnu, "vst4", Dt,
2392  "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
2393          "$Rn.addr = $wb", []> {
2394  let Inst{4} = Rn{4};
2395  let DecoderMethod = "DecodeVST4LN";
2396}
2397
2398def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
2399  let Inst{7-5} = lane{2-0};
2400}
2401def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
2402  let Inst{7-6} = lane{1-0};
2403}
2404def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
2405  let Inst{7}   = lane{0};
2406  let Inst{5} = Rn{5};
2407}
2408
2409def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2410def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2411def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2412
2413def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
2414  let Inst{7-6} = lane{1-0};
2415}
2416def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
2417  let Inst{7}   = lane{0};
2418  let Inst{5} = Rn{5};
2419}
2420
2421def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2422def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>, Sched<[WriteVST2]>;
2423
2424} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1
2425
2426// Use vld1/vst1 for unaligned f64 load / store
2427let Predicates = [IsLE,HasNEON] in {
2428def : Pat<(f64 (hword_alignedload addrmode6:$addr)),
2429          (VLD1d16 addrmode6:$addr)>;
2430def : Pat<(hword_alignedstore (f64 DPR:$value), addrmode6:$addr),
2431          (VST1d16 addrmode6:$addr, DPR:$value)>;
2432def : Pat<(f64 (byte_alignedload addrmode6:$addr)),
2433          (VLD1d8 addrmode6:$addr)>;
2434def : Pat<(byte_alignedstore (f64 DPR:$value), addrmode6:$addr),
2435          (VST1d8 addrmode6:$addr, DPR:$value)>;
2436}
2437let Predicates = [IsBE,HasNEON] in {
2438def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
2439          (VLD1d64 addrmode6:$addr)>;
2440def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
2441          (VST1d64 addrmode6:$addr, DPR:$value)>;
2442}
2443
2444// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
2445// load / store if it's legal.
2446let Predicates = [HasNEON] in {
2447def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
2448          (VLD1q64 addrmode6:$addr)>;
2449def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2450          (VST1q64 addrmode6:$addr, QPR:$value)>;
2451}
2452let Predicates = [IsLE,HasNEON] in {
2453def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
2454          (VLD1q32 addrmode6:$addr)>;
2455def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2456          (VST1q32 addrmode6:$addr, QPR:$value)>;
2457def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
2458          (VLD1q16 addrmode6:$addr)>;
2459def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2460          (VST1q16 addrmode6:$addr, QPR:$value)>;
2461def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
2462          (VLD1q8 addrmode6:$addr)>;
2463def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
2464          (VST1q8 addrmode6:$addr, QPR:$value)>;
2465}
2466
2467//===----------------------------------------------------------------------===//
2468// Instruction Classes
2469//===----------------------------------------------------------------------===//
2470
2471// Basic 2-register operations: double- and quad-register.
2472class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2473           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
2474           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
2475  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2476        (ins DPR:$Vm), IIC_VUNAD, OpcodeStr, Dt,"$Vd, $Vm", "",
2477        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm))))]>;
2478class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2479           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
2480           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
2481  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2482        (ins QPR:$Vm), IIC_VUNAQ, OpcodeStr, Dt,"$Vd, $Vm", "",
2483        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm))))]>;
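// For example, the f32 VABS and VNEG instructions later in this file are
// built from N2VD/N2VQ with fabs and fneg as the OpNode.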
2484
2485// Basic 2-register intrinsics, both double- and quad-register.
2486class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2487              bits<2> op17_16, bits<5> op11_7, bit op4,
2488              InstrItinClass itin, string OpcodeStr, string Dt,
2489              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2490  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
2491        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2492        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
2493class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2494              bits<2> op17_16, bits<5> op11_7, bit op4,
2495              InstrItinClass itin, string OpcodeStr, string Dt,
2496              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2497  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
2498        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2499        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2500
2501// Same as above, but not predicated.
2502class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
2503              InstrItinClass itin, string OpcodeStr, string Dt,
2504              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2505  : N2Vnp<op19_18, op17_16, op10_8, op7, 0,  (outs DPR:$Vd), (ins DPR:$Vm),
2506          itin, OpcodeStr, Dt,
2507          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
2508
2509class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
2510              InstrItinClass itin, string OpcodeStr, string Dt,
2511              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2512  : N2Vnp<op19_18, op17_16, op10_8, op7, 1,  (outs QPR:$Vd), (ins QPR:$Vm),
2513          itin, OpcodeStr, Dt,
2514          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2515
// Similar to N2VQIntnp with some more encoding bits exposed (crypto).
2517class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
2518              bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
2519              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2520  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,  (outs QPR:$Vd), (ins QPR:$Vm),
2521          itin, OpcodeStr, Dt,
2522          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
2523
2524// Same as N2VQIntXnp but with Vd as a src register.
2525class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
2526              bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
2527              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2528  : N2Vnp<op19_18, op17_16, op10_8, op7, op6,
2529          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
2530          itin, OpcodeStr, Dt,
2531          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
2532  let Constraints = "$src = $Vd";
2533}
2534
2535// Narrow 2-register operations.
2536class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2537           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2538           InstrItinClass itin, string OpcodeStr, string Dt,
2539           ValueType TyD, ValueType TyQ, SDNode OpNode>
2540  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
2541        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2542        [(set DPR:$Vd, (TyD (OpNode (TyQ QPR:$Vm))))]>;
2543
2544// Narrow 2-register intrinsics.
2545class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2546              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2547              InstrItinClass itin, string OpcodeStr, string Dt,
2548              ValueType TyD, ValueType TyQ, SDPatternOperator IntOp>
2549  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$Vd),
2550        (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2551        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vm))))]>;
2552
2553// Long 2-register operations (currently only used for VMOVL).
2554class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2555           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2556           InstrItinClass itin, string OpcodeStr, string Dt,
2557           ValueType TyQ, ValueType TyD, SDNode OpNode>
2558  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
2559        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2560        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vm))))]>;
2561
2562// Long 2-register intrinsics.
2563class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
2564              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
2565              InstrItinClass itin, string OpcodeStr, string Dt,
2566              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
2567  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$Vd),
2568        (ins DPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "",
2569        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vm))))]>;
2570
2571// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
2572class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
2573  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
2574        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
2575        OpcodeStr, Dt, "$Vd, $Vm",
2576        "$src1 = $Vd, $src2 = $Vm", []>;
2577class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
2578                  InstrItinClass itin, string OpcodeStr, string Dt>
2579  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
2580        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
2581        "$src1 = $Vd, $src2 = $Vm", []>;
2582
2583// Basic 3-register operations: double- and quad-register.
2584class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2585           InstrItinClass itin, string OpcodeStr, string Dt,
2586           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2587  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2588        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2589        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2590        [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2591  // All of these have a two-operand InstAlias.
2592  let TwoOperandAliasConstraint = "$Vn = $Vd";
2593  let isCommutable = Commutable;
2594}
2595// Same as N3VD but no data type.
2596class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2597           InstrItinClass itin, string OpcodeStr,
2598           ValueType ResTy, ValueType OpTy,
2599           SDNode OpNode, bit Commutable>
2600  : N3VX<op24, op23, op21_20, op11_8, 0, op4,
2601         (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2602         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set DPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2604  // All of these have a two-operand InstAlias.
2605  let TwoOperandAliasConstraint = "$Vn = $Vd";
2606  let isCommutable = Commutable;
2607}
2608
2609class N3VDSL<bits<2> op21_20, bits<4> op11_8,
2610             InstrItinClass itin, string OpcodeStr, string Dt,
2611             ValueType Ty, SDNode ShOp>
2612  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2613        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2614        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2615        [(set (Ty DPR:$Vd),
2616              (Ty (ShOp (Ty DPR:$Vn),
2617                        (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
2618  // All of these have a two-operand InstAlias.
2619  let TwoOperandAliasConstraint = "$Vn = $Vd";
2620  let isCommutable = 0;
2621}
2622class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
2623               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
2624  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2625        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2626        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
2627        [(set (Ty DPR:$Vd),
2628              (Ty (ShOp (Ty DPR:$Vn),
2629                        (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2630  // All of these have a two-operand InstAlias.
2631  let TwoOperandAliasConstraint = "$Vn = $Vd";
2632  let isCommutable = 0;
2633}
2634
2635class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2636           InstrItinClass itin, string OpcodeStr, string Dt,
2637           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2638  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2639        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2640        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2641        [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2642  // All of these have a two-operand InstAlias.
2643  let TwoOperandAliasConstraint = "$Vn = $Vd";
2644  let isCommutable = Commutable;
2645}
2646class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2647           InstrItinClass itin, string OpcodeStr,
2648           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
2649  : N3VX<op24, op23, op21_20, op11_8, 1, op4,
2650         (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2651         OpcodeStr, "$Vd, $Vn, $Vm", "",
         [(set QPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2653  // All of these have a two-operand InstAlias.
2654  let TwoOperandAliasConstraint = "$Vn = $Vd";
2655  let isCommutable = Commutable;
2656}
2657class N3VQSL<bits<2> op21_20, bits<4> op11_8,
2658             InstrItinClass itin, string OpcodeStr, string Dt,
2659             ValueType ResTy, ValueType OpTy, SDNode ShOp>
2660  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2661        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2662        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2663        [(set (ResTy QPR:$Vd),
2664              (ResTy (ShOp (ResTy QPR:$Vn),
2665                           (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
2666                                                imm:$lane)))))]> {
2667  // All of these have a two-operand InstAlias.
2668  let TwoOperandAliasConstraint = "$Vn = $Vd";
2669  let isCommutable = 0;
2670}
2671class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
2672               ValueType ResTy, ValueType OpTy, SDNode ShOp>
2673  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2674        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2675        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
2676        [(set (ResTy QPR:$Vd),
2677              (ResTy (ShOp (ResTy QPR:$Vn),
2678                           (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
2679                                                imm:$lane)))))]> {
2680  // All of these have a two-operand InstAlias.
2681  let TwoOperandAliasConstraint = "$Vn = $Vd";
2682  let isCommutable = 0;
2683}
2684
2685// Basic 3-register intrinsics, both double- and quad-register.
2686class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2687              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2688              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
2689  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2690        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin,
2691        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2692        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
2693  // All of these have a two-operand InstAlias.
2694  let TwoOperandAliasConstraint = "$Vn = $Vd";
2695  let isCommutable = Commutable;
2696}
2697
2698class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2699                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2700                string Dt, ValueType ResTy, ValueType OpTy,
2701                SDPatternOperator IntOp, bit Commutable>
2702  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2703          (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
2704          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
2705
2706class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2707                string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
2708  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2709        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2710        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2711        [(set (Ty DPR:$Vd),
2712              (Ty (IntOp (Ty DPR:$Vn),
2713                         (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
2714                                           imm:$lane)))))]> {
2715  let isCommutable = 0;
2716}
2717
2718class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2719                  string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
2720  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2721        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2722        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2723        [(set (Ty DPR:$Vd),
2724              (Ty (IntOp (Ty DPR:$Vn),
2725                         (Ty (ARMvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
2726  let isCommutable = 0;
2727}
2728class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2729              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2730              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2731  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2732        (outs DPR:$Vd), (ins DPR:$Vm, DPR:$Vn), f, itin,
2733        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2734        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (OpTy DPR:$Vn))))]> {
2735  let TwoOperandAliasConstraint = "$Vm = $Vd";
2736  let isCommutable = 0;
2737}
2738
2739class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2740              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2741              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable>
2742  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2743        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin,
2744        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
2745        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
2746  // All of these have a two-operand InstAlias.
2747  let TwoOperandAliasConstraint = "$Vn = $Vd";
2748  let isCommutable = Commutable;
2749}
2750
2751class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2752                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2753                string Dt, ValueType ResTy, ValueType OpTy,
2754                SDPatternOperator IntOp, bit Commutable>
2755  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2756          (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
2757          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2758
2759// Same as N3VQIntnp but with Vd as a src register.
2760class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
2761                bit op4, Format f, InstrItinClass itin, string OpcodeStr,
2762                string Dt, ValueType ResTy, ValueType OpTy,
2763                SDPatternOperator IntOp, bit Commutable>
2764  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
2765          (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
2766          f, itin, OpcodeStr, Dt,
2767          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
2768                                       (OpTy QPR:$Vm))))]> {
2769  let Constraints = "$src = $Vd";
2770}
2771
2772class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2773                string OpcodeStr, string Dt,
2774                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2775  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2776        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2777        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2778        [(set (ResTy QPR:$Vd),
2779              (ResTy (IntOp (ResTy QPR:$Vn),
2780                            (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
2781                                                 imm:$lane)))))]> {
2782  let isCommutable = 0;
2783}
2784class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2785                  string OpcodeStr, string Dt,
2786                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2787  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2788        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2789        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
2790        [(set (ResTy QPR:$Vd),
2791              (ResTy (IntOp (ResTy QPR:$Vn),
2792                            (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
2793                                                 imm:$lane)))))]> {
2794  let isCommutable = 0;
2795}
2796class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2797              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
2798              ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2799  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2800        (outs QPR:$Vd), (ins QPR:$Vm, QPR:$Vn), f, itin,
2801        OpcodeStr, Dt, "$Vd, $Vm, $Vn", "",
2802        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (OpTy QPR:$Vn))))]> {
2803  let TwoOperandAliasConstraint = "$Vm = $Vd";
2804  let isCommutable = 0;
2805}
2806
2807// Multiply-Add/Sub operations: double- and quad-register.
2808class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2809                InstrItinClass itin, string OpcodeStr, string Dt,
2810                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
2811  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2812        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2813        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2814        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2815                             (Ty (MulOp DPR:$Vn, DPR:$Vm)))))]>;
2816
2817class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2818                  string OpcodeStr, string Dt,
2819                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2820  : N3VLane32<0, 1, op21_20, op11_8, 1, 0,
2821        (outs DPR:$Vd),
2822        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2823        NVMulSLFrm, itin,
2824        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2825        [(set (Ty DPR:$Vd),
2826              (Ty (ShOp (Ty DPR:$src1),
2827                        (Ty (MulOp DPR:$Vn,
2828                                   (Ty (ARMvduplane (Ty DPR_VFP2:$Vm),
2829                                                     imm:$lane)))))))]>;
2830class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2831                    string OpcodeStr, string Dt,
2832                    ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
2833  : N3VLane16<0, 1, op21_20, op11_8, 1, 0,
2834        (outs DPR:$Vd),
2835        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2836        NVMulSLFrm, itin,
2837        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2838        [(set (Ty DPR:$Vd),
2839              (Ty (ShOp (Ty DPR:$src1),
2840                        (Ty (MulOp DPR:$Vn,
2841                                   (Ty (ARMvduplane (Ty DPR_8:$Vm),
2842                                                     imm:$lane)))))))]>;
2843
2844class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2845                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
2846                SDPatternOperator MulOp, SDPatternOperator OpNode>
2847  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2848        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2849        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2850        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
2851                             (Ty (MulOp QPR:$Vn, QPR:$Vm)))))]>;
2852class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2853                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
2854                  SDPatternOperator MulOp, SDPatternOperator ShOp>
2855  : N3VLane32<1, 1, op21_20, op11_8, 1, 0,
2856        (outs QPR:$Vd),
2857        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2858        NVMulSLFrm, itin,
2859        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2860        [(set (ResTy QPR:$Vd),
2861              (ResTy (ShOp (ResTy QPR:$src1),
2862                           (ResTy (MulOp QPR:$Vn,
2863                                   (ResTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
2864                                                        imm:$lane)))))))]>;
2865class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2866                    string OpcodeStr, string Dt,
2867                    ValueType ResTy, ValueType OpTy,
2868                    SDPatternOperator MulOp, SDPatternOperator ShOp>
2869  : N3VLane16<1, 1, op21_20, op11_8, 1, 0,
2870        (outs QPR:$Vd),
2871        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2872        NVMulSLFrm, itin,
2873        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2874        [(set (ResTy QPR:$Vd),
2875              (ResTy (ShOp (ResTy QPR:$src1),
2876                           (ResTy (MulOp QPR:$Vn,
2877                                   (ResTy (ARMvduplane (OpTy DPR_8:$Vm),
2878                                                        imm:$lane)))))))]>;
2879
2880// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
2881class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2882                InstrItinClass itin, string OpcodeStr, string Dt,
2883                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
2884  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2885        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2886        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2887        [(set DPR:$Vd, (Ty (OpNode DPR:$src1,
2888                             (Ty (IntOp (Ty DPR:$Vn), (Ty DPR:$Vm))))))]>;
2889class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2890                InstrItinClass itin, string OpcodeStr, string Dt,
2891                ValueType Ty, SDPatternOperator IntOp, SDNode OpNode>
2892  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2893        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2894        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2895        [(set QPR:$Vd, (Ty (OpNode QPR:$src1,
2896                             (Ty (IntOp (Ty QPR:$Vn), (Ty QPR:$Vm))))))]>;
2897
2898// Neon 3-argument intrinsics, both double- and quad-register.
2899// The destination register is also used as the first source operand register.
2900class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2901               InstrItinClass itin, string OpcodeStr, string Dt,
2902               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2903  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2904        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2905        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2906        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$src1),
2907                                      (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
2908class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2909               InstrItinClass itin, string OpcodeStr, string Dt,
2910               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2911  : N3V<op24, op23, op21_20, op11_8, 1, op4,
2912        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
2913        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2914        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src1),
2915                                      (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
2916
2917// Long Multiply-Add/Sub operations.
2918class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2919                InstrItinClass itin, string OpcodeStr, string Dt,
2920                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2921  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2922        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2923        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2924        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
2925                                (TyQ (MulOp (TyD DPR:$Vn),
2926                                            (TyD DPR:$Vm)))))]>;
2927class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
2928                  InstrItinClass itin, string OpcodeStr, string Dt,
2929                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2930  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2931        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2932        NVMulSLFrm, itin,
2933        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2934        [(set QPR:$Vd,
2935          (OpNode (TyQ QPR:$src1),
2936                  (TyQ (MulOp (TyD DPR:$Vn),
2937                              (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),
2938                                                 imm:$lane))))))]>;
2939class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
2940                    InstrItinClass itin, string OpcodeStr, string Dt,
2941                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
2942  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
2943        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2944        NVMulSLFrm, itin,
2945        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2946        [(set QPR:$Vd,
2947          (OpNode (TyQ QPR:$src1),
2948                  (TyQ (MulOp (TyD DPR:$Vn),
2949                              (TyD (ARMvduplane (TyD DPR_8:$Vm),
2950                                                 imm:$lane))))))]>;
2951
2952// Long Intrinsic-Op vector operations with explicit extend (VABAL).
2953class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2954                   InstrItinClass itin, string OpcodeStr, string Dt,
2955                   ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
2956                   SDNode OpNode>
2957  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2958        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2959        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2960        [(set QPR:$Vd, (OpNode (TyQ QPR:$src1),
2961                                (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
2962                                                        (TyD DPR:$Vm)))))))]>;
2963
2964// Neon Long 3-argument intrinsic.  The destination register is
2965// a quad-register and is also used as the first source operand register.
2966class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
2967               InstrItinClass itin, string OpcodeStr, string Dt,
2968               ValueType TyQ, ValueType TyD, SDPatternOperator IntOp>
2969  : N3V<op24, op23, op21_20, op11_8, 0, op4,
2970        (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
2971        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "$src1 = $Vd",
2972        [(set QPR:$Vd,
2973          (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$Vn), (TyD DPR:$Vm))))]>;
2974class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
2975                 string OpcodeStr, string Dt,
2976                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2977  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
2978        (outs QPR:$Vd),
2979        (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
2980        NVMulSLFrm, itin,
2981        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2982        [(set (ResTy QPR:$Vd),
2983              (ResTy (IntOp (ResTy QPR:$src1),
2984                            (OpTy DPR:$Vn),
2985                            (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
2986                                                imm:$lane)))))]>;
2987class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
2988                   InstrItinClass itin, string OpcodeStr, string Dt,
2989                   ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
2990  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
2991        (outs QPR:$Vd),
2992        (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
2993        NVMulSLFrm, itin,
2994        OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
2995        [(set (ResTy QPR:$Vd),
2996              (ResTy (IntOp (ResTy QPR:$src1),
2997                            (OpTy DPR:$Vn),
2998                            (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
2999                                                imm:$lane)))))]>;
3000
3001// Narrowing 3-register intrinsics.
3002class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3003              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
3004              SDPatternOperator IntOp, bit Commutable>
3005  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3006        (outs DPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINi4D,
3007        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3008        [(set DPR:$Vd, (TyD (IntOp (TyQ QPR:$Vn), (TyQ QPR:$Vm))))]> {
3009  let isCommutable = Commutable;
3010}
3011
3012// Long 3-register operations.
3013class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3014           InstrItinClass itin, string OpcodeStr, string Dt,
3015           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
3016  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3017        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3018        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3019        [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
3020  let isCommutable = Commutable;
3021}
3022
3023class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
3024             InstrItinClass itin, string OpcodeStr, string Dt,
3025             ValueType TyQ, ValueType TyD, SDNode OpNode>
3026  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3027        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3028        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3029        [(set QPR:$Vd,
3030          (TyQ (OpNode (TyD DPR:$Vn),
3031                       (TyD (ARMvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
3032class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3033               InstrItinClass itin, string OpcodeStr, string Dt,
3034               ValueType TyQ, ValueType TyD, SDNode OpNode>
3035  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3036        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3037        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3038        [(set QPR:$Vd,
3039          (TyQ (OpNode (TyD DPR:$Vn),
3040                       (TyD (ARMvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
3041
3042// Long 3-register operations with explicitly extended operands.
3043class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3044              InstrItinClass itin, string OpcodeStr, string Dt,
3045              ValueType TyQ, ValueType TyD, SDNode OpNode, SDPatternOperator ExtOp,
3046              bit Commutable>
3047  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3048        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3049        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3050        [(set QPR:$Vd, (OpNode (TyQ (ExtOp (TyD DPR:$Vn))),
3051                                (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
3052  let isCommutable = Commutable;
3053}
3054
3055// Long 3-register intrinsics with explicit extend (VABDL).
3056class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3057                 InstrItinClass itin, string OpcodeStr, string Dt,
3058                 ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, SDNode ExtOp,
3059                 bit Commutable>
3060  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3061        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3062        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3063        [(set QPR:$Vd, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$Vn),
3064                                                (TyD DPR:$Vm))))))]> {
3065  let isCommutable = Commutable;
3066}
3067
3068// Long 3-register intrinsics.
3069class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3070              InstrItinClass itin, string OpcodeStr, string Dt,
3071              ValueType TyQ, ValueType TyD, SDPatternOperator IntOp, bit Commutable>
3072  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3073        (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3074        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3075        [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> {
3076  let isCommutable = Commutable;
3077}
3078
3079// Same as above, but not predicated.
3080class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
3081                bit op4, InstrItinClass itin, string OpcodeStr,
3082                string Dt, ValueType ResTy, ValueType OpTy,
3083                SDPatternOperator IntOp, bit Commutable>
3084  : N3Vnp<op27_23, op21_20, op11_8, op6, op4,
3085          (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
3086          [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
3087
3088class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
3089                string OpcodeStr, string Dt,
3090                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3091  : N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
3092        (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
3093        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3094        [(set (ResTy QPR:$Vd),
3095              (ResTy (IntOp (OpTy DPR:$Vn),
3096                            (OpTy (ARMvduplane (OpTy DPR_VFP2:$Vm),
3097                                                imm:$lane)))))]>;
3098class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
3099                  InstrItinClass itin, string OpcodeStr, string Dt,
3100                  ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3101  : N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
3102        (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
3103        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
3104        [(set (ResTy QPR:$Vd),
3105              (ResTy (IntOp (OpTy DPR:$Vn),
3106                            (OpTy (ARMvduplane (OpTy DPR_8:$Vm),
3107                                                imm:$lane)))))]>;
3108
3109// Wide 3-register operations.
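// These (e.g. VADDW) take a full Q-register first operand and a D-register
// second operand that is extended to the wider element size before the op.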
3110class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3111           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
3112           SDNode OpNode, SDPatternOperator ExtOp, bit Commutable>
3113  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3114        (outs QPR:$Vd), (ins QPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VSUBiD,
3115        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3116        [(set QPR:$Vd, (OpNode (TyQ QPR:$Vn),
3117                                (TyQ (ExtOp (TyD DPR:$Vm)))))]> {
3118  // All of these have a two-operand InstAlias.
3119  let TwoOperandAliasConstraint = "$Vn = $Vd";
3120  let isCommutable = Commutable;
3121}
3122
3123// Pairwise long 2-register intrinsics, both double- and quad-register.
3124class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3125                bits<2> op17_16, bits<5> op11_7, bit op4,
3126                string OpcodeStr, string Dt,
3127                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3128  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$Vd),
3129        (ins DPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
3130        [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
3131class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3132                bits<2> op17_16, bits<5> op11_7, bit op4,
3133                string OpcodeStr, string Dt,
3134                ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3135  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$Vd),
3136        (ins QPR:$Vm), IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
3137        [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
3138
3139// Pairwise long 2-register accumulate intrinsics,
3140// both double- and quad-register.
3141// The destination register is also used as the first source operand register.
3142class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3143                 bits<2> op17_16, bits<5> op11_7, bit op4,
3144                 string OpcodeStr, string Dt,
3145                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3146  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
3147        (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vm), IIC_VPALiD,
3148        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
3149        [(set DPR:$Vd, (ResTy (IntOp (ResTy DPR:$src1), (OpTy DPR:$Vm))))]>;
3150class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
3151                 bits<2> op17_16, bits<5> op11_7, bit op4,
3152                 string OpcodeStr, string Dt,
3153                 ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
3154  : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4,
3155        (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vm), IIC_VPALiQ,
3156        OpcodeStr, Dt, "$Vd, $Vm", "$src1 = $Vd",
3157        [(set QPR:$Vd, (ResTy (IntOp (ResTy QPR:$src1), (OpTy QPR:$Vm))))]>;
3158
3159// Shift by immediate,
3160// both double- and quad-register.
3161let TwoOperandAliasConstraint = "$Vm = $Vd" in {
3162class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3163             Format f, InstrItinClass itin, Operand ImmTy,
3164             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
3165  : N2VImm<op24, op23, op11_8, op7, 0, op4,
3166           (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin,
3167           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3168           [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>;
3169class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3170             Format f, InstrItinClass itin, Operand ImmTy,
3171             string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode>
3172  : N2VImm<op24, op23, op11_8, op7, 1, op4,
3173           (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin,
3174           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3175           [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>;
3176}
3177
3178// Long shift by immediate.
3179class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
3180             string OpcodeStr, string Dt,
3181             ValueType ResTy, ValueType OpTy, Operand ImmTy,
3182             SDPatternOperator OpNode>
3183  : N2VImm<op24, op23, op11_8, op7, op6, op4,
3184           (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
3185           IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3186           [(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm), ImmTy:$SIMM)))]>;
3187
3188// Narrow shift by immediate.
3189class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
3190             InstrItinClass itin, string OpcodeStr, string Dt,
3191             ValueType ResTy, ValueType OpTy, Operand ImmTy,
3192             SDPatternOperator OpNode>
3193  : N2VImm<op24, op23, op11_8, op7, op6, op4,
3194           (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
3195           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3196           [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
3197                                          (i32 ImmTy:$SIMM))))]>;
3198
3199// Shift right by immediate and accumulate,
3200// both double- and quad-register.
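// (These are the forms behind VSRA/VRSRA: the shifted $Vm is added into the
// accumulator tied to $Vd.)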
3201let TwoOperandAliasConstraint = "$Vm = $Vd" in {
3202class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3203                Operand ImmTy, string OpcodeStr, string Dt,
3204                ValueType Ty, SDNode ShOp>
3205  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
3206           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
3207           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3208           [(set DPR:$Vd, (Ty (add DPR:$src1,
3209                                (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>;
3210class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3211                Operand ImmTy, string OpcodeStr, string Dt,
3212                ValueType Ty, SDNode ShOp>
3213  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
3214           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD,
3215           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3216           [(set QPR:$Vd, (Ty (add QPR:$src1,
3217                                (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>;
3218}
3219
3220// Shift by immediate and insert,
3221// both double- and quad-register.
3222let TwoOperandAliasConstraint = "$Vm = $Vd" in {
3223class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3224                Operand ImmTy, Format f, string OpcodeStr, string Dt,
3225                ValueType Ty,SDNode ShOp>
3226  : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd),
3227           (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD,
3228           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3229           [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>;
3230class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3231                Operand ImmTy, Format f, string OpcodeStr, string Dt,
3232                ValueType Ty,SDNode ShOp>
3233  : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd),
3234           (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ,
3235           OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd",
3236           [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>;
3237}
3238
3239// Convert, with fractional bits immediate,
3240// both double- and quad-register.
3241class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3242              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
3243              SDPatternOperator IntOp>
3244  : N2VImm<op24, op23, op11_8, op7, 0, op4,
3245           (outs DPR:$Vd), (ins DPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
3246           IIC_VUNAD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3247           [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm), (i32 imm:$SIMM))))]>;
3248class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
3249              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
3250              SDPatternOperator IntOp>
3251  : N2VImm<op24, op23, op11_8, op7, 1, op4,
3252           (outs QPR:$Vd), (ins QPR:$Vm, neon_vcvt_imm32:$SIMM), NVCVTFrm,
3253           IIC_VUNAQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
3254           [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>;
3255
3256//===----------------------------------------------------------------------===//
3257// Multiclasses
3258//===----------------------------------------------------------------------===//
3259
3260// Abbreviations used in multiclass suffixes:
3261//   Q = quarter int (8 bit) elements
3262//   H = half int (16 bit) elements
3263//   S = single int (32 bit) elements
3264//   D = double int (64 bit) elements
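// For example, a hypothetical "defm FOO : N3V_QHS<...>" (defined below)
// expands to FOOv8i8, FOOv4i16 and FOOv2i32 for the 64-bit register forms and
// FOOv16i8, FOOv8i16 and FOOv4i32 for the 128-bit register forms.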
3265
3266// Neon 2-register vector operations and intrinsics.
3267
3268// Neon 2-register comparisons.
3269//   source operand element sizes of 8, 16 and 32 bits:
3270multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3271                       bits<5> op11_7, bit op4, string opc, string Dt,
3272                       string asm, PatFrag fc> {
3273  // 64-bit vector types.
3274  def v8i8  : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
3275                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3276                  opc, !strconcat(Dt, "8"), asm, "",
3277                  [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>;
3278  def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
3279                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3280                  opc, !strconcat(Dt, "16"), asm, "",
3281                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>;
3282  def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
3283                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3284                  opc, !strconcat(Dt, "32"), asm, "",
3285                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>;
3286  def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
3287                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3288                  opc, "f32", asm, "",
3289                  [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> {
3290    let Inst{10} = 1; // overwrite F = 1
3291  }
3292  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
3293                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
3294                  opc, "f16", asm, "",
3295                  [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>,
3296              Requires<[HasNEON,HasFullFP16]> {
3297    let Inst{10} = 1; // overwrite F = 1
3298  }
3299
3300  // 128-bit vector types.
3301  def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
3302                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3303                  opc, !strconcat(Dt, "8"), asm, "",
3304                  [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>;
3305  def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
3306                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3307                  opc, !strconcat(Dt, "16"), asm, "",
3308                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>;
3309  def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
3310                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3311                  opc, !strconcat(Dt, "32"), asm, "",
3312                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>;
3313  def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
3314                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3315                  opc, "f32", asm, "",
3316                  [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> {
3317    let Inst{10} = 1; // overwrite F = 1
3318  }
3319  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
3320                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
3321                  opc, "f16", asm, "",
3322                  [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>,
3323              Requires<[HasNEON,HasFullFP16]> {
3324    let Inst{10} = 1; // overwrite F = 1
3325  }
3326}
3327
3328// Neon 3-register comparisons.
3329class N3VQ_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3330               InstrItinClass itin, string OpcodeStr, string Dt,
3331               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
3332  : N3V<op24, op23, op21_20, op11_8, 1, op4,
3333        (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), N3RegFrm, itin,
3334        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3335        [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> {
3336  // All of these have a two-operand InstAlias.
3337  let TwoOperandAliasConstraint = "$Vn = $Vd";
3338  let isCommutable = Commutable;
3339}
3340
3341class N3VD_cmp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
3342               InstrItinClass itin, string OpcodeStr, string Dt,
3343               ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable>
3344  : N3V<op24, op23, op21_20, op11_8, 0, op4,
3345        (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin,
3346        OpcodeStr, Dt, "$Vd, $Vn, $Vm", "",
3347        [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> {
3348  // All of these have a two-operand InstAlias.
3349  let TwoOperandAliasConstraint = "$Vn = $Vd";
3350  let isCommutable = Commutable;
3351}
3352
3353multiclass N3V_QHS_cmp<bit op24, bit op23, bits<4> op11_8, bit op4,
3354                       InstrItinClass itinD16, InstrItinClass itinD32,
3355                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3356                       string OpcodeStr, string Dt,
3357                       PatFrag fc, bit Commutable = 0> {
3358  // 64-bit vector types.
3359  def v8i8  : N3VD_cmp<op24, op23, 0b00, op11_8, op4, itinD16,
3360                       OpcodeStr, !strconcat(Dt, "8"),
3361                       v8i8, v8i8, fc, Commutable>;
3362  def v4i16 : N3VD_cmp<op24, op23, 0b01, op11_8, op4, itinD16,
3363                       OpcodeStr, !strconcat(Dt, "16"),
3364                       v4i16, v4i16, fc, Commutable>;
3365  def v2i32 : N3VD_cmp<op24, op23, 0b10, op11_8, op4, itinD32,
3366                       OpcodeStr, !strconcat(Dt, "32"),
3367                       v2i32, v2i32, fc, Commutable>;
3368
3369  // 128-bit vector types.
3370  def v16i8 : N3VQ_cmp<op24, op23, 0b00, op11_8, op4, itinQ16,
3371                       OpcodeStr, !strconcat(Dt, "8"),
3372                       v16i8, v16i8, fc, Commutable>;
3373  def v8i16 : N3VQ_cmp<op24, op23, 0b01, op11_8, op4, itinQ16,
3374                       OpcodeStr, !strconcat(Dt, "16"),
3375                       v8i16, v8i16, fc, Commutable>;
3376  def v4i32 : N3VQ_cmp<op24, op23, 0b10, op11_8, op4, itinQ32,
3377                       OpcodeStr, !strconcat(Dt, "32"),
3378                       v4i32, v4i32, fc, Commutable>;
3379}
3380
3381
3382// Neon 2-register vector intrinsics,
3383//   element sizes of 8, 16 and 32 bits:
3384multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3385                      bits<5> op11_7, bit op4,
3386                      InstrItinClass itinD, InstrItinClass itinQ,
3387                      string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3388  // 64-bit vector types.
3389  def v8i8  : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3390                      itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
3391  def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3392                      itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
3393  def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3394                      itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
3395
3396  // 128-bit vector types.
3397  def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3398                      itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
3399  def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3400                      itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
3401  def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3402                      itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
3403}
3404
3405
3406// Neon Narrowing 2-register vector operations,
3407//   source operand element sizes of 16, 32 and 64 bits:
3408multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3409                    bits<5> op11_7, bit op6, bit op4,
3410                    InstrItinClass itin, string OpcodeStr, string Dt,
3411                    SDNode OpNode> {
3412  def v8i8  : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
3413                   itin, OpcodeStr, !strconcat(Dt, "16"),
3414                   v8i8, v8i16, OpNode>;
3415  def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
3416                   itin, OpcodeStr, !strconcat(Dt, "32"),
3417                   v4i16, v4i32, OpNode>;
3418  def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
3419                   itin, OpcodeStr, !strconcat(Dt, "64"),
3420                   v2i32, v2i64, OpNode>;
3421}
3422
3423// Neon Narrowing 2-register vector intrinsics,
3424//   source operand element sizes of 16, 32 and 64 bits:
3425multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3426                       bits<5> op11_7, bit op6, bit op4,
3427                       InstrItinClass itin, string OpcodeStr, string Dt,
3428                       SDPatternOperator IntOp> {
3429  def v8i8  : N2VNInt<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
3430                      itin, OpcodeStr, !strconcat(Dt, "16"),
3431                      v8i8, v8i16, IntOp>;
3432  def v4i16 : N2VNInt<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
3433                      itin, OpcodeStr, !strconcat(Dt, "32"),
3434                      v4i16, v4i32, IntOp>;
3435  def v2i32 : N2VNInt<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
3436                      itin, OpcodeStr, !strconcat(Dt, "64"),
3437                      v2i32, v2i64, IntOp>;
3438}
3439
3440
// Neon Lengthening 2-register vector operations (currently specific to VMOVL),
//   source operand element sizes of 8, 16 and 32 bits:
3443multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
3444                    string OpcodeStr, string Dt, SDNode OpNode> {
3445  def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3446                   OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
3447  def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3448                   OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
3449  def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
3450                   OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
3451}
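
// For illustration only (hypothetical instantiation; encoding bits elided),
// a use of this multiclass along the lines of
//   defm VFOOLs : N2VL_QHS<..., "vfool", "s", sext>;
// would expand to VFOOLsv8i16, VFOOLsv4i32 and VFOOLsv2i64, each matching the
// sign extension of a 64-bit source vector into the 128-bit result type.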
3452
3453
3454// Neon 3-register vector operations.
3455
3456// First with only element sizes of 8, 16 and 32 bits:
3457multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3458                   InstrItinClass itinD16, InstrItinClass itinD32,
3459                   InstrItinClass itinQ16, InstrItinClass itinQ32,
3460                   string OpcodeStr, string Dt,
3461                   SDNode OpNode, bit Commutable = 0> {
3462  // 64-bit vector types.
3463  def v8i8  : N3VD<op24, op23, 0b00, op11_8, op4, itinD16,
3464                   OpcodeStr, !strconcat(Dt, "8"),
3465                   v8i8, v8i8, OpNode, Commutable>;
3466  def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
3467                   OpcodeStr, !strconcat(Dt, "16"),
3468                   v4i16, v4i16, OpNode, Commutable>;
3469  def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
3470                   OpcodeStr, !strconcat(Dt, "32"),
3471                   v2i32, v2i32, OpNode, Commutable>;
3472
3473  // 128-bit vector types.
3474  def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
3475                   OpcodeStr, !strconcat(Dt, "8"),
3476                   v16i8, v16i8, OpNode, Commutable>;
3477  def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
3478                   OpcodeStr, !strconcat(Dt, "16"),
3479                   v8i16, v8i16, OpNode, Commutable>;
3480  def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
3481                   OpcodeStr, !strconcat(Dt, "32"),
3482                   v4i32, v4i32, OpNode, Commutable>;
3483}
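
// As a rough sketch of how these multiclasses are consumed: the VMUL
// definition in the Instruction Definitions section below,
//   defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
//                       IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
// expands to six records, VMULv8i8 ... VMULv4i32, covering the .i8/.i16/.i32
// forms for both D and Q registers, each selecting the generic 'mul' node.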
3484
3485multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
3486  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
3487  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
3488  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
3489  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
3490                     v4i32, v2i32, ShOp>;
3491}
3492
// ....then also with element size of 64 bits:
3494multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3495                    InstrItinClass itinD, InstrItinClass itinQ,
3496                    string OpcodeStr, string Dt,
3497                    SDNode OpNode, bit Commutable = 0>
3498  : N3V_QHS<op24, op23, op11_8, op4, itinD, itinD, itinQ, itinQ,
3499            OpcodeStr, Dt, OpNode, Commutable> {
3500  def v1i64 : N3VD<op24, op23, 0b11, op11_8, op4, itinD,
3501                   OpcodeStr, !strconcat(Dt, "64"),
3502                   v1i64, v1i64, OpNode, Commutable>;
3503  def v2i64 : N3VQ<op24, op23, 0b11, op11_8, op4, itinQ,
3504                   OpcodeStr, !strconcat(Dt, "64"),
3505                   v2i64, v2i64, OpNode, Commutable>;
3506}
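
// N3V_QHSD simply extends N3V_QHS with the 64-bit element forms; for example,
// VADD below,
//   defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ,
//                        "vadd", "i", add, 1>;
// yields eight records, VADDv8i8 ... VADDv2i64.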
3507
3508
3509// Neon 3-register vector intrinsics.
3510
3511// First with only element sizes of 16 and 32 bits:
3512multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3513                     InstrItinClass itinD16, InstrItinClass itinD32,
3514                     InstrItinClass itinQ16, InstrItinClass itinQ32,
3515                     string OpcodeStr, string Dt,
3516                     SDPatternOperator IntOp, bit Commutable = 0> {
3517  // 64-bit vector types.
3518  def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
3519                      OpcodeStr, !strconcat(Dt, "16"),
3520                      v4i16, v4i16, IntOp, Commutable>;
3521  def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
3522                      OpcodeStr, !strconcat(Dt, "32"),
3523                      v2i32, v2i32, IntOp, Commutable>;
3524
3525  // 128-bit vector types.
3526  def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
3527                      OpcodeStr, !strconcat(Dt, "16"),
3528                      v8i16, v8i16, IntOp, Commutable>;
3529  def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
3530                      OpcodeStr, !strconcat(Dt, "32"),
3531                      v4i32, v4i32, IntOp, Commutable>;
3532}
3533multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3534                     InstrItinClass itinD16, InstrItinClass itinD32,
3535                     InstrItinClass itinQ16, InstrItinClass itinQ32,
3536                     string OpcodeStr, string Dt,
3537                     SDPatternOperator IntOp> {
3538  // 64-bit vector types.
3539  def v4i16 : N3VDIntSh<op24, op23, 0b01, op11_8, op4, f, itinD16,
3540                      OpcodeStr, !strconcat(Dt, "16"),
3541                      v4i16, v4i16, IntOp>;
3542  def v2i32 : N3VDIntSh<op24, op23, 0b10, op11_8, op4, f, itinD32,
3543                      OpcodeStr, !strconcat(Dt, "32"),
3544                      v2i32, v2i32, IntOp>;
3545
3546  // 128-bit vector types.
3547  def v8i16 : N3VQIntSh<op24, op23, 0b01, op11_8, op4, f, itinQ16,
3548                      OpcodeStr, !strconcat(Dt, "16"),
3549                      v8i16, v8i16, IntOp>;
3550  def v4i32 : N3VQIntSh<op24, op23, 0b10, op11_8, op4, f, itinQ32,
3551                      OpcodeStr, !strconcat(Dt, "32"),
3552                      v4i32, v4i32, IntOp>;
3553}
3554
3555multiclass N3VIntSL_HS<bits<4> op11_8,
3556                       InstrItinClass itinD16, InstrItinClass itinD32,
3557                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3558                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3559  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
3560                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
3561  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
3562                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
3563  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
3564                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
3565  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
3566                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
3567}
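
// The "SL" (scalar-by-lane) variants multiply by a single duplicated vector
// lane.  VQDMULHsl further below (N3VIntSL_HS<0b1100, ...>) is one user; as
// the explicit Pat records next to it show, the Q-register forms take a
// D-sized vector plus a lane index as their second source operand.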
3568
3569// ....then also with element size of 8 bits:
3570multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3571                      InstrItinClass itinD16, InstrItinClass itinD32,
3572                      InstrItinClass itinQ16, InstrItinClass itinQ32,
3573                      string OpcodeStr, string Dt,
3574                      SDPatternOperator IntOp, bit Commutable = 0>
3575  : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3576              OpcodeStr, Dt, IntOp, Commutable> {
3577  def v8i8  : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
3578                      OpcodeStr, !strconcat(Dt, "8"),
3579                      v8i8, v8i8, IntOp, Commutable>;
3580  def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
3581                      OpcodeStr, !strconcat(Dt, "8"),
3582                      v16i8, v16i8, IntOp, Commutable>;
3583}
3584multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3585                      InstrItinClass itinD16, InstrItinClass itinD32,
3586                      InstrItinClass itinQ16, InstrItinClass itinQ32,
3587                      string OpcodeStr, string Dt,
3588                      SDPatternOperator IntOp>
3589  : N3VInt_HSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3590              OpcodeStr, Dt, IntOp> {
3591  def v8i8  : N3VDIntSh<op24, op23, 0b00, op11_8, op4, f, itinD16,
3592                      OpcodeStr, !strconcat(Dt, "8"),
3593                      v8i8, v8i8, IntOp>;
3594  def v16i8 : N3VQIntSh<op24, op23, 0b00, op11_8, op4, f, itinQ16,
3595                      OpcodeStr, !strconcat(Dt, "8"),
3596                      v16i8, v16i8, IntOp>;
3597}
3598
3599
3600// ....then also with element size of 64 bits:
3601multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3602                       InstrItinClass itinD16, InstrItinClass itinD32,
3603                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3604                       string OpcodeStr, string Dt,
3605                       SDPatternOperator IntOp, bit Commutable = 0>
3606  : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3607               OpcodeStr, Dt, IntOp, Commutable> {
3608  def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
3609                      OpcodeStr, !strconcat(Dt, "64"),
3610                      v1i64, v1i64, IntOp, Commutable>;
3611  def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
3612                      OpcodeStr, !strconcat(Dt, "64"),
3613                      v2i64, v2i64, IntOp, Commutable>;
3614}
3615multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
3616                       InstrItinClass itinD16, InstrItinClass itinD32,
3617                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3618                       string OpcodeStr, string Dt,
3619                       SDPatternOperator IntOp>
3620  : N3VInt_QHSSh<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
3621               OpcodeStr, Dt, IntOp> {
3622  def v1i64 : N3VDIntSh<op24, op23, 0b11, op11_8, op4, f, itinD32,
3623                      OpcodeStr, !strconcat(Dt, "64"),
3624                      v1i64, v1i64, IntOp>;
3625  def v2i64 : N3VQIntSh<op24, op23, 0b11, op11_8, op4, f, itinQ32,
3626                      OpcodeStr, !strconcat(Dt, "64"),
3627                      v2i64, v2i64, IntOp>;
3628}
3629
3630// Neon Narrowing 3-register vector intrinsics,
3631//   source operand element sizes of 16, 32 and 64 bits:
3632multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3633                       string OpcodeStr, string Dt,
3634                       SDPatternOperator IntOp, bit Commutable = 0> {
3635  def v8i8  : N3VNInt<op24, op23, 0b00, op11_8, op4,
3636                      OpcodeStr, !strconcat(Dt, "16"),
3637                      v8i8, v8i16, IntOp, Commutable>;
3638  def v4i16 : N3VNInt<op24, op23, 0b01, op11_8, op4,
3639                      OpcodeStr, !strconcat(Dt, "32"),
3640                      v4i16, v4i32, IntOp, Commutable>;
3641  def v2i32 : N3VNInt<op24, op23, 0b10, op11_8, op4,
3642                      OpcodeStr, !strconcat(Dt, "64"),
3643                      v2i32, v2i64, IntOp, Commutable>;
3644}
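
// Narrowing 3-register ops produce a D result from two Q sources.  VADDHN
// below instantiates this with null_frag and relies on explicit Pat records
// (a truncate of a right-shifted add) to select VADDHNv8i8/v4i16/v2i32, while
// VRADDHN selects the int_arm_neon_vraddhn intrinsic directly.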
3645
3646
3647// Neon Long 3-register vector operations.
3648
3649multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3650                    InstrItinClass itin16, InstrItinClass itin32,
3651                    string OpcodeStr, string Dt,
3652                    SDNode OpNode, bit Commutable = 0> {
3653  def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
3654                   OpcodeStr, !strconcat(Dt, "8"),
3655                   v8i16, v8i8, OpNode, Commutable>;
3656  def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
3657                   OpcodeStr, !strconcat(Dt, "16"),
3658                   v4i32, v4i16, OpNode, Commutable>;
3659  def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
3660                   OpcodeStr, !strconcat(Dt, "32"),
3661                   v2i64, v2i32, OpNode, Commutable>;
3662}
3663
3664multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
3665                     InstrItinClass itin, string OpcodeStr, string Dt,
3666                     SDNode OpNode> {
3667  def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
3668                       !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
3669  def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
3670                     !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
3671}
3672
3673multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3674                       InstrItinClass itin16, InstrItinClass itin32,
3675                       string OpcodeStr, string Dt,
3676                       SDNode OpNode, SDPatternOperator ExtOp, bit Commutable = 0> {
3677  def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
3678                      OpcodeStr, !strconcat(Dt, "8"),
3679                      v8i16, v8i8, OpNode, ExtOp, Commutable>;
3680  def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
3681                      OpcodeStr, !strconcat(Dt, "16"),
3682                      v4i32, v4i16, OpNode, ExtOp, Commutable>;
3683  def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
3684                      OpcodeStr, !strconcat(Dt, "32"),
3685                      v2i64, v2i32, OpNode, ExtOp, Commutable>;
3686}
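
// Long ops with an explicit extend produce a Q result from two extended
// D-register sources.  For example, VADDL below,
//   defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
//                             "vaddl", "s", add, sext, 1>;
// matches roughly (add (sext Dn), (sext Dm)) for each element size.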
3687
3688// Neon Long 3-register vector intrinsics.
3689
3690// First with only element sizes of 16 and 32 bits:
3691multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3692                      InstrItinClass itin16, InstrItinClass itin32,
3693                      string OpcodeStr, string Dt,
3694                      SDPatternOperator IntOp, bit Commutable = 0> {
3695  def v4i32 : N3VLInt<op24, op23, 0b01, op11_8, op4, itin16,
3696                      OpcodeStr, !strconcat(Dt, "16"),
3697                      v4i32, v4i16, IntOp, Commutable>;
3698  def v2i64 : N3VLInt<op24, op23, 0b10, op11_8, op4, itin32,
3699                      OpcodeStr, !strconcat(Dt, "32"),
3700                      v2i64, v2i32, IntOp, Commutable>;
3701}
3702
3703multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
3704                        InstrItinClass itin, string OpcodeStr, string Dt,
3705                        SDPatternOperator IntOp> {
3706  def v4i16 : N3VLIntSL16<op24, 0b01, op11_8, itin,
3707                          OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
3708  def v2i32 : N3VLIntSL<op24, 0b10, op11_8, itin,
3709                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3710}
3711
3712// ....then also with element size of 8 bits:
3713multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3714                       InstrItinClass itin16, InstrItinClass itin32,
3715                       string OpcodeStr, string Dt,
3716                       SDPatternOperator IntOp, bit Commutable = 0>
3717  : N3VLInt_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt,
3718               IntOp, Commutable> {
3719  def v8i16 : N3VLInt<op24, op23, 0b00, op11_8, op4, itin16,
3720                      OpcodeStr, !strconcat(Dt, "8"),
3721                      v8i16, v8i8, IntOp, Commutable>;
3722}
3723
3724// ....with explicit extend (VABDL).
3725multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3726                       InstrItinClass itin, string OpcodeStr, string Dt,
3727                       SDPatternOperator IntOp, SDNode ExtOp, bit Commutable = 0> {
3728  def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
3729                         OpcodeStr, !strconcat(Dt, "8"),
3730                         v8i16, v8i8, IntOp, ExtOp, Commutable>;
3731  def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
3732                         OpcodeStr, !strconcat(Dt, "16"),
3733                         v4i32, v4i16, IntOp, ExtOp, Commutable>;
3734  def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
3735                         OpcodeStr, !strconcat(Dt, "32"),
3736                         v2i64, v2i32, IntOp, ExtOp, Commutable>;
3737}
3738
3739
// Neon Wide 3-register vector operations,
//   source operand element sizes of 8, 16 and 32 bits:
3742multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3743                    string OpcodeStr, string Dt,
3744                    SDNode OpNode, SDPatternOperator ExtOp, bit Commutable = 0> {
3745  def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
3746                   OpcodeStr, !strconcat(Dt, "8"),
3747                   v8i16, v8i8, OpNode, ExtOp, Commutable>;
3748  def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
3749                   OpcodeStr, !strconcat(Dt, "16"),
3750                   v4i32, v4i16, OpNode, ExtOp, Commutable>;
3751  def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
3752                   OpcodeStr, !strconcat(Dt, "32"),
3753                   v2i64, v2i32, OpNode, ExtOp, Commutable>;
3754}
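
// Wide ops take a Q first operand and a D second operand, extending only the
// latter; VADDW below is one user:
//   defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
// i.e. roughly (add Qn, (sext Dm)) per element size.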
3755
3756
3757// Neon Multiply-Op vector operations,
3758//   element sizes of 8, 16 and 32 bits:
3759multiclass N3VMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3760                        InstrItinClass itinD16, InstrItinClass itinD32,
3761                        InstrItinClass itinQ16, InstrItinClass itinQ32,
3762                        string OpcodeStr, string Dt, SDNode OpNode> {
3763  // 64-bit vector types.
3764  def v8i8  : N3VDMulOp<op24, op23, 0b00, op11_8, op4, itinD16,
3765                        OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>;
3766  def v4i16 : N3VDMulOp<op24, op23, 0b01, op11_8, op4, itinD16,
3767                        OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>;
3768  def v2i32 : N3VDMulOp<op24, op23, 0b10, op11_8, op4, itinD32,
3769                        OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>;
3770
3771  // 128-bit vector types.
3772  def v16i8 : N3VQMulOp<op24, op23, 0b00, op11_8, op4, itinQ16,
3773                        OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>;
3774  def v8i16 : N3VQMulOp<op24, op23, 0b01, op11_8, op4, itinQ16,
3775                        OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>;
3776  def v4i32 : N3VQMulOp<op24, op23, 0b10, op11_8, op4, itinQ32,
3777                        OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>;
3778}
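
// Multiply-Op classes fuse a multiply with a second operation on the
// accumulator; VMLA below passes 'add' as OpNode,
//   defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
//                            IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
// so each record matches roughly (add acc, (mul a, b)).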
3779
3780multiclass N3VMulOpSL_HS<bits<4> op11_8,
3781                         InstrItinClass itinD16, InstrItinClass itinD32,
3782                         InstrItinClass itinQ16, InstrItinClass itinQ32,
3783                         string OpcodeStr, string Dt, SDPatternOperator ShOp> {
3784  def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16,
3785                            OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>;
3786  def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
3787                          OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
3788  def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
3789                            OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
3790                            mul, ShOp>;
3791  def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
3792                          OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
3793                          mul, ShOp>;
3794}
3795
3796// Neon Intrinsic-Op vector operations,
3797//   element sizes of 8, 16 and 32 bits:
3798multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3799                        InstrItinClass itinD, InstrItinClass itinQ,
3800                        string OpcodeStr, string Dt, SDPatternOperator IntOp,
3801                        SDNode OpNode> {
3802  // 64-bit vector types.
3803  def v8i8  : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
3804                        OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
3805  def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
3806                        OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
3807  def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
3808                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
3809
3810  // 128-bit vector types.
3811  def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
3812                        OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
3813  def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
3814                        OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
3815  def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
3816                        OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
3817}
3818
3819// Neon 3-argument intrinsics,
3820//   element sizes of 16 and 32 bits:
3821multiclass N3VInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3822                       InstrItinClass itinD16, InstrItinClass itinD32,
3823                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3824                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3825  // 64-bit vector types.
3826  def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, itinD16,
3827                       OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
3828  def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, itinD32,
3829                       OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
3830
3831  // 128-bit vector types.
3832  def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, itinQ16,
3833                       OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
3834  def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, itinQ32,
3835                       OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
3836}
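
// 3-argument intrinsics read the destination register as an accumulator.
// VQRDMLAH in the HasV8_1a block further down instantiates this with
// null_frag and supplies explicit saddsat/int_arm_neon_vqrdmulh Pat records
// instead.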
3837
3838//   element sizes of 8, 16 and 32 bits:
3839multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3840                       InstrItinClass itinD16, InstrItinClass itinD32,
3841                       InstrItinClass itinQ16, InstrItinClass itinQ32,
3842                       string OpcodeStr, string Dt, SDPatternOperator IntOp>
  : N3VInt3_HS<op24, op23, op11_8, op4, itinD16, itinD32,
               itinQ16, itinQ32, OpcodeStr, Dt, IntOp> {
3845  // 64-bit vector types.
3846  def v8i8  : N3VDInt3<op24, op23, 0b00, op11_8, op4, itinD16,
3847                       OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
3848  // 128-bit vector types.
3849  def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, itinQ16,
3850                       OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
3851}
3852
3853// Neon Long Multiply-Op vector operations,
3854//   element sizes of 8, 16 and 32 bits:
3855multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3856                         InstrItinClass itin16, InstrItinClass itin32,
3857                         string OpcodeStr, string Dt, SDNode MulOp,
3858                         SDNode OpNode> {
3859  def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
3860                        !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
3861  def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
3862                        !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
3863  def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
3864                        !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
3865}
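
// Long Multiply-Op: a Q accumulator is updated from two D sources, e.g.
//   defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
//                               "vmlal", "s", ARMvmulls, add>;
// below, which matches roughly (add Qacc, (ARMvmulls Dn, Dm)).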
3866
3867multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
3868                          string Dt, SDNode MulOp, SDNode OpNode> {
3869  def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
3870                            !strconcat(Dt,"16"), v4i32, v4i16, MulOp, OpNode>;
3871  def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
3872                          !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
3873}
3874
3875
3876// Neon Long 3-argument intrinsics.
3877
3878// First with only element sizes of 16 and 32 bits:
3879multiclass N3VLInt3_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
3880                       InstrItinClass itin16, InstrItinClass itin32,
3881                       string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3882  def v4i32 : N3VLInt3<op24, op23, 0b01, op11_8, op4, itin16,
3883                       OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
3884  def v2i64 : N3VLInt3<op24, op23, 0b10, op11_8, op4, itin32,
3885                       OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3886}
3887
3888multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8,
3889                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3890  def v4i16 : N3VLInt3SL16<op24, 0b01, op11_8, IIC_VMACi16D,
3891                           OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>;
3892  def v2i32 : N3VLInt3SL<op24, 0b10, op11_8, IIC_VMACi32D,
3893                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
3894}
3895
3896// ....then also with element size of 8 bits:
3897multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3898                        InstrItinClass itin16, InstrItinClass itin32,
3899                        string OpcodeStr, string Dt, SDPatternOperator IntOp>
3900  : N3VLInt3_HS<op24, op23, op11_8, op4, itin16, itin32, OpcodeStr, Dt, IntOp> {
3901  def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, itin16,
3902                       OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
3903}
3904
3905// ....with explicit extend (VABAL).
3906multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
3907                            InstrItinClass itin, string OpcodeStr, string Dt,
3908                            SDPatternOperator IntOp, SDNode ExtOp, SDNode OpNode> {
3909  def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
3910                           OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
3911                           IntOp, ExtOp, OpNode>;
3912  def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
3913                           OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
3914                           IntOp, ExtOp, OpNode>;
3915  def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
3916                           OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
3917                           IntOp, ExtOp, OpNode>;
3918}
3919
3920
3921// Neon Pairwise long 2-register intrinsics,
3922//   element sizes of 8, 16 and 32 bits:
3923multiclass N2VPLInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3924                        bits<5> op11_7, bit op4,
3925                        string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3926  // 64-bit vector types.
3927  def v8i8  : N2VDPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3928                        OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3929  def v4i16 : N2VDPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3930                        OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3931  def v2i32 : N2VDPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3932                        OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3933
3934  // 128-bit vector types.
3935  def v16i8 : N2VQPLInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3936                        OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3937  def v8i16 : N2VQPLInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3938                        OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3939  def v4i32 : N2VQPLInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3940                        OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3941}
3942
3943
3944// Neon Pairwise long 2-register accumulate intrinsics,
3945//   element sizes of 8, 16 and 32 bits:
3946multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
3947                         bits<5> op11_7, bit op4,
3948                         string OpcodeStr, string Dt, SDPatternOperator IntOp> {
3949  // 64-bit vector types.
3950  def v8i8  : N2VDPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3951                         OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>;
3952  def v4i16 : N2VDPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3953                         OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>;
3954  def v2i32 : N2VDPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3955                         OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>;
3956
3957  // 128-bit vector types.
3958  def v16i8 : N2VQPLInt2<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
3959                         OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>;
3960  def v8i16 : N2VQPLInt2<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
3961                         OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>;
3962  def v4i32 : N2VQPLInt2<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
3963                         OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>;
3964}
3965
3966
3967// Neon 2-register vector shift by immediate,
3968//   with f of either N2RegVShLFrm or N2RegVShRFrm
3969//   element sizes of 8, 16, 32 and 64 bits:
3970multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
3971                       InstrItinClass itin, string OpcodeStr, string Dt,
3972                       SDNode OpNode> {
3973  // 64-bit vector types.
3974  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3975                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
3976    let Inst{21-19} = 0b001; // imm6 = 001xxx
3977  }
3978  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3979                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
3980    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
3981  }
3982  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3983                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
3984    let Inst{21} = 0b1;      // imm6 = 1xxxxx
3985  }
3986  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
3987                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
3988                             // imm6 = xxxxxx
3989
3990  // 128-bit vector types.
3991  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3992                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
3993    let Inst{21-19} = 0b001; // imm6 = 001xxx
3994  }
3995  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
3996                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
3997    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
3998  }
3999  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm,
4000                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
4001    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4002  }
4003  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm,
4004                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
4005                             // imm6 = xxxxxx
4006}
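
// The Inst{21-19}/Inst{21-20}/Inst{21} overrides above carve the element size
// out of the imm6 field: the leading set bits select the size and the
// remaining low bits hold the left-shift amount.  As a rough worked example,
// a left shift by 3 on 16-bit elements would encode imm6 = 0b010011 (prefix
// 01, shift 0011); the 64-bit forms use all six bits for the shift.  The
// right-shift multiclass below reuses the same size prefixes but encodes
// roughly (element size - shift) in the low bits.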
4007multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4008                       InstrItinClass itin, string OpcodeStr, string Dt,
4009                       string baseOpc, SDNode OpNode> {
4010  // 64-bit vector types.
4011  def v8i8  : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
4012                     OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
4013    let Inst{21-19} = 0b001; // imm6 = 001xxx
4014  }
4015  def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
4016                     OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
4017    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4018  }
4019  def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
4020                     OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
4021    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4022  }
4023  def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
4024                     OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
4025                             // imm6 = xxxxxx
4026
4027  // 128-bit vector types.
4028  def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
4029                     OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
4030    let Inst{21-19} = 0b001; // imm6 = 001xxx
4031  }
4032  def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16,
4033                     OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
4034    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4035  }
4036  def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32,
4037                     OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
4038    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4039  }
4040  def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
4041                     OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
4042                             // imm6 = xxxxxx
4043}
4044
4045// Neon Shift-Accumulate vector operations,
4046//   element sizes of 8, 16, 32 and 64 bits:
4047multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4048                         string OpcodeStr, string Dt, SDNode ShOp> {
4049  // 64-bit vector types.
4050  def v8i8  : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
4051                        OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> {
4052    let Inst{21-19} = 0b001; // imm6 = 001xxx
4053  }
4054  def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
4055                        OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> {
4056    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4057  }
4058  def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
4059                        OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> {
4060    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4061  }
4062  def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
4063                        OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>;
4064                             // imm6 = xxxxxx
4065
4066  // 128-bit vector types.
4067  def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8,
4068                        OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> {
4069    let Inst{21-19} = 0b001; // imm6 = 001xxx
4070  }
4071  def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16,
4072                        OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> {
4073    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4074  }
4075  def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32,
4076                        OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> {
4077    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4078  }
4079  def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64,
4080                        OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>;
4081                             // imm6 = xxxxxx
4082}
4083
4084// Neon Shift-Insert vector operations,
4085//   with f of either N2RegVShLFrm or N2RegVShRFrm
4086//   element sizes of 8, 16, 32 and 64 bits:
4087multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4088                          string OpcodeStr> {
4089  // 64-bit vector types.
4090  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4091                        N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsliImm> {
4092    let Inst{21-19} = 0b001; // imm6 = 001xxx
4093  }
4094  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4095                        N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsliImm> {
4096    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4097  }
4098  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm,
4099                        N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsliImm> {
4100    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4101  }
4102  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm,
4103                        N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsliImm>;
4104                             // imm6 = xxxxxx
4105
4106  // 128-bit vector types.
4107  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4108                        N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsliImm> {
4109    let Inst{21-19} = 0b001; // imm6 = 001xxx
4110  }
4111  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4112                        N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsliImm> {
4113    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4114  }
4115  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm,
4116                        N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsliImm> {
4117    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4118  }
4119  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm,
4120                        N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsliImm>;
4121                             // imm6 = xxxxxx
4122}
4123multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
4124                          string OpcodeStr> {
4125  // 64-bit vector types.
4126  def v8i8  : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8,
4127                        N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsriImm> {
4128    let Inst{21-19} = 0b001; // imm6 = 001xxx
4129  }
4130  def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16,
4131                        N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsriImm> {
4132    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4133  }
4134  def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32,
4135                        N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsriImm> {
4136    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4137  }
4138  def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64,
4139                        N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsriImm>;
4140                             // imm6 = xxxxxx
4141
4142  // 128-bit vector types.
4143  def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8,
4144                        N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsriImm> {
4145    let Inst{21-19} = 0b001; // imm6 = 001xxx
4146  }
4147  def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16,
4148                        N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsriImm> {
4149    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4150  }
4151  def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32,
4152                        N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsriImm> {
4153    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4154  }
4155  def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64,
4156                        N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsriImm>;
4157                             // imm6 = xxxxxx
4158}
4159
4160// Neon Shift Long operations,
4161//   element sizes of 8, 16, 32 bits:
4162multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
4163                      bit op4, string OpcodeStr, string Dt,
4164                      SDPatternOperator OpNode> {
4165  def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4166              OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
4167    let Inst{21-19} = 0b001; // imm6 = 001xxx
4168  }
4169  def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4170               OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
4171    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4172  }
4173  def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
4174               OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
4175    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4176  }
4177}
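
// The imm1_7 / imm1_15 / imm1_31 operands above presumably restrict the long
// shift amount to 1 .. (source element size - 1) for the 8-, 16- and 32-bit
// forms respectively; e.g. the v8i16 record would accept at most #7.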
4178
4179// Neon Shift Narrow operations,
4180//   element sizes of 16, 32, 64 bits:
4181multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
4182                      bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
4183                      SDPatternOperator OpNode> {
4184  def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4185                    OpcodeStr, !strconcat(Dt, "16"),
4186                    v8i8, v8i16, shr_imm8, OpNode> {
4187    let Inst{21-19} = 0b001; // imm6 = 001xxx
4188  }
4189  def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4190                     OpcodeStr, !strconcat(Dt, "32"),
4191                     v4i16, v4i32, shr_imm16, OpNode> {
4192    let Inst{21-20} = 0b01;  // imm6 = 01xxxx
4193  }
4194  def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
4195                     OpcodeStr, !strconcat(Dt, "64"),
4196                     v2i32, v2i64, shr_imm32, OpNode> {
4197    let Inst{21} = 0b1;      // imm6 = 1xxxxx
4198  }
4199}
4200
4201//===----------------------------------------------------------------------===//
4202// Instruction Definitions.
4203//===----------------------------------------------------------------------===//
4204
4205// Vector Add Operations.
4206
4207//   VADD     : Vector Add (integer and floating-point)
4208defm VADD     : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i",
4209                         add, 1>;
4210def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
4211                     v2f32, v2f32, fadd, 1>;
4212def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
4213                     v4f32, v4f32, fadd, 1>;
4214def  VADDhd   : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
4215                     v4f16, v4f16, fadd, 1>,
4216                Requires<[HasNEON,HasFullFP16]>;
4217def  VADDhq   : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
4218                     v8f16, v8f16, fadd, 1>,
4219                Requires<[HasNEON,HasFullFP16]>;
4220//   VADDL    : Vector Add Long (Q = D + D)
4221defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4222                            "vaddl", "s", add, sext, 1>;
4223defm VADDLu   : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
4224                            "vaddl", "u", add, zanyext, 1>;
4225//   VADDW    : Vector Add Wide (Q = Q + D)
4226defm VADDWs   : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
4227defm VADDWu   : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zanyext, 0>;
4228//   VHADD    : Vector Halving Add
4229defm VHADDs   : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
4230                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4231                           "vhadd", "s", int_arm_neon_vhadds, 1>;
4232defm VHADDu   : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
4233                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4234                           "vhadd", "u", int_arm_neon_vhaddu, 1>;
4235//   VRHADD   : Vector Rounding Halving Add
4236defm VRHADDs  : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
4237                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4238                           "vrhadd", "s", int_arm_neon_vrhadds, 1>;
4239defm VRHADDu  : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
4240                           IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4241                           "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
4242//   VQADD    : Vector Saturating Add
4243defm VQADDs   : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
4244                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4245                            "vqadd", "s", saddsat, 1>;
4246defm VQADDu   : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
4247                            IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
4248                            "vqadd", "u", uaddsat, 1>;
4249//   VADDHN   : Vector Add and Narrow Returning High Half (D = Q + Q)
4250defm VADDHN   : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>;
4251//   VRADDHN  : Vector Rounding Add and Narrow Returning High Half (D = Q + Q)
4252defm VRADDHN  : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
4253                            int_arm_neon_vraddhn, 1>;
4254
4255let Predicates = [HasNEON] in {
4256def : Pat<(v8i8  (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
4257          (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
4258def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
4259          (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
4260def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
4261          (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
4262}
4263
4264// Vector Multiply Operations.
4265
4266//   VMUL     : Vector Multiply (integer, polynomial and floating-point)
4267defm VMUL     : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
4268                        IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
4269def  VMULpd   : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
4270                        "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
4271def  VMULpq   : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
4272                        "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
4273def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
4274                     v2f32, v2f32, fmul, 1>;
4275def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
4276                     v4f32, v4f32, fmul, 1>;
4277def  VMULhd   : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
4278                     v4f16, v4f16, fmul, 1>,
4279                Requires<[HasNEON,HasFullFP16]>;
4280def  VMULhq   : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
4281                     v8f16, v8f16, fmul, 1>,
4282                Requires<[HasNEON,HasFullFP16]>;
4283defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
4284def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
4285def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
4286                       v2f32, fmul>;
4287def  VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
4288                Requires<[HasNEON,HasFullFP16]>;
4289def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
4290                       v4f16, fmul>,
4291                Requires<[HasNEON,HasFullFP16]>;
4292
4293let Predicates = [HasNEON] in {
4294def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
4295                      (v8i16 (ARMvduplane (v8i16 QPR:$src2), imm:$lane)))),
4296          (v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
4297                              (v4i16 (EXTRACT_SUBREG QPR:$src2,
4298                                      (DSubReg_i16_reg imm:$lane))),
4299                              (SubReg_i16_lane imm:$lane)))>;
4300def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
4301                      (v4i32 (ARMvduplane (v4i32 QPR:$src2), imm:$lane)))),
4302          (v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
4303                              (v2i32 (EXTRACT_SUBREG QPR:$src2,
4304                                      (DSubReg_i32_reg imm:$lane))),
4305                              (SubReg_i32_lane imm:$lane)))>;
4306def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
4307                       (v4f32 (ARMvduplane (v4f32 QPR:$src2), imm:$lane)))),
4308          (v4f32 (VMULslfq (v4f32 QPR:$src1),
4309                           (v2f32 (EXTRACT_SUBREG QPR:$src2,
4310                                   (DSubReg_i32_reg imm:$lane))),
4311                           (SubReg_i32_lane imm:$lane)))>;
4312def : Pat<(v8f16 (fmul (v8f16 QPR:$src1),
4313                       (v8f16 (ARMvduplane (v8f16 QPR:$src2), imm:$lane)))),
          (v8f16 (VMULslhq (v8f16 QPR:$src1),
4315                           (v4f16 (EXTRACT_SUBREG QPR:$src2,
4316                                   (DSubReg_i16_reg imm:$lane))),
4317                           (SubReg_i16_lane imm:$lane)))>;
4318
4319def : Pat<(v2f32 (fmul DPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
4320          (VMULslfd DPR:$Rn,
4321            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
4322            (i32 0))>;
4323def : Pat<(v4f16 (fmul DPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
4324          (VMULslhd DPR:$Rn,
4325            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
4326            (i32 0))>;
4327def : Pat<(v4f32 (fmul QPR:$Rn, (ARMvdup (f32 SPR:$Rm)))),
4328          (VMULslfq QPR:$Rn,
4329            (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0),
4330            (i32 0))>;
4331def : Pat<(v8f16 (fmul QPR:$Rn, (ARMvdup (f16 HPR:$Rm)))),
4332          (VMULslhq QPR:$Rn,
4333            (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), (f16 HPR:$Rm), ssub_0),
4334            (i32 0))>;
4335}
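
// In the lane patterns above, the DSubReg_i16_reg/DSubReg_i32_reg and
// SubReg_*_lane helpers translate a Q-register lane index into the containing
// D subregister plus the lane within it; e.g. lane 5 of a v8i16 Q register
// presumably becomes lane 1 of its high D half.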
4336
4337//   VQDMULH  : Vector Saturating Doubling Multiply Returning High Half
4338defm VQDMULH  : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
4339                          IIC_VMULi16Q, IIC_VMULi32Q,
4340                          "vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
4341defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
4342                            IIC_VMULi16Q, IIC_VMULi32Q,
4343                            "vqdmulh", "s",  int_arm_neon_vqdmulh>;
4344
4345let Predicates = [HasNEON] in {
4346def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
4347                                       (v8i16 (ARMvduplane (v8i16 QPR:$src2),
4348                                                            imm:$lane)))),
4349          (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
4350                                 (v4i16 (EXTRACT_SUBREG QPR:$src2,
4351                                         (DSubReg_i16_reg imm:$lane))),
4352                                 (SubReg_i16_lane imm:$lane)))>;
4353def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
4354                                       (v4i32 (ARMvduplane (v4i32 QPR:$src2),
4355                                                            imm:$lane)))),
4356          (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
4357                                 (v2i32 (EXTRACT_SUBREG QPR:$src2,
4358                                         (DSubReg_i32_reg imm:$lane))),
4359                                 (SubReg_i32_lane imm:$lane)))>;
4360}
4361
4362//   VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
4363defm VQRDMULH   : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
4364                            IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
4365                            "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
4366defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
4367                              IIC_VMULi16Q, IIC_VMULi32Q,
4368                              "vqrdmulh", "s",  int_arm_neon_vqrdmulh>;
4369
4370let Predicates = [HasNEON] in {
4371def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
4372                                        (v8i16 (ARMvduplane (v8i16 QPR:$src2),
4373                                                             imm:$lane)))),
4374          (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
4375                                  (v4i16 (EXTRACT_SUBREG QPR:$src2,
4376                                          (DSubReg_i16_reg imm:$lane))),
4377                                  (SubReg_i16_lane imm:$lane)))>;
4378def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
4379                                        (v4i32 (ARMvduplane (v4i32 QPR:$src2),
4380                                                             imm:$lane)))),
4381          (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
4382                                  (v2i32 (EXTRACT_SUBREG QPR:$src2,
4383                                          (DSubReg_i32_reg imm:$lane))),
4384                                  (SubReg_i32_lane imm:$lane)))>;
4385}
4386
4387//   VMULL    : Vector Multiply Long (integer and polynomial) (Q = D * D)
4388let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
4389    DecoderNamespace = "NEONData" in {
4390  defm VMULLs   : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4391                           "vmull", "s", ARMvmulls, 1>;
4392  defm VMULLu   : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
4393                           "vmull", "u", ARMvmullu, 1>;
4394  def  VMULLp8   :  N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
4395                            v8i16, v8i8, int_arm_neon_vmullp, 1>;
4396  def  VMULLp64  : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary,
4397                          "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>,
4398                    Requires<[HasV8, HasAES]>;
4399}
4400defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", ARMvmulls>;
4401defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", ARMvmullu>;
4402
4403//   VQDMULL  : Vector Saturating Doubling Multiply Long (Q = D * D)
4404defm VQDMULL  : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
4405                           "vqdmull", "s", int_arm_neon_vqdmull, 1>;
4406defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D,
4407                             "vqdmull", "s", int_arm_neon_vqdmull>;
4408
4409// Vector Multiply-Accumulate and Multiply-Subtract Operations.
4410
4411//   VMLA     : Vector Multiply Accumulate (integer and floating-point)
4412defm VMLA     : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4413                             IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
4414def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
4415                          v2f32, fmul_su, fadd_mlx>,
4416                Requires<[HasNEON, UseFPVMLx]>;
4417def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
4418                          v4f32, fmul_su, fadd_mlx>,
4419                Requires<[HasNEON, UseFPVMLx]>;
4420def  VMLAhd   : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
4421                          v4f16, fmul_su, fadd_mlx>,
4422                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4423def  VMLAhq   : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
4424                          v8f16, fmul_su, fadd_mlx>,
4425                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4426defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
4427                              IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
4428def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
4429                            v2f32, fmul_su, fadd_mlx>,
4430                Requires<[HasNEON, UseFPVMLx]>;
4431def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
4432                            v4f32, v2f32, fmul_su, fadd_mlx>,
4433                Requires<[HasNEON, UseFPVMLx]>;
4434def  VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
4435                            v4f16, fmul, fadd>,
4436                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4437def  VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
4438                            v8f16, v4f16, fmul, fadd>,
4439                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4440
4441let Predicates = [HasNEON] in {
4442def : Pat<(v8i16 (add (v8i16 QPR:$src1),
4443                  (mul (v8i16 QPR:$src2),
4444                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
4445          (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4446                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
4447                                      (DSubReg_i16_reg imm:$lane))),
4448                              (SubReg_i16_lane imm:$lane)))>;
4449
4450def : Pat<(v4i32 (add (v4i32 QPR:$src1),
4451                  (mul (v4i32 QPR:$src2),
4452                       (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
4453          (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4454                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
4455                                      (DSubReg_i32_reg imm:$lane))),
4456                              (SubReg_i32_lane imm:$lane)))>;
4457}
4458
4459def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1),
4460                  (fmul_su (v4f32 QPR:$src2),
4461                        (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
4462          (v4f32 (VMLAslfq (v4f32 QPR:$src1),
4463                           (v4f32 QPR:$src2),
4464                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
4465                                   (DSubReg_i32_reg imm:$lane))),
4466                           (SubReg_i32_lane imm:$lane)))>,
4467          Requires<[HasNEON, UseFPVMLx]>;
4468
4469//   VMLAL    : Vector Multiply Accumulate Long (Q += D * D)
4470defm VMLALs   : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4471                              "vmlal", "s", ARMvmulls, add>;
4472defm VMLALu   : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
4473                              "vmlal", "u", ARMvmullu, add>;
4474
4475defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", ARMvmulls, add>;
4476defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", ARMvmullu, add>;
4477
4478let Predicates = [HasNEON, HasV8_1a] in {
  // v8.1a Neon Rounding Doubling Multiply-Op vector operations.
  // VQRDMLAH : Vector Saturating Rounding Doubling Multiply Accumulate
  //            (non-widening: Vd += Vn * Vm on same-size elements)
4482  defm VQRDMLAH : N3VInt3_HS<1, 0, 0b1011, 1, IIC_VMACi16D, IIC_VMACi32D,
4483                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4484                             null_frag>;
4485  def : Pat<(v4i16 (saddsat
4486                     (v4i16 DPR:$src1),
4487                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4488                                                   (v4i16 DPR:$Vm))))),
4489            (v4i16 (VQRDMLAHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4490  def : Pat<(v2i32 (saddsat
4491                     (v2i32 DPR:$src1),
4492                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4493                                                   (v2i32 DPR:$Vm))))),
4494            (v2i32 (VQRDMLAHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4495  def : Pat<(v8i16 (saddsat
4496                     (v8i16 QPR:$src1),
4497                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4498                                                   (v8i16 QPR:$Vm))))),
4499            (v8i16 (VQRDMLAHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4500  def : Pat<(v4i32 (saddsat
4501                     (v4i32 QPR:$src1),
4502                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4503                                                   (v4i32 QPR:$Vm))))),
4504            (v4i32 (VQRDMLAHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
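  // The patterns above fold a saturating add of a vqrdmulh result into a
  // single vqrdmlah. As a rough sketch (not generated output), IR such as
  //   %m = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %b, <4 x i16> %c)
  //   %r = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %a, <4 x i16> %m)
  // would select to something like "vqrdmlah.s16 d0, d1, d2" under these
  // patterns.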
4505
4506  defm VQRDMLAHsl : N3VMulOpSL_HS<0b1110, IIC_VMACi16D, IIC_VMACi32D,
4507                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlah", "s",
4508                                  null_frag>;
4509  def : Pat<(v4i16 (saddsat
4510                     (v4i16 DPR:$src1),
4511                     (v4i16 (int_arm_neon_vqrdmulh
4512                              (v4i16 DPR:$Vn),
4513                              (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4514                                                   imm:$lane)))))),
4515            (v4i16 (VQRDMLAHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm,
4516                                    imm:$lane))>;
4517  def : Pat<(v2i32 (saddsat
4518                     (v2i32 DPR:$src1),
4519                     (v2i32 (int_arm_neon_vqrdmulh
4520                              (v2i32 DPR:$Vn),
4521                              (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4522                                                   imm:$lane)))))),
4523            (v2i32 (VQRDMLAHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4524                                    imm:$lane))>;
4525  def : Pat<(v8i16 (saddsat
4526                     (v8i16 QPR:$src1),
4527                     (v8i16 (int_arm_neon_vqrdmulh
4528                              (v8i16 QPR:$src2),
4529                              (v8i16 (ARMvduplane (v8i16 QPR:$src3),
4530                                                   imm:$lane)))))),
4531            (v8i16 (VQRDMLAHslv8i16 (v8i16 QPR:$src1),
4532                                    (v8i16 QPR:$src2),
4533                                    (v4i16 (EXTRACT_SUBREG
4534                                             QPR:$src3,
4535                                             (DSubReg_i16_reg imm:$lane))),
4536                                    (SubReg_i16_lane imm:$lane)))>;
4537  def : Pat<(v4i32 (saddsat
4538                     (v4i32 QPR:$src1),
4539                     (v4i32 (int_arm_neon_vqrdmulh
4540                              (v4i32 QPR:$src2),
4541                              (v4i32 (ARMvduplane (v4i32 QPR:$src3),
4542                                                   imm:$lane)))))),
4543            (v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
4544                                    (v4i32 QPR:$src2),
4545                                    (v2i32 (EXTRACT_SUBREG
4546                                             QPR:$src3,
4547                                             (DSubReg_i32_reg imm:$lane))),
4548                                    (SubReg_i32_lane imm:$lane)))>;
4549
  //   VQRDMLSH : Vector Saturating Rounding Doubling Multiply Subtract
  //              (non-widening: Vd -= Vn * Vm on same-size elements)
4552  defm VQRDMLSH : N3VInt3_HS<1, 0, 0b1100, 1, IIC_VMACi16D, IIC_VMACi32D,
4553                             IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4554                             null_frag>;
4555  def : Pat<(v4i16 (ssubsat
4556                     (v4i16 DPR:$src1),
4557                     (v4i16 (int_arm_neon_vqrdmulh (v4i16 DPR:$Vn),
4558                                                   (v4i16 DPR:$Vm))))),
4559            (v4i16 (VQRDMLSHv4i16 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4560  def : Pat<(v2i32 (ssubsat
4561                     (v2i32 DPR:$src1),
4562                     (v2i32 (int_arm_neon_vqrdmulh (v2i32 DPR:$Vn),
4563                                                   (v2i32 DPR:$Vm))))),
4564            (v2i32 (VQRDMLSHv2i32 DPR:$src1, DPR:$Vn, DPR:$Vm))>;
4565  def : Pat<(v8i16 (ssubsat
4566                     (v8i16 QPR:$src1),
4567                     (v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$Vn),
4568                                                   (v8i16 QPR:$Vm))))),
4569            (v8i16 (VQRDMLSHv8i16 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4570  def : Pat<(v4i32 (ssubsat
4571                     (v4i32 QPR:$src1),
4572                     (v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$Vn),
4573                                                   (v4i32 QPR:$Vm))))),
4574            (v4i32 (VQRDMLSHv4i32 QPR:$src1, QPR:$Vn, QPR:$Vm))>;
4575
4576  defm VQRDMLSHsl : N3VMulOpSL_HS<0b1111, IIC_VMACi16D, IIC_VMACi32D,
4577                                  IIC_VMACi16Q, IIC_VMACi32Q, "vqrdmlsh", "s",
4578                                  null_frag>;
4579  def : Pat<(v4i16 (ssubsat
4580                     (v4i16 DPR:$src1),
4581                     (v4i16 (int_arm_neon_vqrdmulh
4582                              (v4i16 DPR:$Vn),
4583                              (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4584                                                   imm:$lane)))))),
4585            (v4i16 (VQRDMLSHslv4i16 DPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane))>;
4586  def : Pat<(v2i32 (ssubsat
4587                     (v2i32 DPR:$src1),
4588                     (v2i32 (int_arm_neon_vqrdmulh
4589                              (v2i32 DPR:$Vn),
4590                              (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4591                                                   imm:$lane)))))),
4592            (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4593                                    imm:$lane))>;
4594  def : Pat<(v8i16 (ssubsat
4595                     (v8i16 QPR:$src1),
4596                     (v8i16 (int_arm_neon_vqrdmulh
4597                              (v8i16 QPR:$src2),
4598                              (v8i16 (ARMvduplane (v8i16 QPR:$src3),
4599                                                   imm:$lane)))))),
4600            (v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
4601                                    (v8i16 QPR:$src2),
4602                                    (v4i16 (EXTRACT_SUBREG
4603                                             QPR:$src3,
4604                                             (DSubReg_i16_reg imm:$lane))),
4605                                    (SubReg_i16_lane imm:$lane)))>;
4606  def : Pat<(v4i32 (ssubsat
4607                     (v4i32 QPR:$src1),
4608                     (v4i32 (int_arm_neon_vqrdmulh
4609                              (v4i32 QPR:$src2),
4610                              (v4i32 (ARMvduplane (v4i32 QPR:$src3),
4611                                                    imm:$lane)))))),
4612            (v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
4613                                    (v4i32 QPR:$src2),
4614                                    (v2i32 (EXTRACT_SUBREG
4615                                             QPR:$src3,
4616                                             (DSubReg_i32_reg imm:$lane))),
4617                                    (SubReg_i32_lane imm:$lane)))>;
4618}
4619//   VQDMLAL  : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
4620defm VQDMLAL  : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4621                            "vqdmlal", "s", null_frag>;
4622defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>;
4623
4624let Predicates = [HasNEON] in {
4625def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
4626                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4627                                                  (v4i16 DPR:$Vm))))),
4628          (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4629def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
4630                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4631                                                  (v2i32 DPR:$Vm))))),
4632          (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4633def : Pat<(v4i32 (saddsat (v4i32 QPR:$src1),
4634                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4635                                (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4636                                                     imm:$lane)))))),
4637          (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4638def : Pat<(v2i64 (saddsat (v2i64 QPR:$src1),
4639                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4640                                (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4641                                                     imm:$lane)))))),
4642          (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4643}
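// The accumulating forms of vqdmlal (and vqdmlsl below) use null_frag and are
// instead selected from a saturating add or subtract of a vqdmull result, as
// in the patterns above.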
4644
4645//   VMLS     : Vector Multiply Subtract (integer and floating-point)
4646defm VMLS     : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
4647                             IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4648def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
4649                          v2f32, fmul_su, fsub_mlx>,
4650                Requires<[HasNEON, UseFPVMLx]>;
4651def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
4652                          v4f32, fmul_su, fsub_mlx>,
4653                Requires<[HasNEON, UseFPVMLx]>;
4654def  VMLShd   : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
4655                          v4f16, fmul, fsub>,
4656                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4657def  VMLShq   : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
4658                          v8f16, fmul, fsub>,
4659                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4660defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
4661                              IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
4662def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
4663                            v2f32, fmul_su, fsub_mlx>,
4664                Requires<[HasNEON, UseFPVMLx]>;
4665def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
4666                            v4f32, v2f32, fmul_su, fsub_mlx>,
4667                Requires<[HasNEON, UseFPVMLx]>;
4668def  VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
4669                            v4f16, fmul, fsub>,
4670                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4671def  VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
4672                            v8f16, v4f16, fmul, fsub>,
4673                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
4674
4675let Predicates = [HasNEON] in {
4676def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
4677                  (mul (v8i16 QPR:$src2),
4678                       (v8i16 (ARMvduplane (v8i16 QPR:$src3), imm:$lane))))),
4679          (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
4680                              (v4i16 (EXTRACT_SUBREG QPR:$src3,
4681                                      (DSubReg_i16_reg imm:$lane))),
4682                              (SubReg_i16_lane imm:$lane)))>;
4683
4684def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
4685                  (mul (v4i32 QPR:$src2),
4686                     (v4i32 (ARMvduplane (v4i32 QPR:$src3), imm:$lane))))),
4687          (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
4688                              (v2i32 (EXTRACT_SUBREG QPR:$src3,
4689                                      (DSubReg_i32_reg imm:$lane))),
4690                              (SubReg_i32_lane imm:$lane)))>;
4691}
4692
4693def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1),
4694                  (fmul_su (v4f32 QPR:$src2),
4695                        (v4f32 (ARMvduplane (v4f32 QPR:$src3), imm:$lane))))),
4696          (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
4697                           (v2f32 (EXTRACT_SUBREG QPR:$src3,
4698                                   (DSubReg_i32_reg imm:$lane))),
4699                           (SubReg_i32_lane imm:$lane)))>,
4700          Requires<[HasNEON, UseFPVMLx]>;
4701
4702//   VMLSL    : Vector Multiply Subtract Long (Q -= D * D)
4703defm VMLSLs   : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4704                              "vmlsl", "s", ARMvmulls, sub>;
4705defm VMLSLu   : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
4706                              "vmlsl", "u", ARMvmullu, sub>;
4707
4708defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", ARMvmulls, sub>;
4709defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", ARMvmullu, sub>;
4710
4711//   VQDMLSL  : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
4712defm VQDMLSL  : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
4713                            "vqdmlsl", "s", null_frag>;
4714defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>;
4715
4716let Predicates = [HasNEON] in {
4717def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
4718                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4719                                                  (v4i16 DPR:$Vm))))),
4720          (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4721def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
4722                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4723                                                  (v2i32 DPR:$Vm))))),
4724          (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>;
4725def : Pat<(v4i32 (ssubsat (v4i32 QPR:$src1),
4726                     (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn),
4727                                (v4i16 (ARMvduplane (v4i16 DPR_8:$Vm),
4728                                                     imm:$lane)))))),
4729          (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>;
4730def : Pat<(v2i64 (ssubsat (v2i64 QPR:$src1),
4731                     (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn),
4732                                (v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4733                                                     imm:$lane)))))),
4734          (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>;
4735}
4736
4737// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
4738def  VFMAfd   : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
4739                          v2f32, fmul_su, fadd_mlx>,
4740                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4741
4742def  VFMAfq   : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
4743                          v4f32, fmul_su, fadd_mlx>,
4744                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4745def  VFMAhd   : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
4746                          v4f16, fmul, fadd>,
4747                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4748
4749def  VFMAhq   : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
4750                          v8f16, fmul, fadd>,
4751                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4752
4753//   Fused Vector Multiply Subtract (floating-point)
4754def  VFMSfd   : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
4755                          v2f32, fmul_su, fsub_mlx>,
4756                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4757def  VFMSfq   : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
4758                          v4f32, fmul_su, fsub_mlx>,
4759                Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
4760def  VFMShd   : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
4761                          v4f16, fmul, fsub>,
4762                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4763def  VFMShq   : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
4764                          v8f16, fmul, fsub>,
4765                Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
4766
4767// Match @llvm.fma.* intrinsics
4768def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
4769          (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4770          Requires<[HasNEON,HasFullFP16]>;
4771def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
4772          (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4773          Requires<[HasNEON,HasFullFP16]>;
4774def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
4775          (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4776          Requires<[HasNEON,HasVFP4]>;
4777def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
4778          (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4779          Requires<[HasNEON,HasVFP4]>;
4780def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
4781          (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
4782      Requires<[HasNEON,HasVFP4]>;
4783def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
4784          (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
4785      Requires<[HasNEON,HasVFP4]>;
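// The VFMS patterns above match the @llvm.fma.* intrinsic with the first
// multiplicand negated, i.e. fma(-a, b, c), the fused multiply-subtract form.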
4786
4787// ARMv8.2a dot product instructions.
4788// We put them in the VFPV8 decoder namespace because the ARM and Thumb
4789// encodings are the same and thus no further bit twiddling is necessary
4790// in the disassembler.
4791class VDOT<bit op6, bit op4, bit op23, RegisterClass RegTy, string Asm,
4792           string AsmTy, ValueType AccumTy, ValueType InputTy,
4793           SDPatternOperator OpNode> :
4794      N3Vnp<{0b1100, op23}, 0b10, 0b1101, op6, op4, (outs RegTy:$dst),
4795            (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm), N3RegFrm, IIC_VDOTPROD,
4796            Asm, AsmTy,
4797            [(set (AccumTy RegTy:$dst),
4798                  (OpNode (AccumTy RegTy:$Vd),
4799                          (InputTy RegTy:$Vn),
4800                          (InputTy RegTy:$Vm)))]> {
4801  let Predicates = [HasDotProd];
4802  let DecoderNamespace = "VFPV8";
4803  let Constraints = "$dst = $Vd";
4804}
4805
4806def VUDOTD : VDOT<0, 1, 0, DPR, "vudot", "u8", v2i32, v8i8,  int_arm_neon_udot>;
4807def VSDOTD : VDOT<0, 0, 0, DPR, "vsdot", "s8", v2i32, v8i8,  int_arm_neon_sdot>;
4808def VUDOTQ : VDOT<1, 1, 0, QPR, "vudot", "u8", v4i32, v16i8, int_arm_neon_udot>;
4809def VSDOTQ : VDOT<1, 0, 0, QPR, "vsdot", "s8", v4i32, v16i8, int_arm_neon_sdot>;
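// Conceptually, each 32-bit lane of the accumulator receives the sum of four
// byte products; for vudot.u8, a sketch of the per-lane semantics is:
//   Vd[i] += Vn[4*i+0]*Vm[4*i+0] + Vn[4*i+1]*Vm[4*i+1]
//          + Vn[4*i+2]*Vm[4*i+2] + Vn[4*i+3]*Vm[4*i+3]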
4810
4811// Indexed dot product instructions:
4812multiclass DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty,
4813           ValueType AccumType, ValueType InputType, SDPatternOperator OpNode,
4814           dag RHS> {
4815  def "" : N3Vnp<0b11100, 0b10, 0b1101, Q, U, (outs Ty:$dst),
4816                 (ins Ty:$Vd, Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
4817                 N3RegFrm, IIC_VDOTPROD, opc, dt, []> {
4818    bit lane;
4819    let Inst{5} = lane;
4820    let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane");
4821    let Constraints = "$dst = $Vd";
4822    let Predicates = [HasDotProd];
4823    let DecoderNamespace = "VFPV8";
4824  }
4825
4826  def : Pat<
4827    (AccumType (OpNode (AccumType Ty:$Vd),
4828                       (InputType Ty:$Vn),
4829                       (InputType (bitconvert (AccumType
4830                                  (ARMvduplane (AccumType Ty:$Vm),
4831                                                 VectorIndex32:$lane)))))),
4832    (!cast<Instruction>(NAME) Ty:$Vd, Ty:$Vn, RHS, VectorIndex32:$lane)>;
4833}
4834
4835defm VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR, v2i32, v8i8,
4836                    int_arm_neon_udot, (v2i32 DPR_VFP2:$Vm)>;
4837defm VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR, v2i32, v8i8,
4838                    int_arm_neon_sdot, (v2i32 DPR_VFP2:$Vm)>;
4839defm VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR, v4i32, v16i8,
4840                    int_arm_neon_udot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4841defm VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR, v4i32, v16i8,
4842                    int_arm_neon_sdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
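// In the indexed forms, the lane index selects one 32-bit group of four bytes
// from Vm, and that group is reused for every accumulator lane; the
// ARMvduplane/bitconvert in the pattern above expresses this broadcast.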
4843
4844// v8.6A matrix multiplication extension
4845let Predicates = [HasMatMulInt8] in {
4846  class N3VMatMul<bit B, bit U, string Asm, string AsmTy,
4847                  SDPatternOperator OpNode>
4848        : N3Vnp<{0b1100, B}, 0b10, 0b1100, 1, U, (outs QPR:$dst),
4849                (ins QPR:$Vd, QPR:$Vn, QPR:$Vm), N3RegFrm, NoItinerary,
4850                Asm, AsmTy,
4851                [(set (v4i32 QPR:$dst), (OpNode (v4i32 QPR:$Vd),
4852                                                (v16i8 QPR:$Vn),
4853                                                (v16i8 QPR:$Vm)))]> {
4854    let DecoderNamespace = "VFPV8";
4855    let Constraints = "$dst = $Vd";
4856  }
4857
4858  multiclass N3VMixedDotLane<bit Q, bit U, string Asm, string AsmTy, RegisterClass RegTy,
4859                        ValueType AccumTy, ValueType InputTy, SDPatternOperator OpNode,
4860                        dag RHS> {
4861
4862    def "" : N3Vnp<0b11101, 0b00, 0b1101, Q, U, (outs RegTy:$dst),
4863                (ins RegTy:$Vd, RegTy:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), N3RegFrm,
4864                 NoItinerary, Asm, AsmTy, []> {
4865      bit lane;
4866      let Inst{5} = lane;
4867      let AsmString = !strconcat(Asm, ".", AsmTy, "\t$Vd, $Vn, $Vm$lane");
4868      let DecoderNamespace = "VFPV8";
4869      let Constraints = "$dst = $Vd";
4870    }
4871
4872    def : Pat<
4873      (AccumTy (OpNode (AccumTy RegTy:$Vd),
4874                       (InputTy RegTy:$Vn),
4875                       (InputTy (bitconvert (AccumTy
4876                                (ARMvduplane (AccumTy RegTy:$Vm),
4877                                              VectorIndex32:$lane)))))),
4878      (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
4879
4880  }
4881
4882  multiclass SUDOTLane<bit Q, RegisterClass RegTy, ValueType AccumTy, ValueType InputTy, dag RHS>
4883        : N3VMixedDotLane<Q, 1, "vsudot", "u8", RegTy, AccumTy, InputTy, null_frag, null_frag> {
4884    def : Pat<
4885      (AccumTy (int_arm_neon_usdot (AccumTy RegTy:$Vd),
4886                                   (InputTy (bitconvert (AccumTy
4887                                            (ARMvduplane (AccumTy RegTy:$Vm),
4888                                                          VectorIndex32:$lane)))),
4889                                   (InputTy RegTy:$Vn))),
4890      (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
4891  }
4892
4893  def VSMMLA  : N3VMatMul<0, 0, "vsmmla",  "s8", int_arm_neon_smmla>;
4894  def VUMMLA  : N3VMatMul<0, 1, "vummla",  "u8", int_arm_neon_ummla>;
4895  def VUSMMLA : N3VMatMul<1, 0, "vusmmla", "s8", int_arm_neon_usmmla>;
4896  def VUSDOTD : VDOT<0, 0, 1, DPR, "vusdot", "s8", v2i32, v8i8,  int_arm_neon_usdot>;
4897  def VUSDOTQ : VDOT<1, 0, 1, QPR, "vusdot", "s8", v4i32, v16i8, int_arm_neon_usdot>;
4898
4899  defm VUSDOTDI : N3VMixedDotLane<0, 0, "vusdot", "s8", DPR, v2i32, v8i8,
4900                                  int_arm_neon_usdot, (v2i32 DPR_VFP2:$Vm)>;
4901  defm VUSDOTQI : N3VMixedDotLane<1, 0, "vusdot", "s8", QPR, v4i32, v16i8,
4902                                  int_arm_neon_usdot, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4903  defm VSUDOTDI : SUDOTLane<0, DPR, v2i32, v8i8, (v2i32 DPR_VFP2:$Vm)>;
4904  defm VSUDOTQI : SUDOTLane<1, QPR, v4i32, v16i8, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;
4905}
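// The three MMLA instructions above treat Vn as a 2x8 and Vm as an 8x2 matrix
// of 8-bit elements and accumulate the resulting 2x2 matrix product into the
// four 32-bit lanes of Vd (signed, unsigned and mixed unsigned-by-signed
// variants respectively).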
4906
4907// ARMv8.3 complex operations
4908class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q,
4909                            InstrItinClass itin, dag oops, dag iops,
4910                            string opc, string dt, list<dag> pattern>
4911  : N3VCP8<{?,?}, {op21,s}, q, op4, oops,
4912           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{
4913  bits<2> rot;
4914  let Inst{24-23} = rot;
4915}
4916
4917class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q,
4918                           InstrItinClass itin, dag oops, dag iops, string opc,
4919                            string dt, list<dag> pattern>
4920  : N3VCP8<{?,op23}, {op21,s}, q, op4, oops,
4921           iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> {
4922  bits<1> rot;
4923  let Inst{24} = rot;
4924}
4925
4926class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin,
4927                                  dag oops, dag iops, string opc, string dt,
4928                                  list<dag> pattern>
4929  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
4930               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4931  bits<2> rot;
4932  bit lane;
4933
4934  let Inst{21-20} = rot;
4935  let Inst{5} = lane;
4936}
4937
4938class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
4939                            dag oops, dag iops, string opc, string dt,
4940                            list<dag> pattern>
4941  : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt,
4942               "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> {
4943  bits<2> rot;
4944  bit lane;
4945
4946  let Inst{21-20} = rot;
4947  let Inst{5} = Vm{4};
  // This is needed because the lane operand does not have any bits in the
  // encoding (it only has one possible value), so we need to manually set it
  // to its default value.
4951  let DecoderMethod = "DecodeNEONComplexLane64Instruction";
4952}
4953
4954multiclass N3VCP8ComplexTied<bit op21, bit op4,
4955                       string OpcodeStr, SDPatternOperator Op> {
4956  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4957  def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
4958              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4959              OpcodeStr, "f16", []>;
4960  def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd),
4961              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4962              OpcodeStr, "f16", []>;
4963  }
4964  let Predicates = [HasNEON,HasV8_3a] in {
4965  def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd),
4966              (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
4967              OpcodeStr, "f32", []>;
4968  def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd),
4969              (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot),
4970              OpcodeStr, "f32", []>;
4971  }
4972}
4973
4974multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
4975                       string OpcodeStr, SDPatternOperator Op> {
4976  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
4977  def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
4978              (outs DPR:$Vd),
4979              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4980              OpcodeStr, "f16", []>;
4981  def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ,
4982              (outs QPR:$Vd),
4983              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4984              OpcodeStr, "f16", []>;
4985  }
4986  let Predicates = [HasNEON,HasV8_3a] in {
4987  def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD,
4988              (outs DPR:$Vd),
4989              (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot),
4990              OpcodeStr, "f32", []>;
4991  def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ,
4992              (outs QPR:$Vd),
4993              (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot),
4994              OpcodeStr, "f32", []>;
4995  }
4996}
4997
4998// These instructions index by pairs of lanes, so the VectorIndexes are twice
4999// as wide as the data types.
5000multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
5001                                 SDPatternOperator Op> {
5002  let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
5003  def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
5004                      (outs DPR:$Vd),
5005                      (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
5006                      VectorIndex32:$lane, complexrotateop:$rot),
5007                      OpcodeStr, "f16", []>;
5008  def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ,
5009                      (outs QPR:$Vd),
5010                      (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm,
5011                      VectorIndex32:$lane, complexrotateop:$rot),
5012                      OpcodeStr, "f16", []>;
5013  }
5014  let Predicates = [HasNEON,HasV8_3a] in {
5015  def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD,
5016                      (outs DPR:$Vd),
5017                      (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
5018                      complexrotateop:$rot),
5019                      OpcodeStr, "f32", []>;
5020  def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ,
5021                      (outs QPR:$Vd),
5022                      (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane,
5023                      complexrotateop:$rot),
5024                      OpcodeStr, "f32", []>;
5025  }
5026}
5027
5028defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
5029defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
5030defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;
5031
5032let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
5033  def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
5034            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 0))>;
5035  def : Pat<(v4f16 (int_arm_neon_vcadd_rot270 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
5036            (VCADDv4f16 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm), (i32 1))>;
5037  def : Pat<(v8f16 (int_arm_neon_vcadd_rot90 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
5038            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 0))>;
5039  def : Pat<(v8f16 (int_arm_neon_vcadd_rot270 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm))),
5040            (VCADDv8f16 (v8f16 QPR:$Rn), (v8f16 QPR:$Rm), (i32 1))>;
5041}
5042let Predicates = [HasNEON,HasV8_3a] in {
5043  def : Pat<(v2f32 (int_arm_neon_vcadd_rot90 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
5044            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 0))>;
5045  def : Pat<(v2f32 (int_arm_neon_vcadd_rot270 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm))),
5046            (VCADDv2f32 (v2f32 DPR:$Rn), (v2f32 DPR:$Rm), (i32 1))>;
5047  def : Pat<(v4f32 (int_arm_neon_vcadd_rot90 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
5048            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 0))>;
5049  def : Pat<(v4f32 (int_arm_neon_vcadd_rot270 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm))),
5050            (VCADDv4f32 (v4f32 QPR:$Rn), (v4f32 QPR:$Rm), (i32 1))>;
5051}
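// For VCADD the single rotation bit selects between the two legal rotations:
// as in the intrinsic patterns above, (i32 0) encodes #90 and (i32 1) encodes
// #270.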
5052
5053// Vector Subtract Operations.
5054
5055//   VSUB     : Vector Subtract (integer and floating-point)
5056defm VSUB     : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
5057                         "vsub", "i", sub, 0>;
5058def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
5059                     v2f32, v2f32, fsub, 0>;
5060def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
5061                     v4f32, v4f32, fsub, 0>;
5062def  VSUBhd   : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
5063                     v4f16, v4f16, fsub, 0>,
5064                Requires<[HasNEON,HasFullFP16]>;
5065def  VSUBhq   : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
5066                     v8f16, v8f16, fsub, 0>,
5067                Requires<[HasNEON,HasFullFP16]>;
5068//   VSUBL    : Vector Subtract Long (Q = D - D)
5069defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
5070                            "vsubl", "s", sub, sext, 0>;
5071defm VSUBLu   : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
5072                            "vsubl", "u", sub, zanyext, 0>;
5073//   VSUBW    : Vector Subtract Wide (Q = Q - D)
5074defm VSUBWs   : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
5075defm VSUBWu   : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zanyext, 0>;
5076//   VHSUB    : Vector Halving Subtract
5077defm VHSUBs   : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
5078                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5079                           "vhsub", "s", int_arm_neon_vhsubs, 0>;
5080defm VHSUBu   : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
5081                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5082                           "vhsub", "u", int_arm_neon_vhsubu, 0>;
//   VQSUB    : Vector Saturating Subtract
5084defm VQSUBs   : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
5085                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5086                            "vqsub", "s", ssubsat, 0>;
5087defm VQSUBu   : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
5088                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5089                            "vqsub", "u", usubsat, 0>;
5090//   VSUBHN   : Vector Subtract and Narrow Returning High Half (D = Q - Q)
5091defm VSUBHN   : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>;
//   VRSUBHN  : Vector Rounding Subtract and Narrow Returning High Half (D = Q - Q)
5093defm VRSUBHN  : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
5094                            int_arm_neon_vrsubhn, 0>;
5095
5096let Predicates = [HasNEON] in {
5097def : Pat<(v8i8  (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
5098          (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
5099def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
5100          (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
5101def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
5102          (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
5103}
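// vsubhn produces the most significant half of each element of the wide
// subtraction, which is why the patterns above match a truncate of the
// difference shifted right by half the element width (8, 16 or 32 bits).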
5104
5105// Vector Comparisons.
5106
5107//   VCEQ     : Vector Compare Equal
5108defm VCEQ     : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5109                            IIC_VSUBi4Q, "vceq", "i", ARMCCeq, 1>;
5110def  VCEQfd   : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
5111                         ARMCCeq, 1>;
5112def  VCEQfq   : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
5113                         ARMCCeq, 1>;
5114def  VCEQhd   : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
5115                         ARMCCeq, 1>,
5116                Requires<[HasNEON, HasFullFP16]>;
5117def  VCEQhq   : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
5118                         ARMCCeq, 1>,
5119                Requires<[HasNEON, HasFullFP16]>;
5120
5121let TwoOperandAliasConstraint = "$Vm = $Vd" in
5122defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
5123                            "$Vd, $Vm, #0", ARMCCeq>;
5124
5125//   VCGE     : Vector Compare Greater Than or Equal
5126defm VCGEs    : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5127                            IIC_VSUBi4Q, "vcge", "s", ARMCCge, 0>;
5128defm VCGEu    : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5129                            IIC_VSUBi4Q, "vcge", "u", ARMCChs, 0>;
5130def  VCGEfd   : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
5131                         ARMCCge, 0>;
5132def  VCGEfq   : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
5133                         ARMCCge, 0>;
5134def  VCGEhd   : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
5135                         ARMCCge, 0>,
5136                Requires<[HasNEON, HasFullFP16]>;
5137def  VCGEhq   : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
5138                         ARMCCge, 0>,
5139                Requires<[HasNEON, HasFullFP16]>;
5140
5141let TwoOperandAliasConstraint = "$Vm = $Vd" in {
5142defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
5143                            "$Vd, $Vm, #0", ARMCCge>;
5144defm VCLEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
5145                            "$Vd, $Vm, #0", ARMCCle>;
5146}
5147
5148//   VCGT     : Vector Compare Greater Than
5149defm VCGTs    : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5150                            IIC_VSUBi4Q, "vcgt", "s", ARMCCgt, 0>;
5151defm VCGTu    : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
5152                            IIC_VSUBi4Q, "vcgt", "u", ARMCChi, 0>;
5153def  VCGTfd   : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
5154                         ARMCCgt, 0>;
5155def  VCGTfq   : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
5156                         ARMCCgt, 0>;
5157def  VCGThd   : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
5158                         ARMCCgt, 0>,
5159                Requires<[HasNEON, HasFullFP16]>;
5160def  VCGThq   : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
5161                         ARMCCgt, 0>,
5162                Requires<[HasNEON, HasFullFP16]>;
5163
5164let TwoOperandAliasConstraint = "$Vm = $Vd" in {
5165defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
5166                            "$Vd, $Vm, #0", ARMCCgt>;
5167defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
5168                            "$Vd, $Vm, #0", ARMCClt>;
5169}
5170
5171//   VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
5172def  VACGEfd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
5173                        "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
5174def  VACGEfq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
5175                        "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
5176def  VACGEhd   : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
5177                        "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
5178                 Requires<[HasNEON, HasFullFP16]>;
5179def  VACGEhq   : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
5180                        "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
5181                 Requires<[HasNEON, HasFullFP16]>;
5182//   VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
5183def  VACGTfd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
5184                        "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
5185def  VACGTfq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
5186                        "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
5187def  VACGThd   : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
5188                        "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
5189                 Requires<[HasNEON, HasFullFP16]>;
5190def  VACGThq   : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
5191                        "f16", v8i16, v8f16, int_arm_neon_vacgt, 0>,
5192                 Requires<[HasNEON, HasFullFP16]>;
5193//   VTST     : Vector Test Bits
5194defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
5195                        IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
5196
5197def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
5198                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5199def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
5200                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5201def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
5202                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5203def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
5204                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5205let Predicates = [HasNEON, HasFullFP16] in {
5206def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
5207                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5208def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
5209                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5210def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
5211                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
5212def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
5213                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
5214}
5215
5216// +fp16fml Floating Point Multiplication Variants
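// vfmal/vfmsl multiply pairs of half-precision elements and accumulate the
// products into single-precision lanes of the destination; roughly, an f16
// multiply feeding an f32 accumulator.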
5217let Predicates = [HasNEON, HasFP16FML], DecoderNamespace= "VFPV8" in {
5218
5219class N3VCP8F16Q1<string asm, RegisterClass Td, RegisterClass Tn,
5220                RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
5221  : N3VCP8<op1, op2, 1, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
5222           asm, "f16", "$Vd, $Vn, $Vm", "", []>;
5223
5224class N3VCP8F16Q0<string asm, RegisterClass Td, RegisterClass Tn,
5225                RegisterClass Tm, bits<2> op1, bits<2> op2, bit op3>
5226  : N3VCP8Q0<op1, op2, 0, op3, (outs Td:$Vd), (ins Tn:$Vn, Tm:$Vm), NoItinerary,
5227           asm, "f16", "$Vd, $Vn, $Vm", "", []>;
5228
5229// Vd, Vs, Vs[0-15], Idx[0-1]
5230class VFMD<string opc, string type, bits<2> S>
5231  : N3VLaneCP8<0, S, 0, 1, (outs DPR:$Vd),
5232               (ins SPR:$Vn, SPR_8:$Vm, VectorIndex32:$idx),
5233               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
5234  bit idx;
5235  let Inst{3} = idx;
5236  let Inst{19-16} = Vn{4-1};
5237  let Inst{7}     = Vn{0};
5238  let Inst{5}     = Vm{0};
5239  let Inst{2-0}   = Vm{3-1};
5240}
5241
5242// Vq, Vd, Vd[0-7], Idx[0-3]
5243class VFMQ<string opc, string type, bits<2> S>
5244  : N3VLaneCP8<0, S, 1, 1, (outs QPR:$Vd),
5245               (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
5246               IIC_VMACD, opc, type, "$Vd, $Vn, $Vm$idx", "", []> {
5247  bits<2> idx;
5248  let Inst{5} = idx{1};
5249  let Inst{3} = idx{0};
5250}
5251
5252//                                                op1   op2   op3
5253def VFMALD  : N3VCP8F16Q0<"vfmal", DPR, SPR, SPR, 0b00, 0b10, 1>;
5254def VFMSLD  : N3VCP8F16Q0<"vfmsl", DPR, SPR, SPR, 0b01, 0b10, 1>;
5255def VFMALQ  : N3VCP8F16Q1<"vfmal", QPR, DPR, DPR, 0b00, 0b10, 1>;
5256def VFMSLQ  : N3VCP8F16Q1<"vfmsl", QPR, DPR, DPR, 0b01, 0b10, 1>;
5257def VFMALDI : VFMD<"vfmal", "f16", 0b00>;
5258def VFMSLDI : VFMD<"vfmsl", "f16", 0b01>;
5259def VFMALQI : VFMQ<"vfmal", "f16", 0b00>;
5260def VFMSLQI : VFMQ<"vfmsl", "f16", 0b01>;
5261} // HasNEON, HasFP16FML
5262
5263
5264def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
5265                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5266def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
5267                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5268def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
5269                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5270def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
5271                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5272let Predicates = [HasNEON, HasFullFP16] in {
5273def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
5274                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5275def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
5276                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5277def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
5278                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
5279def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
5280                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
5281}
5282
5283// Vector Bitwise Operations.
5284
5285def vnotd : PatFrag<(ops node:$in),
5286                    (xor node:$in, ARMimmAllOnesD)>;
5287def vnotq : PatFrag<(ops node:$in),
5288                    (xor node:$in, ARMimmAllOnesV)>;
5289
5290
5291//   VAND     : Vector Bitwise AND
5292def  VANDd    : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
5293                      v2i32, v2i32, and, 1>;
5294def  VANDq    : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand",
5295                      v4i32, v4i32, and, 1>;
5296
5297//   VEOR     : Vector Bitwise Exclusive OR
5298def  VEORd    : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor",
5299                      v2i32, v2i32, xor, 1>;
5300def  VEORq    : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor",
5301                      v4i32, v4i32, xor, 1>;
5302
5303//   VORR     : Vector Bitwise OR
5304def  VORRd    : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
5305                      v2i32, v2i32, or, 1>;
5306def  VORRq    : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
5307                      v4i32, v4i32, or, 1>;
5308
5309def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
5310                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
5311                          IIC_VMOVImm,
5312                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
5313                          [(set DPR:$Vd,
5314                            (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
5315  let Inst{9} = SIMM{9};
5316}
5317
5318def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
5319                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
5320                          IIC_VMOVImm,
5321                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
5322                          [(set DPR:$Vd,
5323                            (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
5324  let Inst{10-9} = SIMM{10-9};
5325}
5326
5327def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
5328                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
5329                          IIC_VMOVImm,
5330                          "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
5331                          [(set QPR:$Vd,
5332                            (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
5333  let Inst{9} = SIMM{9};
5334}
5335
5336def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
5337                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
5338                          IIC_VMOVImm,
5339                          "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
5340                          [(set QPR:$Vd,
5341                            (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
5342  let Inst{10-9} = SIMM{10-9};
5343}
5344
5345
5346//   VBIC     : Vector Bitwise Bit Clear (AND NOT)
5347let TwoOperandAliasConstraint = "$Vn = $Vd" in {
5348def  VBICd    : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5349                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
5350                     "vbic", "$Vd, $Vn, $Vm", "",
5351                     [(set DPR:$Vd, (v2i32 (and DPR:$Vn,
5352                                                 (vnotd DPR:$Vm))))]>;
5353def  VBICq    : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5354                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
5355                     "vbic", "$Vd, $Vn, $Vm", "",
5356                     [(set QPR:$Vd, (v4i32 (and QPR:$Vn,
5357                                                 (vnotq QPR:$Vm))))]>;
5358}
5359
5360def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
5361                          (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
5362                          IIC_VMOVImm,
5363                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
5364                          [(set DPR:$Vd,
5365                            (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
5366  let Inst{9} = SIMM{9};
5367}
5368
5369def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
5370                          (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
5371                          IIC_VMOVImm,
5372                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
5373                          [(set DPR:$Vd,
5374                            (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
5375  let Inst{10-9} = SIMM{10-9};
5376}
5377
5378def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
5379                          (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
5380                          IIC_VMOVImm,
5381                          "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
5382                          [(set QPR:$Vd,
5383                            (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
5384  let Inst{9} = SIMM{9};
5385}
5386
5387def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
5388                          (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
5389                          IIC_VMOVImm,
5390                          "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
5391                          [(set QPR:$Vd,
5392                            (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
5393  let Inst{10-9} = SIMM{10-9};
5394}
5395
5396//   VORN     : Vector Bitwise OR NOT
5397def  VORNd    : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd),
5398                     (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD,
5399                     "vorn", "$Vd, $Vn, $Vm", "",
5400                     [(set DPR:$Vd, (v2i32 (or DPR:$Vn,
5401                                                (vnotd DPR:$Vm))))]>;
5402def  VORNq    : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
5403                     (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ,
5404                     "vorn", "$Vd, $Vn, $Vm", "",
5405                     [(set QPR:$Vd, (v4i32 (or QPR:$Vn,
5406                                                (vnotq QPR:$Vm))))]>;
5407
5408//   VMVN     : Vector Bitwise NOT (Immediate)
5409
5410let isReMaterializable = 1 in {
5411
5412def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
5413                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5414                         "vmvn", "i16", "$Vd, $SIMM", "",
5415                         [(set DPR:$Vd, (v4i16 (ARMvmvnImm timm:$SIMM)))]> {
5416  let Inst{9} = SIMM{9};
5417}
5418
5419def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
5420                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
5421                         "vmvn", "i16", "$Vd, $SIMM", "",
5422                         [(set QPR:$Vd, (v8i16 (ARMvmvnImm timm:$SIMM)))]> {
5423  let Inst{9} = SIMM{9};
5424}
5425
5426def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
5427                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5428                         "vmvn", "i32", "$Vd, $SIMM", "",
5429                         [(set DPR:$Vd, (v2i32 (ARMvmvnImm timm:$SIMM)))]> {
5430  let Inst{11-8} = SIMM{11-8};
5431}
5432
5433def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
5434                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
5435                         "vmvn", "i32", "$Vd, $SIMM", "",
5436                         [(set QPR:$Vd, (v4i32 (ARMvmvnImm timm:$SIMM)))]> {
5437  let Inst{11-8} = SIMM{11-8};
5438}
5439}
5440
5441//   VMVN     : Vector Bitwise NOT
5442def  VMVNd    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
5443                     (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD,
5444                     "vmvn", "$Vd, $Vm", "",
5445                     [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>;
5446def  VMVNq    : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
5447                     (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD,
5448                     "vmvn", "$Vd, $Vm", "",
5449                     [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>;
5450let Predicates = [HasNEON] in {
5451def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
5452def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
5453}
5454
// The TwoAddress pass will not go looking for equivalent operations
// with different register constraints; it just inserts copies.
// That is why the VBSP pseudo-instruction is implemented. It is expanded
// later into VBIT/VBIF/VBSL, taking register constraints into account to
// avoid copies.
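// VBSP is a bitwise select: with its first source operand acting as the mask,
// the result is (Vn & mask) | (Vm & ~mask), which is exactly the or/and
// patterns matched below.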
5459def  VBSPd
5460  : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5461                IIC_VBINiD, "",
5462                [(set DPR:$Vd,
5463                      (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
5464let Predicates = [HasNEON] in {
5465def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
5466                                   (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
5467          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5468def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
5469                                    (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
5470          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5471def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
5472                                    (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
5473          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5474def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
5475                                    (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
5476          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5477def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
5478                                    (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
5479          (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5480
5481def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
5482                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
5483          (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5484
5485def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
5486                     (and DPR:$Vm, (vnotd DPR:$Vd)))),
5487          (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5488}
5489
5490def  VBSPq
5491  : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5492                IIC_VBINiQ, "",
5493                [(set QPR:$Vd,
5494                      (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
5495let Predicates = [HasNEON] in {
5496def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
5497                                   (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
5498          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5499def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
5500                                    (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
5501          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5502def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
5503                                    (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
5504          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5505def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
5506                                    (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
5507          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5508def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
5509                                    (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
5510          (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5511
5512def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
5513                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
5514          (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5515def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
5516                     (and QPR:$Vm, (vnotq QPR:$Vd)))),
5517          (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5518}
5519
5520//   VBSL     : Vector Bitwise Select
5521def  VBSLd    : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5522                     (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5523                     N3RegFrm, IIC_VBINiD,
5524                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5525                     []>;
5526
5527def  VBSLq    : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5528                     (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5529                     N3RegFrm, IIC_VBINiQ,
5530                     "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5531                     []>;
5532
5533//   VBIF     : Vector Bitwise Insert if False
5534//              like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
5535def  VBIFd    : N3VX<1, 0, 0b11, 0b0001, 0, 1,
5536                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5537                     N3RegFrm, IIC_VBINiD,
5538                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5539                     []>;
5540def  VBIFq    : N3VX<1, 0, 0b11, 0b0001, 1, 1,
5541                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5542                     N3RegFrm, IIC_VBINiQ,
5543                     "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5544                     []>;
5545
5546//   VBIT     : Vector Bitwise Insert if True
5547//              like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
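//              Vd = (Vn & Vm) | (Vd & ~Vm): Vn bits are inserted where Vm is 1.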
5548def  VBITd    : N3VX<1, 0, 0b10, 0b0001, 0, 1,
5549                     (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5550                     N3RegFrm, IIC_VBINiD,
5551                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5552                     []>;
5553def  VBITq    : N3VX<1, 0, 0b10, 0b0001, 1, 1,
5554                     (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5555                     N3RegFrm, IIC_VBINiQ,
5556                     "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5557                     []>;
5558
5559// Vector Absolute Differences.
5560
5561//   VABD     : Vector Absolute Difference
5562defm VABDs    : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
5563                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5564                           "vabd", "s", int_arm_neon_vabds, 1>;
5565defm VABDu    : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
5566                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5567                           "vabd", "u", int_arm_neon_vabdu, 1>;
5568def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
5569                        "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
5570def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
5571                        "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
5572def  VABDhd   : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
5573                        "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
5574                Requires<[HasNEON, HasFullFP16]>;
5575def  VABDhq   : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
5576                        "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
5577                Requires<[HasNEON, HasFullFP16]>;
5578
5579//   VABDL    : Vector Absolute Difference Long (Q = | D - D |)
5580defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
5581                               "vabdl", "s", int_arm_neon_vabds, zext, 1>;
5582defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
5583                               "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
5584
5585let Predicates = [HasNEON] in {
5586def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
5587          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
5588def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
5589          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
5590}
5591
5592// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
5593// shift/xor pattern for ABS.
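// With d = zext($opA) - zext($opB) and s = d >>s 63, |d| = (d + s) ^ s, which is
// the expansion matched below.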
5594
5595def abd_shr :
5596    PatFrag<(ops node:$in1, node:$in2, node:$shift),
5597            (ARMvshrsImm (sub (zext node:$in1),
5598                            (zext node:$in2)), (i32 $shift))>;
5599
5600let Predicates = [HasNEON] in {
5601def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
5602               (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
5603                                                   (zext (v2i32 DPR:$opB))),
5604                                         (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
5605          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
5606}
5607
5608//   VABA     : Vector Absolute Difference and Accumulate
5609defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5610                             "vaba", "s", int_arm_neon_vabds, add>;
5611defm VABAu    : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
5612                             "vaba", "u", int_arm_neon_vabdu, add>;
5613
5614//   VABAL    : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
5615defm VABALs   : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
5616                                 "vabal", "s", int_arm_neon_vabds, zext, add>;
5617defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
5618                                 "vabal", "u", int_arm_neon_vabdu, zext, add>;
5619
5620// Vector Maximum and Minimum.
5621
5622//   VMAX     : Vector Maximum
5623defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
5624                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5625                           "vmax", "s", smax, 1>;
5626defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
5627                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5628                           "vmax", "u", umax, 1>;
5629def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
5630                        "vmax", "f32",
5631                        v2f32, v2f32, fmaximum, 1>;
5632def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5633                        "vmax", "f32",
5634                        v4f32, v4f32, fmaximum, 1>;
5635def  VMAXhd   : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
5636                        "vmax", "f16",
5637                        v4f16, v4f16, fmaximum, 1>,
5638                Requires<[HasNEON, HasFullFP16]>;
5639def  VMAXhq   : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5640                        "vmax", "f16",
5641                        v8f16, v8f16, fmaximum, 1>,
5642                Requires<[HasNEON, HasFullFP16]>;
5643
5644// VMAXNM
5645let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
5646  def NEON_VMAXNMNDf  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
5647                                  N3RegFrm, NoItinerary, "vmaxnm", "f32",
5648                                  v2f32, v2f32, fmaxnum, 1>,
5649                                  Requires<[HasV8, HasNEON]>;
5650  def NEON_VMAXNMNQf  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
5651                                  N3RegFrm, NoItinerary, "vmaxnm", "f32",
5652                                  v4f32, v4f32, fmaxnum, 1>,
5653                                  Requires<[HasV8, HasNEON]>;
5654  def NEON_VMAXNMNDh  : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
5655                                  N3RegFrm, NoItinerary, "vmaxnm", "f16",
5656                                  v4f16, v4f16, fmaxnum, 1>,
5657                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5658  def NEON_VMAXNMNQh  : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
5659                                  N3RegFrm, NoItinerary, "vmaxnm", "f16",
5660                                  v8f16, v8f16, fmaxnum, 1>,
5661                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5662}
5663
5664//   VMIN     : Vector Minimum
5665defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
5666                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5667                           "vmin", "s", smin, 1>;
5668defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
5669                           IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
5670                           "vmin", "u", umin, 1>;
5671def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
5672                        "vmin", "f32",
5673                        v2f32, v2f32, fminimum, 1>;
5674def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5675                        "vmin", "f32",
5676                        v4f32, v4f32, fminimum, 1>;
5677def  VMINhd   : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
5678                        "vmin", "f16",
5679                        v4f16, v4f16, fminimum, 1>,
5680                Requires<[HasNEON, HasFullFP16]>;
5681def  VMINhq   : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
5682                        "vmin", "f16",
5683                        v8f16, v8f16, fminimum, 1>,
5684                Requires<[HasNEON, HasFullFP16]>;
5685
5686// VMINNM
5687let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
5688  def NEON_VMINNMNDf  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
5689                                  N3RegFrm, NoItinerary, "vminnm", "f32",
5690                                  v2f32, v2f32, fminnum, 1>,
5691                                  Requires<[HasV8, HasNEON]>;
5692  def NEON_VMINNMNQf  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
5693                                  N3RegFrm, NoItinerary, "vminnm", "f32",
5694                                  v4f32, v4f32, fminnum, 1>,
5695                                  Requires<[HasV8, HasNEON]>;
5696  def NEON_VMINNMNDh  : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
5697                                  N3RegFrm, NoItinerary, "vminnm", "f16",
5698                                  v4f16, v4f16, fminnum, 1>,
5699                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5700  def NEON_VMINNMNQh  : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
5701                                  N3RegFrm, NoItinerary, "vminnm", "f16",
5702                                  v8f16, v8f16, fminnum, 1>,
5703                                  Requires<[HasV8, HasNEON, HasFullFP16]>;
5704}
5705
5706// Vector Pairwise Operations.
5707
5708//   VPADD    : Vector Pairwise Add
5709def  VPADDi8  : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5710                        "vpadd", "i8",
5711                        v8i8, v8i8, int_arm_neon_vpadd, 0>;
5712def  VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5713                        "vpadd", "i16",
5714                        v4i16, v4i16, int_arm_neon_vpadd, 0>;
5715def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
5716                        "vpadd", "i32",
5717                        v2i32, v2i32, int_arm_neon_vpadd, 0>;
5718def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
5719                        IIC_VPBIND, "vpadd", "f32",
5720                        v2f32, v2f32, int_arm_neon_vpadd, 0>;
5721def  VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
5722                        IIC_VPBIND, "vpadd", "f16",
5723                        v4f16, v4f16, int_arm_neon_vpadd, 0>,
5724                Requires<[HasNEON, HasFullFP16]>;
5725
5726//   VPADDL   : Vector Pairwise Add Long
5727defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
5728                             int_arm_neon_vpaddls>;
5729defm VPADDLu  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u",
5730                             int_arm_neon_vpaddlu>;
5731
5732//   VPADAL   : Vector Pairwise Add and Accumulate Long
5733defm VPADALs  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s",
5734                              int_arm_neon_vpadals>;
5735defm VPADALu  : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
5736                              int_arm_neon_vpadalu>;
5737
5738//   VPMAX    : Vector Pairwise Maximum
5739def  VPMAXs8  : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5740                        "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
5741def  VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5742                        "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
5743def  VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5744                        "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
5745def  VPMAXu8  : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5746                        "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
5747def  VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5748                        "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
5749def  VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax",
5750                        "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
5751def  VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
5752                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
5753def  VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
5754                        "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
5755                Requires<[HasNEON, HasFullFP16]>;
5756
5757//   VPMIN    : Vector Pairwise Minimum
5758def  VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5759                        "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
5760def  VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5761                        "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
5762def  VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5763                        "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
5764def  VPMINu8  : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5765                        "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
5766def  VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5767                        "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
5768def  VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
5769                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
5770def  VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
5771                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
5772def  VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
5773                        "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
5774                Requires<[HasNEON, HasFullFP16]>;
5775
5776// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
5777
5778//   VRECPE   : Vector Reciprocal Estimate
5779def  VRECPEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
5780                        IIC_VUNAD, "vrecpe", "u32",
5781                        v2i32, v2i32, int_arm_neon_vrecpe>;
5782def  VRECPEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0,
5783                        IIC_VUNAQ, "vrecpe", "u32",
5784                        v4i32, v4i32, int_arm_neon_vrecpe>;
5785def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
5786                        IIC_VUNAD, "vrecpe", "f32",
5787                        v2f32, v2f32, int_arm_neon_vrecpe>;
5788def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
5789                        IIC_VUNAQ, "vrecpe", "f32",
5790                        v4f32, v4f32, int_arm_neon_vrecpe>;
5791def  VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
5792                        IIC_VUNAD, "vrecpe", "f16",
5793                        v4f16, v4f16, int_arm_neon_vrecpe>,
5794                Requires<[HasNEON, HasFullFP16]>;
5795def  VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
5796                        IIC_VUNAQ, "vrecpe", "f16",
5797                        v8f16, v8f16, int_arm_neon_vrecpe>,
5798                Requires<[HasNEON, HasFullFP16]>;
5799
5800//   VRECPS   : Vector Reciprocal Step
5801def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
5802                        IIC_VRECSD, "vrecps", "f32",
5803                        v2f32, v2f32, int_arm_neon_vrecps, 1>;
5804def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
5805                        IIC_VRECSQ, "vrecps", "f32",
5806                        v4f32, v4f32, int_arm_neon_vrecps, 1>;
5807def  VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
5808                        IIC_VRECSD, "vrecps", "f16",
5809                        v4f16, v4f16, int_arm_neon_vrecps, 1>,
5810                Requires<[HasNEON, HasFullFP16]>;
5811def  VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
5812                        IIC_VRECSQ, "vrecps", "f16",
5813                        v8f16, v8f16, int_arm_neon_vrecps, 1>,
5814                Requires<[HasNEON, HasFullFP16]>;
5815
5816//   VRSQRTE  : Vector Reciprocal Square Root Estimate
5817def  VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
5818                         IIC_VUNAD, "vrsqrte", "u32",
5819                         v2i32, v2i32, int_arm_neon_vrsqrte>;
5820def  VRSQRTEq  : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
5821                         IIC_VUNAQ, "vrsqrte", "u32",
5822                         v4i32, v4i32, int_arm_neon_vrsqrte>;
5823def  VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
5824                         IIC_VUNAD, "vrsqrte", "f32",
5825                         v2f32, v2f32, int_arm_neon_vrsqrte>;
5826def  VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
5827                         IIC_VUNAQ, "vrsqrte", "f32",
5828                         v4f32, v4f32, int_arm_neon_vrsqrte>;
5829def  VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
5830                         IIC_VUNAD, "vrsqrte", "f16",
5831                         v4f16, v4f16, int_arm_neon_vrsqrte>,
5832                Requires<[HasNEON, HasFullFP16]>;
5833def  VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
5834                         IIC_VUNAQ, "vrsqrte", "f16",
5835                         v8f16, v8f16, int_arm_neon_vrsqrte>,
5836                Requires<[HasNEON, HasFullFP16]>;
5837
5838//   VRSQRTS  : Vector Reciprocal Square Root Step
5839def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
5840                        IIC_VRECSD, "vrsqrts", "f32",
5841                        v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
5842def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
5843                        IIC_VRECSQ, "vrsqrts", "f32",
5844                        v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
5845def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
5846                        IIC_VRECSD, "vrsqrts", "f16",
5847                        v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
5848                Requires<[HasNEON, HasFullFP16]>;
5849def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
5850                        IIC_VRECSQ, "vrsqrts", "f16",
5851                        v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
5852                Requires<[HasNEON, HasFullFP16]>;
5853
5854// Vector Shifts.
5855
5856//   VSHL     : Vector Shift
5857defm VSHLs    : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
5858                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
5859                            "vshl", "s", int_arm_neon_vshifts>;
5860defm VSHLu    : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
5861                            IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
5862                            "vshl", "u", int_arm_neon_vshiftu>;
5863
5864let Predicates = [HasNEON] in {
5865def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
5866          (VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
5867def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
5868          (VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
5869def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
5870          (VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
5871def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
5872          (VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
5873def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
5874          (VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
5875def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
5876          (VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
5877def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
5878          (VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
5879def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
5880          (VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;
5881
5882def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
5883          (VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
5884def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
5885          (VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
5886def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
5887          (VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
5888def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
5889          (VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
5890def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
5891          (VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
5892def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
5893          (VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
5894def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
5895          (VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
5896def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
5897          (VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;
5898
5899}
5900
5901//   VSHL     : Vector Shift Left (Immediate)
5902defm VSHLi    : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;
5903
5904//   VSHR     : Vector Shift Right (Immediate)
5905defm VSHRs    : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
5906                            ARMvshrsImm>;
5907defm VSHRu    : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
5908                            ARMvshruImm>;
5909
5910//   VSHLL    : Vector Shift Left Long
5911defm VSHLLs   : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
5912  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
5913defm VSHLLu   : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
5914  PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;
5915
5916//   VSHLL    : Vector Shift Left Long (with maximum shift count)
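// A shift equal to the element size cannot be encoded in the ordinary immediate
// form, so these variants use a separate encoding and are matched explicitly.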
5917class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
5918                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
5919                ValueType OpTy, Operand ImmTy>
5920  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
5921           ResTy, OpTy, ImmTy, null_frag> {
5922  let Inst{21-16} = op21_16;
5923  let DecoderMethod = "DecodeVSHLMaxInstruction";
5924}
5925def  VSHLLi8  : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
5926                          v8i16, v8i8, imm8>;
5927def  VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
5928                          v4i32, v4i16, imm16>;
5929def  VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
5930                          v2i64, v2i32, imm32>;
5931
5932let Predicates = [HasNEON] in {
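// Shifting left by the full element width leaves the source element in the high
// half and zeros in the low half, so sext, zext and anyext all select the same
// maximum-shift instruction.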
5933def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
5934          (VSHLLi8 DPR:$Rn, 8)>;
5935def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
5936          (VSHLLi16 DPR:$Rn, 16)>;
5937def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
5938          (VSHLLi32 DPR:$Rn, 32)>;
5939def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
5940          (VSHLLi8 DPR:$Rn, 8)>;
5941def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
5942          (VSHLLi16 DPR:$Rn, 16)>;
5943def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
5944          (VSHLLi32 DPR:$Rn, 32)>;
5945def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
5946          (VSHLLi8 DPR:$Rn, 8)>;
5947def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
5948          (VSHLLi16 DPR:$Rn, 16)>;
5949def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
5950          (VSHLLi32 DPR:$Rn, 32)>;
5951}
5952
5953//   VSHRN    : Vector Shift Right and Narrow
5954defm VSHRN    : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
5955                           PatFrag<(ops node:$Rn, node:$amt),
5956                                   (trunc (ARMvshrsImm node:$Rn, node:$amt))>>;
5957
5958let Predicates = [HasNEON] in {
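// For the legal shift amounts (at most half the source element width), logical
// and arithmetic right shifts truncate to the same narrow value, so unsigned
// shifts also map onto VSHRN.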
5959def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
5960          (VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
5961def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
5962          (VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
5963def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
5964          (VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
5965}
5966
5967//   VRSHL    : Vector Rounding Shift
5968defm VRSHLs   : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
5969                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5970                            "vrshl", "s", int_arm_neon_vrshifts>;
5971defm VRSHLu   : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
5972                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5973                            "vrshl", "u", int_arm_neon_vrshiftu>;
5974//   VRSHR    : Vector Rounding Shift Right
5975defm VRSHRs   : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
5976                            NEONvrshrsImm>;
5977defm VRSHRu   : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
5978                            NEONvrshruImm>;
5979
5980//   VRSHRN   : Vector Rounding Shift Right and Narrow
5981defm VRSHRN   : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
5982                           NEONvrshrnImm>;
5983
5984//   VQSHL    : Vector Saturating Shift
5985defm VQSHLs   : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
5986                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5987                            "vqshl", "s", int_arm_neon_vqshifts>;
5988defm VQSHLu   : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
5989                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
5990                            "vqshl", "u", int_arm_neon_vqshiftu>;
5991//   VQSHL    : Vector Saturating Shift Left (Immediate)
5992defm VQSHLsi  : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshlsImm>;
5993defm VQSHLui  : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshluImm>;
5994
5995//   VQSHLU   : Vector Saturating Shift Left (Immediate, Unsigned)
5996defm VQSHLsu  : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsuImm>;
5997
5998//   VQSHRN   : Vector Saturating Shift Right and Narrow
5999defm VQSHRNs  : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
6000                           NEONvqshrnsImm>;
6001defm VQSHRNu  : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
6002                           NEONvqshrnuImm>;
6003
6004//   VQSHRUN  : Vector Saturating Shift Right and Narrow (Unsigned)
6005defm VQSHRUN  : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
6006                           NEONvqshrnsuImm>;
6007
6008//   VQRSHL   : Vector Saturating Rounding Shift
6009defm VQRSHLs  : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
6010                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
6011                            "vqrshl", "s", int_arm_neon_vqrshifts>;
6012defm VQRSHLu  : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm,
6013                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
6014                            "vqrshl", "u", int_arm_neon_vqrshiftu>;
6015
6016//   VQRSHRN  : Vector Saturating Rounding Shift Right and Narrow
6017defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
6018                           NEONvqrshrnsImm>;
6019defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u",
6020                           NEONvqrshrnuImm>;
6021
6022//   VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned)
6023defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
6024                           NEONvqrshrnsuImm>;
6025
6026//   VSRA     : Vector Shift Right and Accumulate
6027defm VSRAs    : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
6028defm VSRAu    : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
6029//   VRSRA    : Vector Rounding Shift Right and Accumulate
6030defm VRSRAs   : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
6031defm VRSRAu   : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;
6032
6033//   VSLI     : Vector Shift Left and Insert
6034defm VSLI     : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">;
6035
6036//   VSRI     : Vector Shift Right and Insert
6037defm VSRI     : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">;
6038
6039// Vector Absolute and Saturating Absolute.
6040
6041//   VABS     : Vector Absolute Value
6042defm VABS     : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0,
6043                           IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", abs>;
6044def  VABSfd   : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
6045                     "vabs", "f32",
6046                     v2f32, v2f32, fabs>;
6047def  VABSfq   : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
6048                     "vabs", "f32",
6049                      v4f32, v4f32, fabs>;
6050def  VABShd   : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
6051                     "vabs", "f16",
6052                     v4f16, v4f16, fabs>,
6053                Requires<[HasNEON, HasFullFP16]>;
6054def  VABShq   : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
6055                     "vabs", "f16",
6056                      v8f16, v8f16, fabs>,
6057                Requires<[HasNEON, HasFullFP16]>;
6058
6059//   VQABS    : Vector Saturating Absolute Value
6060defm VQABS    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
6061                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s",
6062                           int_arm_neon_vqabs>;
6063
6064// Vector Negate.
6065
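// Integer negation is modeled as a subtraction from zero, so plain
// (sub 0, x) DAGs also select the VNEG instructions below.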
6066def vnegd  : PatFrag<(ops node:$in),
6067                     (sub ARMimmAllZerosD, node:$in)>;
6068def vnegq  : PatFrag<(ops node:$in),
6069                     (sub ARMimmAllZerosV, node:$in)>;
6070
6071class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
6072  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
6073        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
6074        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
6075class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
6076  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
6077        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
6078        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
6079
6080//   VNEG     : Vector Negate (integer)
6081def  VNEGs8d  : VNEGD<0b00, "vneg", "s8", v8i8>;
6082def  VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
6083def  VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
6084def  VNEGs8q  : VNEGQ<0b00, "vneg", "s8", v16i8>;
6085def  VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
6086def  VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
6087
6088//   VNEG     : Vector Negate (floating-point)
6089def  VNEGfd   : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
6090                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
6091                    "vneg", "f32", "$Vd, $Vm", "",
6092                    [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
6093def  VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
6094                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
6095                    "vneg", "f32", "$Vd, $Vm", "",
6096                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
6097def  VNEGhd   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
6098                    (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
6099                    "vneg", "f16", "$Vd, $Vm", "",
6100                    [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
6101                Requires<[HasNEON, HasFullFP16]>;
6102def  VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
6103                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
6104                    "vneg", "f16", "$Vd, $Vm", "",
6105                    [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
6106                Requires<[HasNEON, HasFullFP16]>;
6107
6108let Predicates = [HasNEON] in {
6109def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
6110def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
6111def : Pat<(v2i32 (vnegd  DPR:$src)), (VNEGs32d DPR:$src)>;
6112def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
6113def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
6114def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
6115}
6116
6117//   VQNEG    : Vector Saturating Negate
6118defm VQNEG    : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
6119                           IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
6120                           int_arm_neon_vqneg>;
6121
6122// Vector Bit Counting Operations.
6123
6124//   VCLS     : Vector Count Leading Sign Bits
6125defm VCLS     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
6126                           IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
6127                           int_arm_neon_vcls>;
6128//   VCLZ     : Vector Count Leading Zeros
6129defm VCLZ     : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
6130                           IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
6131                           ctlz>;
6132//   VCNT     : Vector Count One Bits
6133def  VCNTd    : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
6134                        IIC_VCNTiD, "vcnt", "8",
6135                        v8i8, v8i8, ctpop>;
6136def  VCNTq    : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
6137                        IIC_VCNTiQ, "vcnt", "8",
6138                        v16i8, v16i8, ctpop>;
6139
6140// Vector Swap
6141def  VSWPd    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
6142                     (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
6143                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
6144                     []>;
6145def  VSWPq    : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
6146                     (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
6147                     NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
6148                     []>;
6149
6150// Vector Move Operations.
6151
6152//   VMOV     : Vector Move (Register)
6153def : NEONInstAlias<"vmov${p} $Vd, $Vm",
6154                    (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
6155def : NEONInstAlias<"vmov${p} $Vd, $Vm",
6156                    (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
6157
6158//   VMOV     : Vector Move (Immediate)
6159
// Although VMOVs are not strictly speaking cheap, they are as expensive
// as their copy counterpart (VORR), so we should prefer rematerialization
// over splitting whenever it applies.
6163let isReMaterializable = 1, isAsCheapAsAMove=1 in {
6164def VMOVv8i8  : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
6165                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
6166                         "vmov", "i8", "$Vd, $SIMM", "",
6167                         [(set DPR:$Vd, (v8i8 (ARMvmovImm timm:$SIMM)))]>;
6168def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
6169                         (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
6170                         "vmov", "i8", "$Vd, $SIMM", "",
6171                         [(set QPR:$Vd, (v16i8 (ARMvmovImm timm:$SIMM)))]>;
6172
6173def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
6174                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
6175                         "vmov", "i16", "$Vd, $SIMM", "",
6176                         [(set DPR:$Vd, (v4i16 (ARMvmovImm timm:$SIMM)))]> {
6177  let Inst{9} = SIMM{9};
6178}
6179
6180def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
6181                         (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
6182                         "vmov", "i16", "$Vd, $SIMM", "",
6183                         [(set QPR:$Vd, (v8i16 (ARMvmovImm timm:$SIMM)))]> {
6184 let Inst{9} = SIMM{9};
6185}
6186
6187def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
6188                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
6189                         "vmov", "i32", "$Vd, $SIMM", "",
6190                         [(set DPR:$Vd, (v2i32 (ARMvmovImm timm:$SIMM)))]> {
6191  let Inst{11-8} = SIMM{11-8};
6192}
6193
6194def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
6195                         (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
6196                         "vmov", "i32", "$Vd, $SIMM", "",
6197                         [(set QPR:$Vd, (v4i32 (ARMvmovImm timm:$SIMM)))]> {
6198  let Inst{11-8} = SIMM{11-8};
6199}
6200
6201def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
6202                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
6203                         "vmov", "i64", "$Vd, $SIMM", "",
6204                         [(set DPR:$Vd, (v1i64 (ARMvmovImm timm:$SIMM)))]>;
6205def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
6206                         (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
6207                         "vmov", "i64", "$Vd, $SIMM", "",
6208                         [(set QPR:$Vd, (v2i64 (ARMvmovImm timm:$SIMM)))]>;
6209
6210def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
6211                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
6212                         "vmov", "f32", "$Vd, $SIMM", "",
6213                         [(set DPR:$Vd, (v2f32 (ARMvmovFPImm timm:$SIMM)))]>;
6214def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
6215                         (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
6216                         "vmov", "f32", "$Vd, $SIMM", "",
6217                         [(set QPR:$Vd, (v4f32 (ARMvmovFPImm timm:$SIMM)))]>;
6218} // isReMaterializable, isAsCheapAsAMove
6219
// Add support for byte-replicated immediates, for GAS compatibility.
multiclass NEONImmReplicateI8InstAlias<ValueType To> {
  // E.g. the instructions below:
  // "vmov.i32 d0, #0xffffffff"
  // "vmov.i32 d0, #0xabababab"
  // "vmov.i16 d0, #0xabab"
  // are not strictly valid, but GAS accepts them, so we handle such cases too.
  // For the last two, for example, we should emit:
  // "vmov.i8 d0, #0xab"
6229  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6230                      (VMOVv8i8 DPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
6231  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6232                      (VMOVv16i8 QPR:$Vd, nImmVMOVIReplicate<i8, To>:$Vm, pred:$p)>;
  // Also add the same support for VMVN instructions, so that:
  // "vmvn.i32 d0, #0xabababab"
  // is accepted and emitted as:
  // "vmov.i8 d0, #0x54"  (0x54 is the bitwise inverse of 0xab)
6237  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6238                      (VMOVv8i8 DPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
6239  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6240                      (VMOVv16i8 QPR:$Vd, nImmVINVIReplicate<i8, To>:$Vm, pred:$p)>;
6241}
6242
6243defm : NEONImmReplicateI8InstAlias<i16>;
6244defm : NEONImmReplicateI8InstAlias<i32>;
6245defm : NEONImmReplicateI8InstAlias<i64>;
6246
6247// Similar to above for types other than i8, e.g.:
6248// "vmov.i32 d0, #0xab00ab00" -> "vmov.i16 d0, #0xab00"
6249// "vmvn.i64 q0, #0xab000000ab000000" -> "vmvn.i32 q0, #0xab000000"
// In these cases we do not canonicalize VMVN to VMOV.
6251multiclass NEONImmReplicateInstAlias<ValueType From, NeonI V8, NeonI V16,
6252                                     NeonI NV8, NeonI NV16, ValueType To> {
6253  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6254                      (V8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6255  def : NEONInstAlias<"vmov${p}.i" # To.Size # " $Vd, $Vm",
6256                      (V16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6257  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6258                      (NV8 DPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6259  def : NEONInstAlias<"vmvn${p}.i" # To.Size # " $Vd, $Vm",
6260                      (NV16 QPR:$Vd, nImmVMOVIReplicate<From, To>:$Vm, pred:$p)>;
6261}
6262
6263defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
6264                                      VMVNv4i16, VMVNv8i16, i32>;
6265defm : NEONImmReplicateInstAlias<i16, VMOVv4i16, VMOVv8i16,
6266                                      VMVNv4i16, VMVNv8i16, i64>;
6267defm : NEONImmReplicateInstAlias<i32, VMOVv2i32, VMOVv4i32,
6268                                      VMVNv2i32, VMVNv4i32, i64>;
6269// TODO: add "VMOV <-> VMVN" conversion for cases like
6270// "vmov.i32 d0, #0xffaaffaa" -> "vmvn.i16 d0, #0x55"
6271// "vmvn.i32 d0, #0xaaffaaff" -> "vmov.i16 d0, #0xff00"
6272
// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
// require zero cycles to execute, so they should be used wherever possible for
// setting a register to zero.
6276
// Even without these pseudo-insts we would probably end up with the correct
// instruction, but we could not mark the general ones with "isAsCheapAsAMove"
// since they can sometimes be rather expensive.
6280
6281let AddedComplexity = 50, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
6282  def VMOVD0 : ARMPseudoExpand<(outs DPR:$Vd), (ins), 4, IIC_VMOVImm,
6283                               [(set DPR:$Vd, (v2i32 ARMimmAllZerosD))],
6284                               (VMOVv2i32 DPR:$Vd, 0, (ops 14, zero_reg))>,
6285               Requires<[HasZCZ]>;
6286  def VMOVQ0 : ARMPseudoExpand<(outs QPR:$Vd), (ins), 4, IIC_VMOVImm,
6287                               [(set QPR:$Vd, (v4i32 ARMimmAllZerosV))],
6288                               (VMOVv4i32 QPR:$Vd, 0, (ops 14, zero_reg))>,
6289               Requires<[HasZCZ]>;
6290}
6291
6292//   VMOV     : Vector Get Lane (move scalar to ARM core register)
6293
6294def VGETLNs8  : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
6295                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6296                          IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
6297                          [(set GPR:$R, (ARMvgetlanes (v8i8 DPR:$V),
6298                                           imm:$lane))]> {
6299  let Inst{21}  = lane{2};
6300  let Inst{6-5} = lane{1-0};
6301}
6302def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
6303                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6304                          IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
6305                          [(set GPR:$R, (ARMvgetlanes (v4i16 DPR:$V),
6306                                           imm:$lane))]> {
6307  let Inst{21} = lane{1};
6308  let Inst{6}  = lane{0};
6309}
6310def VGETLNu8  : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
6311                          (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
6312                          IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
6313                          [(set GPR:$R, (ARMvgetlaneu (v8i8 DPR:$V),
6314                                           imm:$lane))]> {
6315  let Inst{21}  = lane{2};
6316  let Inst{6-5} = lane{1-0};
6317}
6318def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
6319                          (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
6320                          IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
6321                          [(set GPR:$R, (ARMvgetlaneu (v4i16 DPR:$V),
6322                                           imm:$lane))]> {
6323  let Inst{21} = lane{1};
6324  let Inst{6}  = lane{0};
6325}
6326def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
6327                          (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
6328                          IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
6329                          [(set GPR:$R, (extractelt (v2i32 DPR:$V),
6330                                           imm:$lane))]>,
6331                Requires<[HasFPRegs, HasFastVGETLNi32]> {
6332  let Inst{21} = lane{0};
6333}
6334let Predicates = [HasNEON] in {
6335// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td
6336def : Pat<(ARMvgetlanes (v16i8 QPR:$src), imm:$lane),
6337          (VGETLNs8 (v8i8 (EXTRACT_SUBREG QPR:$src,
6338                           (DSubReg_i8_reg imm:$lane))),
6339                     (SubReg_i8_lane imm:$lane))>;
6340def : Pat<(ARMvgetlanes (v8i16 QPR:$src), imm:$lane),
6341          (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6342                             (DSubReg_i16_reg imm:$lane))),
6343                     (SubReg_i16_lane imm:$lane))>;
6344def : Pat<(ARMvgetlaneu (v16i8 QPR:$src), imm:$lane),
6345          (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src,
6346                           (DSubReg_i8_reg imm:$lane))),
6347                     (SubReg_i8_lane imm:$lane))>;
6348def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
6349          (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6350                             (DSubReg_i16_reg imm:$lane))),
6351                     (SubReg_i16_lane imm:$lane))>;
6352}
6353def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6354          (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
6355                             (DSubReg_i32_reg imm:$lane))),
6356                     (SubReg_i32_lane imm:$lane))>,
6357      Requires<[HasNEON, HasFastVGETLNi32]>;
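// On subtargets where moving a 32-bit lane to a core register is slow, extract
// the lane through the overlapping S register instead; the subsequent copy to a
// GPR is expected to be cheaper there.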
6358def : Pat<(extractelt (v2i32 DPR:$src), imm:$lane),
6359          (COPY_TO_REGCLASS
6360            (i32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6361      Requires<[HasNEON, HasSlowVGETLNi32]>;
6362def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
6363          (COPY_TO_REGCLASS
6364            (i32 (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane))), GPR)>,
6365      Requires<[HasNEON, HasSlowVGETLNi32]>;
6366let Predicates = [HasNEON] in {
6367def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
6368          (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
6369                          (SSubReg_f32_reg imm:$src2))>;
6370def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
6371          (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
6372                          (SSubReg_f32_reg imm:$src2))>;
6373//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
6374//          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
6375def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
6376          (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
6377}
6378
6379multiclass ExtractEltEvenF16<ValueType VT4, ValueType VT8> {
6380  def : Pat<(extractelt (VT4 DPR:$src), imm_even:$lane),
6381              (EXTRACT_SUBREG
6382                  (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
6383                  (SSubReg_f16_reg imm_even:$lane))>;
6384  def : Pat<(extractelt (VT8 QPR:$src), imm_even:$lane),
6385              (EXTRACT_SUBREG
6386                  (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
6387                  (SSubReg_f16_reg imm_even:$lane))>;
6388}
6389
6390multiclass ExtractEltOddF16VMOVH<ValueType VT4, ValueType VT8> {
6391  def : Pat<(extractelt (VT4 DPR:$src), imm_odd:$lane),
6392            (COPY_TO_REGCLASS
6393              (VMOVH (EXTRACT_SUBREG
6394                        (v2f32 (COPY_TO_REGCLASS (VT4 DPR:$src), DPR_VFP2)),
6395                        (SSubReg_f16_reg imm_odd:$lane))),
6396              HPR)>;
6397  def : Pat<(extractelt (VT8 QPR:$src), imm_odd:$lane),
6398            (COPY_TO_REGCLASS
6399              (VMOVH (EXTRACT_SUBREG
6400                        (v4f32 (COPY_TO_REGCLASS (VT8 QPR:$src), QPR_VFP2)),
6401                        (SSubReg_f16_reg imm_odd:$lane))),
6402              HPR)>;
6403}
6404
6405let Predicates = [HasNEON] in {
6406  defm : ExtractEltEvenF16<v4f16, v8f16>;
6407  defm : ExtractEltOddF16VMOVH<v4f16, v8f16>;
6408}
6409
6410let AddedComplexity = 1, Predicates = [HasNEON, HasBF16, HasFullFP16] in {
  // If VMOVH (vmovx.f16) is available, use it to extract BF16 from the odd lanes.
6412  defm : ExtractEltOddF16VMOVH<v4bf16, v8bf16>;
6413}
6414
6415let Predicates = [HasBF16, HasNEON] in {
6416  defm : ExtractEltEvenF16<v4bf16, v8bf16>;
6417
  // Otherwise, if VMOVH is not available, resort to extracting the odd lane
  // into a GPR and then moving it to HPR.
6420  def : Pat<(extractelt (v4bf16 DPR:$src), imm_odd:$lane),
6421            (COPY_TO_REGCLASS
6422              (VGETLNu16 (v4bf16 DPR:$src), imm:$lane),
6423              HPR)>;
6424
6425  def : Pat<(extractelt (v8bf16 QPR:$src), imm_odd:$lane),
6426            (COPY_TO_REGCLASS
6427              (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
6428                                                (DSubReg_i16_reg imm:$lane))),
6429                         (SubReg_i16_lane imm:$lane)),
6430              HPR)>;
6431}
6432
6433//   VMOV     : Vector Set Lane (move ARM core register to scalar)
6434
6435let Constraints = "$src1 = $V" in {
6436def VSETLNi8  : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
6437                          (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
6438                          IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
6439                          [(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
6440                                           GPR:$R, imm:$lane))]> {
6441  let Inst{21}  = lane{2};
6442  let Inst{6-5} = lane{1-0};
6443}
6444def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
6445                          (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
6446                          IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
6447                          [(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
6448                                           GPR:$R, imm:$lane))]> {
6449  let Inst{21} = lane{1};
6450  let Inst{6}  = lane{0};
6451}
6452def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
6453                          (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
6454                          IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
6455                          [(set DPR:$V, (insertelt (v2i32 DPR:$src1),
6456                                           GPR:$R, imm:$lane))]>,
6457                Requires<[HasVFP2]> {
6458  let Inst{21} = lane{0};
  // This instruction is equivalent to
6460  // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm)
6461  let isInsertSubreg = 1;
6462}
6463}
6464
// TODO: for odd lanes we could optimize this a bit by using the FullFP16 VINS
// instruction when it is available.
6467multiclass InsertEltF16<ValueType VTScalar, ValueType VT4, ValueType VT8> {
6468  def : Pat<(insertelt (VT4 DPR:$src1), (VTScalar HPR:$src2), imm:$lane),
6469            (VT4 (VSETLNi16 DPR:$src1,
6470                 (COPY_TO_REGCLASS HPR:$src2, GPR), imm:$lane))>;
6471  def : Pat<(insertelt (VT8 QPR:$src1), (VTScalar HPR:$src2), imm:$lane),
6472            (VT8 (INSERT_SUBREG QPR:$src1,
6473                    (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
6474                                        (DSubReg_i16_reg imm:$lane))),
6475                              (COPY_TO_REGCLASS HPR:$src2, GPR),
6476                              (SubReg_i16_lane imm:$lane))),
6477                    (DSubReg_i16_reg imm:$lane)))>;
6478}
6479
6480let Predicates = [HasNEON] in {
6481def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane),
6482          (v16i8 (INSERT_SUBREG QPR:$src1,
6483                  (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1,
6484                                   (DSubReg_i8_reg imm:$lane))),
6485                            GPR:$src2, (SubReg_i8_lane imm:$lane))),
6486                  (DSubReg_i8_reg imm:$lane)))>;
6487def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane),
6488          (v8i16 (INSERT_SUBREG QPR:$src1,
6489                  (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1,
6490                                     (DSubReg_i16_reg imm:$lane))),
6491                             GPR:$src2, (SubReg_i16_lane imm:$lane))),
6492                  (DSubReg_i16_reg imm:$lane)))>;
6493def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane),
6494          (v4i32 (INSERT_SUBREG QPR:$src1,
6495                  (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1,
6496                                     (DSubReg_i32_reg imm:$lane))),
6497                             GPR:$src2, (SubReg_i32_lane imm:$lane))),
6498                  (DSubReg_i32_reg imm:$lane)))>;
6499
6500def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)),
6501          (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)),
6502                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
6503def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)),
6504          (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)),
6505                                SPR:$src2, (SSubReg_f32_reg imm:$src3))>;
6506
6507defm : InsertEltF16<f16, v4f16, v8f16>;
6508
6509def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
6510          (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
6511
6512def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
6513          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
6514def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
6515          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
6516def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
6517          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
6518
6519def : Pat<(v4f16 (scalar_to_vector (f16 HPR:$src))),
6520          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
6521def : Pat<(v8f16 (scalar_to_vector (f16 HPR:$src))),
6522          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), HPR:$src, ssub_0)>;
6523
6524def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
6525          (VSETLNi8  (v8i8  (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6526def : Pat<(v4i16 (scalar_to_vector GPR:$src)),
6527          (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6528def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
6529          (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
6530
6531def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
6532          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6533                         (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6534                         dsub_0)>;
6535def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
6536          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
6537                         (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6538                         dsub_0)>;
6539def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
6540          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
6541                         (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
6542                         dsub_0)>;
6543}
6544
6545let Predicates = [HasNEON, HasBF16] in
6546defm : InsertEltF16<bf16, v4bf16, v8bf16>;
6547
6548//   VDUP     : Vector Duplicate (from ARM core register to all elements)
6549
6550class VDUPD<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
6551  : NVDup<opcod1, 0b1011, opcod3, (outs DPR:$V), (ins GPR:$R),
6552          IIC_VMOVIS, "vdup", Dt, "$V, $R",
6553          [(set DPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
6554class VDUPQ<bits<8> opcod1, bits<2> opcod3, string Dt, ValueType Ty>
6555  : NVDup<opcod1, 0b1011, opcod3, (outs QPR:$V), (ins GPR:$R),
6556          IIC_VMOVIS, "vdup", Dt, "$V, $R",
6557          [(set QPR:$V, (Ty (ARMvdup (i32 GPR:$R))))]>;
6558
6559def  VDUP8d   : VDUPD<0b11101100, 0b00, "8", v8i8>;
6560def  VDUP16d  : VDUPD<0b11101000, 0b01, "16", v4i16>;
6561def  VDUP32d  : VDUPD<0b11101000, 0b00, "32", v2i32>,
6562                Requires<[HasNEON, HasFastVDUP32]>;
6563def  VDUP8q   : VDUPQ<0b11101110, 0b00, "8", v16i8>;
6564def  VDUP16q  : VDUPQ<0b11101010, 0b01, "16", v8i16>;
6565def  VDUP32q  : VDUPQ<0b11101010, 0b00, "32", v4i32>;
6566
6567// ARMvdup patterns for uarchs with fast VDUP.32.
6568def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>,
6569      Requires<[HasNEON,HasFastVDUP32]>;
6570def : Pat<(v4f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>,
6571      Requires<[HasNEON]>;
6572
6573// ARMvdup patterns for uarchs with slow VDUP.32 - use VMOVDRR instead.
6574def : Pat<(v2i32 (ARMvdup (i32 GPR:$R))), (VMOVDRR GPR:$R, GPR:$R)>,
6575      Requires<[HasNEON,HasSlowVDUP32]>;
6576def : Pat<(v2f32 (ARMvdup (f32 (bitconvert GPR:$R)))), (VMOVDRR GPR:$R, GPR:$R)>,
6577      Requires<[HasNEON,HasSlowVDUP32]>;
6578
6579//   VDUP     : Vector Duplicate Lane (from scalar to all elements)
6580
6581class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
6582              ValueType Ty, Operand IdxTy>
6583  : NVDupLane<op19_16, 0, (outs DPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6584              IIC_VMOVD, OpcodeStr, Dt, "$Vd, $Vm$lane",
6585              [(set DPR:$Vd, (Ty (ARMvduplane (Ty DPR:$Vm), imm:$lane)))]>;
6586
6587class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
6588              ValueType ResTy, ValueType OpTy, Operand IdxTy>
6589  : NVDupLane<op19_16, 1, (outs QPR:$Vd), (ins DPR:$Vm, IdxTy:$lane),
6590              IIC_VMOVQ, OpcodeStr, Dt, "$Vd, $Vm$lane",
6591              [(set QPR:$Vd, (ResTy (ARMvduplane (OpTy DPR:$Vm),
6592                                      VectorIndex32:$lane)))]>;
6593
6594// Inst{19-16} is partially specified depending on the element size.
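// The lowest set bit of Inst{19-16} encodes the element size (xxx1 = 8,
// xx10 = 16, x100 = 32) and the bits above it hold the lane index.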
6595
6596def VDUPLN8d  : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8, VectorIndex8> {
6597  bits<3> lane;
6598  let Inst{19-17} = lane{2-0};
6599}
6600def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16, VectorIndex16> {
6601  bits<2> lane;
6602  let Inst{19-18} = lane{1-0};
6603}
6604def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32, VectorIndex32> {
6605  bits<1> lane;
6606  let Inst{19} = lane{0};
6607}
6608def VDUPLN8q  : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8, VectorIndex8> {
6609  bits<3> lane;
6610  let Inst{19-17} = lane{2-0};
6611}
6612def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16, VectorIndex16> {
6613  bits<2> lane;
6614  let Inst{19-18} = lane{1-0};
6615}
6616def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32, VectorIndex32> {
6617  bits<1> lane;
6618  let Inst{19} = lane{0};
6619}
6620
6621let Predicates = [HasNEON] in {
6622def : Pat<(v4f16 (ARMvduplane (v4f16 DPR:$Vm), imm:$lane)),
6623          (VDUPLN32d DPR:$Vm, imm:$lane)>;
6624
6625def : Pat<(v2f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
6626          (VDUPLN32d DPR:$Vm, imm:$lane)>;
6627
6628def : Pat<(v4f32 (ARMvduplane (v2f32 DPR:$Vm), imm:$lane)),
6629          (VDUPLN32q DPR:$Vm, imm:$lane)>;
6630
6631def : Pat<(v16i8 (ARMvduplane (v16i8 QPR:$src), imm:$lane)),
6632          (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
6633                                  (DSubReg_i8_reg imm:$lane))),
6634                           (SubReg_i8_lane imm:$lane)))>;
6635def : Pat<(v8i16 (ARMvduplane (v8i16 QPR:$src), imm:$lane)),
6636          (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src,
6637                                    (DSubReg_i16_reg imm:$lane))),
6638                            (SubReg_i16_lane imm:$lane)))>;
6639def : Pat<(v8f16 (ARMvduplane (v8f16 QPR:$src), imm:$lane)),
6640          (v8f16 (VDUPLN16q (v4f16 (EXTRACT_SUBREG QPR:$src,
6641                                    (DSubReg_i16_reg imm:$lane))),
6642                            (SubReg_i16_lane imm:$lane)))>;
6643def : Pat<(v4i32 (ARMvduplane (v4i32 QPR:$src), imm:$lane)),
6644          (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src,
6645                                    (DSubReg_i32_reg imm:$lane))),
6646                            (SubReg_i32_lane imm:$lane)))>;
6647def : Pat<(v4f32 (ARMvduplane (v4f32 QPR:$src), imm:$lane)),
6648          (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src,
6649                                   (DSubReg_i32_reg imm:$lane))),
6650                           (SubReg_i32_lane imm:$lane)))>;
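// For example, duplicating lane 3 of a v4i32 held in q0 is matched above by
// first taking the high D subregister (d1) and then duplicating its lane 1,
// i.e. roughly "vdup.32 q0, d1[1]"; DSubReg_i32_reg and SubReg_i32_lane do the
// lane-to-subregister arithmetic, and the assembly shown is illustrative.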
6651
6652def : Pat<(v4f16 (ARMvdup (f16 HPR:$src))),
6653          (v4f16 (VDUPLN16d (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
6654                             (f16 HPR:$src), ssub_0), (i32 0)))>;
6655def : Pat<(v2f32 (ARMvdup (f32 SPR:$src))),
6656          (v2f32 (VDUPLN32d (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6657                             SPR:$src, ssub_0), (i32 0)))>;
6658def : Pat<(v4f32 (ARMvdup (f32 SPR:$src))),
6659          (v4f32 (VDUPLN32q (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
6660                             SPR:$src, ssub_0), (i32 0)))>;
6661def : Pat<(v8f16 (ARMvdup (f16 HPR:$src))),
6662          (v8f16 (VDUPLN16q (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)),
6663                             (f16 HPR:$src), ssub_0), (i32 0)))>;
6664}
6665
6666let Predicates = [HasNEON, HasBF16] in {
6667def : Pat<(v4bf16 (ARMvduplane (v4bf16 DPR:$Vm), imm:$lane)),
6668          (VDUPLN16d DPR:$Vm, imm:$lane)>;
6669
6670def : Pat<(v8bf16 (ARMvduplane (v8bf16 QPR:$src), imm:$lane)),
6671          (v8bf16 (VDUPLN16q (v4bf16 (EXTRACT_SUBREG QPR:$src,
6672                                    (DSubReg_i16_reg imm:$lane))),
6673                            (SubReg_i16_lane imm:$lane)))>;
6674
6675def : Pat<(v4bf16 (ARMvdup (bf16 HPR:$src))),
6676          (v4bf16 (VDUPLN16d (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
6677                             (bf16 HPR:$src), ssub_0), (i32 0)))>;
6678def : Pat<(v8bf16 (ARMvdup (bf16 HPR:$src))),
6679          (v8bf16 (VDUPLN16q (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)),
6680                             (bf16 HPR:$src), ssub_0), (i32 0)))>;
6681}
6682
6683//   VMOVN    : Vector Narrowing Move
6684defm VMOVN    : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN,
6685                         "vmovn", "i", trunc>;
6686//   VQMOVN   : Vector Saturating Narrowing Move
6687defm VQMOVNs  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
6688                            "vqmovn", "s", int_arm_neon_vqmovns>;
6689defm VQMOVNu  : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
6690                            "vqmovn", "u", int_arm_neon_vqmovnu>;
6691defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
6692                            "vqmovun", "s", int_arm_neon_vqmovnsu>;
6693//   VMOVL    : Vector Lengthening Move
6694defm VMOVLs   : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
6695defm VMOVLu   : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
6696
6697let Predicates = [HasNEON] in {
6698def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
6699def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
6700def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
6701}
6702
6703// Vector Conversions.
6704
6705//   VCVT     : Vector Convert Between Floating-Point and Integers
6706def  VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6707                     v2i32, v2f32, fp_to_sint>;
6708def  VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6709                     v2i32, v2f32, fp_to_uint>;
6710def  VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6711                     v2f32, v2i32, sint_to_fp>;
6712def  VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6713                     v2f32, v2i32, uint_to_fp>;
6714
6715def  VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
6716                     v4i32, v4f32, fp_to_sint>;
6717def  VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
6718                     v4i32, v4f32, fp_to_uint>;
6719def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
6720                     v4f32, v4i32, sint_to_fp>;
6721def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
6722                     v4f32, v4i32, uint_to_fp>;
6723
6724def  VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6725                     v4i16, v4f16, fp_to_sint>,
6726                Requires<[HasNEON, HasFullFP16]>;
6727def  VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6728                     v4i16, v4f16, fp_to_uint>,
6729                Requires<[HasNEON, HasFullFP16]>;
6730def  VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6731                     v4f16, v4i16, sint_to_fp>,
6732                Requires<[HasNEON, HasFullFP16]>;
6733def  VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6734                     v4f16, v4i16, uint_to_fp>,
6735                Requires<[HasNEON, HasFullFP16]>;
6736
6737def  VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
6738                     v8i16, v8f16, fp_to_sint>,
6739                Requires<[HasNEON, HasFullFP16]>;
6740def  VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
6741                     v8i16, v8f16, fp_to_uint>,
6742                Requires<[HasNEON, HasFullFP16]>;
6743def  VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
6744                     v8f16, v8i16, sint_to_fp>,
6745                Requires<[HasNEON, HasFullFP16]>;
6746def  VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
6747                     v8f16, v8i16, uint_to_fp>,
6748                Requires<[HasNEON, HasFullFP16]>;
6749
6750// VCVT{A, N, P, M}
6751multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
6752                    SDPatternOperator IntU> {
6753  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
6754    def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6755                       "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
6756    def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6757                       "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
6758    def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6759                       "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
6760    def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6761                       "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
6762    def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6763                       "s16.f16", v4i16, v4f16, IntS>,
6764              Requires<[HasV8, HasNEON, HasFullFP16]>;
6765    def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
6766                       "s16.f16", v8i16, v8f16, IntS>,
6767              Requires<[HasV8, HasNEON, HasFullFP16]>;
6768    def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6769                       "u16.f16", v4i16, v4f16, IntU>,
6770              Requires<[HasV8, HasNEON, HasFullFP16]>;
6771    def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
6772                       "u16.f16", v8i16, v8f16, IntU>,
6773              Requires<[HasV8, HasNEON, HasFullFP16]>;
6774  }
6775}
6776
6777defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>;
6778defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>;
6779defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>;
6780defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>;
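// These are the directed-rounding conversions, e.g. "vcvta.s32.f32 d0, d1"
// rounds to nearest with ties away from zero, while the n/p/m variants round
// to nearest-even, towards +infinity and towards -infinity respectively
// (assembly shown is illustrative).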
6781
6782//   VCVT     : Vector Convert Between Floating-Point and Fixed-Point.
6783let DecoderMethod = "DecodeVCVTD" in {
6784def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6785                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
6786def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6787                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
6788def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6789                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
6790def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6791                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
6792let Predicates = [HasNEON, HasFullFP16] in {
6793def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6794                        v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
6795def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6796                        v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
6797def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6798                        v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
6799def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6800                        v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
6801} // Predicates = [HasNEON, HasFullFP16]
6802}
6803
6804let DecoderMethod = "DecodeVCVTQ" in {
6805def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
6806                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
6807def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
6808                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
6809def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
6810                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
6811def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
6812                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
6813let Predicates = [HasNEON, HasFullFP16] in {
6814def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
6815                        v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
6816def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
6817                        v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
6818def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
6819                        v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
6820def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
6821                        v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
6822} // Predicates = [HasNEON, HasFullFP16]
6823}
6824
6825def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
6826                    (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6827def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
6828                    (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6829def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
6830                    (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6831def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
6832                    (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
6833
6834def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
6835                    (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6836def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
6837                    (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6838def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
6839                    (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6840def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
6841                    (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
6842
6843def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
6844                    (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
6845def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
6846                    (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
6847def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
6848                    (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6849def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
6850                    (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
6851
6852def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
6853                    (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
6854def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
6855                    (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
6856def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
6857                    (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
6858def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
6859                    (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
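// These aliases let the assembler accept the fixed-point syntax with zero
// fraction bits as the plain integer conversion; e.g. "vcvt.s32.f32 d0, d1, #0"
// assembles to the same encoding as "vcvt.s32.f32 d0, d1" (illustrative).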
6860
6861
6862//   VCVT     : Vector Convert Between Half-Precision and Single-Precision.
6863def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
6864                        IIC_VUNAQ, "vcvt", "f16.f32",
6865                        v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
6866                Requires<[HasNEON, HasFP16]>;
6867def  VCVTh2f  : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
6868                        IIC_VUNAQ, "vcvt", "f32.f16",
6869                        v4f32, v4i16, int_arm_neon_vcvthf2fp>,
6870                Requires<[HasNEON, HasFP16]>;
6871
6872// Vector Reverse.
6873
6874//   VREV64   : Vector Reverse elements within 64-bit doublewords
6875
6876class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6877  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
6878        (ins DPR:$Vm), IIC_VMOVD,
6879        OpcodeStr, Dt, "$Vd, $Vm", "",
6880        [(set DPR:$Vd, (Ty (ARMvrev64 (Ty DPR:$Vm))))]>;
6881class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6882  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
6883        (ins QPR:$Vm), IIC_VMOVQ,
6884        OpcodeStr, Dt, "$Vd, $Vm", "",
6885        [(set QPR:$Vd, (Ty (ARMvrev64 (Ty QPR:$Vm))))]>;
6886
6887def VREV64d8  : VREV64D<0b00, "vrev64", "8", v8i8>;
6888def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
6889def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
6890let Predicates = [HasNEON] in {
6891def : Pat<(v2f32 (ARMvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>;
6892}
6893
6894def VREV64q8  : VREV64Q<0b00, "vrev64", "8", v16i8>;
6895def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
6896def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
6897
6898let Predicates = [HasNEON] in {
6899  def : Pat<(v4f32 (ARMvrev64 (v4f32 QPR:$Vm))),
6900            (VREV64q32 QPR:$Vm)>;
6901  def : Pat<(v8f16 (ARMvrev64 (v8f16 QPR:$Vm))),
6902            (VREV64q16 QPR:$Vm)>;
6903  def : Pat<(v4f16 (ARMvrev64 (v4f16 DPR:$Vm))),
6904            (VREV64d16 DPR:$Vm)>;
6905}
6906
6907//   VREV32   : Vector Reverse elements within 32-bit words
6908
6909class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6910  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
6911        (ins DPR:$Vm), IIC_VMOVD,
6912        OpcodeStr, Dt, "$Vd, $Vm", "",
6913        [(set DPR:$Vd, (Ty (ARMvrev32 (Ty DPR:$Vm))))]>;
6914class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6915  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
6916        (ins QPR:$Vm), IIC_VMOVQ,
6917        OpcodeStr, Dt, "$Vd, $Vm", "",
6918        [(set QPR:$Vd, (Ty (ARMvrev32 (Ty QPR:$Vm))))]>;
6919
6920def VREV32d8  : VREV32D<0b00, "vrev32", "8", v8i8>;
6921def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
6922
6923def VREV32q8  : VREV32Q<0b00, "vrev32", "8", v16i8>;
6924def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
6925
6926let Predicates = [HasNEON] in {
6927  def : Pat<(v8f16 (ARMvrev32 (v8f16 QPR:$Vm))),
6928            (VREV32q16 QPR:$Vm)>;
6929  def : Pat<(v4f16 (ARMvrev32 (v4f16 DPR:$Vm))),
6930            (VREV32d16 DPR:$Vm)>;
6931}
6932
6933//   VREV16   : Vector Reverse elements within 16-bit halfwords
6934
6935class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6936  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
6937        (ins DPR:$Vm), IIC_VMOVD,
6938        OpcodeStr, Dt, "$Vd, $Vm", "",
6939        [(set DPR:$Vd, (Ty (ARMvrev16 (Ty DPR:$Vm))))]>;
6940class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
6941  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
6942        (ins QPR:$Vm), IIC_VMOVQ,
6943        OpcodeStr, Dt, "$Vd, $Vm", "",
6944        [(set QPR:$Vd, (Ty (ARMvrev16 (Ty QPR:$Vm))))]>;
6945
6946def VREV16d8  : VREV16D<0b00, "vrev16", "8", v8i8>;
6947def VREV16q8  : VREV16Q<0b00, "vrev16", "8", v16i8>;
6948
6949// Other Vector Shuffles.
6950
6951//  Aligned extractions: really just dropping registers
6952
6953class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
6954      : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
6955             (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>,
6956        Requires<[HasNEON]>;
6957
6958def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
6959
6960def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
6961
6962def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
6963
6964def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
6965
6966def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
6967
6968def : AlignedVEXTq<v4f16, v8f16, DSubReg_i16_reg>; // v8f16 -> v4f16
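// Because a Q register is just a pair of D registers, these "extractions" cost
// nothing: e.g. taking elements [2,3] of a v4i32 held in q1 is simply a
// reference to d3 (illustrative; the LaneCVT XForm maps the start lane to
// dsub_0 or dsub_1).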
6969
6970//   VEXT     : Vector Extract
6971
6972
6973// All of these have a two-operand InstAlias.
6974let TwoOperandAliasConstraint = "$Vn = $Vd" in {
6975class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6976  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
6977        (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
6978        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6979        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
6980                                     (Ty DPR:$Vm), imm:$index)))]> {
6981  bits<3> index;
6982  let Inst{11} = 0b0;
6983  let Inst{10-8} = index{2-0};
6984}
6985
6986class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
6987  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
6988        (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
6989        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
6990        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
6991                                     (Ty QPR:$Vm), imm:$index)))]> {
6992  bits<4> index;
6993  let Inst{11-8} = index{3-0};
6994}
6995}
6996
6997def VEXTd8  : VEXTd<"vext", "8",  v8i8, imm0_7> {
6998  let Inst{10-8} = index{2-0};
6999}
7000def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
7001  let Inst{10-9} = index{1-0};
7002  let Inst{8}    = 0b0;
7003}
7004let Predicates = [HasNEON] in {
7005def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
7006          (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
7007}
7008
7009def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
7010  let Inst{10}     = index{0};
7011  let Inst{9-8}    = 0b00;
7012}
7013let Predicates = [HasNEON] in {
7014def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
7015          (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
7016}
7017
7018def VEXTq8  : VEXTq<"vext", "8",  v16i8, imm0_15> {
7019  let Inst{11-8} = index{3-0};
7020}
7021def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
7022  let Inst{11-9} = index{2-0};
7023  let Inst{8}    = 0b0;
7024}
7025let Predicates = [HasNEON] in {
7026def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
7027          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
7028}
7029
7030def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
7031  let Inst{11-10} = index{1-0};
7032  let Inst{9-8}    = 0b00;
7033}
7034def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
7035  let Inst{11} = index{0};
7036  let Inst{10-8}    = 0b000;
7037}
7038let Predicates = [HasNEON] in {
7039def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
7040          (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
7041}
7042
7043//   VTRN     : Vector Transpose
7044
7045def  VTRNd8   : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
7046def  VTRNd16  : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
7047def  VTRNd32  : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
7048
7049def  VTRNq8   : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
7050def  VTRNq16  : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
7051def  VTRNq32  : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
7052
7053//   VUZP     : Vector Unzip (Deinterleave)
7054
7055def  VUZPd8   : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
7056def  VUZPd16  : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
7057// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
7058def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
7059                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
7060
7061def  VUZPq8   : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
7062def  VUZPq16  : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
7063def  VUZPq32  : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
7064
7065//   VZIP     : Vector Zip (Interleave)
7066
7067def  VZIPd8   : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
7068def  VZIPd16  : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
7069// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
7070def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
7071                    (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
7072
7073def  VZIPq8   : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
7074def  VZIPq16  : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
7075def  VZIPq32  : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
7076
7077// Vector Table Lookup and Table Extension.
7078
7079//   VTBL     : Vector Table Lookup
7080let DecoderMethod = "DecodeTBLInstruction" in {
7081def  VTBL1
7082  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
7083        (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
7084        "vtbl", "8", "$Vd, $Vn, $Vm", "",
7085        [(set DPR:$Vd, (v8i8 (NEONvtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
7086
7087let hasExtraSrcRegAllocReq = 1 in {
7088def  VTBL2
7089  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
7090        (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
7091        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
7092def  VTBL3
7093  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
7094        (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
7095        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
7096def  VTBL4
7097  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
7098        (ins VecListFourD:$Vn, DPR:$Vm),
7099        NVTBLFrm, IIC_VTB4,
7100        "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
7101} // hasExtraSrcRegAllocReq = 1
7102
7103def  VTBL3Pseudo
7104  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
7105def  VTBL4Pseudo
7106  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
7107
7108//   VTBX     : Vector Table Extension
7109def  VTBX1
7110  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
7111        (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
7112        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
7113        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
7114                               DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
7115let hasExtraSrcRegAllocReq = 1 in {
7116def  VTBX2
7117  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
7118        (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
7119        "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
7120def  VTBX3
7121  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
7122        (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
7123        NVTBLFrm, IIC_VTBX3,
7124        "vtbx", "8", "$Vd, $Vn, $Vm",
7125        "$orig = $Vd", []>;
7126def  VTBX4
7127  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
7128        (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
7129        "vtbx", "8", "$Vd, $Vn, $Vm",
7130        "$orig = $Vd", []>;
7131} // hasExtraSrcRegAllocReq = 1
7132
7133def  VTBX3Pseudo
7134  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
7135                IIC_VTBX3, "$orig = $dst", []>;
7136def  VTBX4Pseudo
7137  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
7138                IIC_VTBX4, "$orig = $dst", []>;
7139} // DecoderMethod = "DecodeTBLInstruction"
7140
7141let Predicates = [HasNEON] in {
7142def : Pat<(v8i8 (NEONvtbl2 v8i8:$Vn0, v8i8:$Vn1, v8i8:$Vm)),
7143          (v8i8 (VTBL2 (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
7144                                            v8i8:$Vn1, dsub_1),
7145                       v8i8:$Vm))>;
7146def : Pat<(v8i8 (int_arm_neon_vtbx2 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
7147                                    v8i8:$Vm)),
7148          (v8i8 (VTBX2 v8i8:$orig,
7149                       (REG_SEQUENCE DPair, v8i8:$Vn0, dsub_0,
7150                                            v8i8:$Vn1, dsub_1),
7151                       v8i8:$Vm))>;
7152
7153def : Pat<(v8i8 (int_arm_neon_vtbl3 v8i8:$Vn0, v8i8:$Vn1,
7154                                    v8i8:$Vn2, v8i8:$Vm)),
7155          (v8i8 (VTBL3Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7156                                                 v8i8:$Vn1, dsub_1,
7157                                                 v8i8:$Vn2, dsub_2,
7158                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
7159                             v8i8:$Vm))>;
7160def : Pat<(v8i8 (int_arm_neon_vtbx3 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
7161                                    v8i8:$Vn2, v8i8:$Vm)),
7162          (v8i8 (VTBX3Pseudo v8i8:$orig,
7163                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7164                                                 v8i8:$Vn1, dsub_1,
7165                                                 v8i8:$Vn2, dsub_2,
7166                                                 (v8i8 (IMPLICIT_DEF)), dsub_3),
7167                             v8i8:$Vm))>;
7168
7169def : Pat<(v8i8 (int_arm_neon_vtbl4 v8i8:$Vn0, v8i8:$Vn1,
7170                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
7171          (v8i8 (VTBL4Pseudo (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7172                                                 v8i8:$Vn1, dsub_1,
7173                                                 v8i8:$Vn2, dsub_2,
7174                                                 v8i8:$Vn3, dsub_3),
7175                             v8i8:$Vm))>;
7176def : Pat<(v8i8 (int_arm_neon_vtbx4 v8i8:$orig, v8i8:$Vn0, v8i8:$Vn1,
7177                                    v8i8:$Vn2, v8i8:$Vn3, v8i8:$Vm)),
7178          (v8i8 (VTBX4Pseudo v8i8:$orig,
7179                             (REG_SEQUENCE QQPR, v8i8:$Vn0, dsub_0,
7180                                                 v8i8:$Vn1, dsub_1,
7181                                                 v8i8:$Vn2, dsub_2,
7182                                                 v8i8:$Vn3, dsub_3),
7183                             v8i8:$Vm))>;
7184}
7185
7186// VRINT      : Vector Rounding
7187multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
7188  let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
7189    def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
7190                      !strconcat("vrint", op), "f32",
7191                      v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
7192      let Inst{9-7} = op9_7;
7193    }
7194    def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
7195                      !strconcat("vrint", op), "f32",
7196                      v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
7197      let Inst{9-7} = op9_7;
7198    }
7199    def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
7200                      !strconcat("vrint", op), "f16",
7201                      v4f16, v4f16, Int>,
7202             Requires<[HasV8, HasNEON, HasFullFP16]> {
7203      let Inst{9-7} = op9_7;
7204    }
7205    def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
7206                      !strconcat("vrint", op), "f16",
7207                      v8f16, v8f16, Int>,
7208             Requires<[HasV8, HasNEON, HasFullFP16]> {
7209      let Inst{9-7} = op9_7;
7210    }
7211  }
7212
7213  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
7214                  (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
7215  def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
7216                  (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
7217  let Predicates = [HasNEON, HasFullFP16] in {
7218  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
7219                  (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
7220  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
7221                  (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
7222  }
7223}
7224
7225defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
7226defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>;
7227defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>;
7228defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>;
7229defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>;
7230defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>;
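// vrintn/a/p/m/z round each lane to integral using to-nearest-even,
// to-nearest-away, +infinity, -infinity and towards-zero rounding
// respectively, while vrintx uses the current FPSCR rounding mode and can
// signal Inexact; e.g. "vrintz.f32 d0, d1" truncates each lane (illustrative).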
7231
7232// Cryptography instructions
7233let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
7234    DecoderNamespace = "v8Crypto", hasSideEffects = 0 in {
7235  class AES<string op, bit op7, bit op6, SDPatternOperator Int>
7236    : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary,
7237                 !strconcat("aes", op), "8", v16i8, v16i8, Int>;
7238  class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int>
7239    : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary,
7240                 !strconcat("aes", op), "8", v16i8, v16i8, Int>;
7241  class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
7242              SDPatternOperator Int>
7243    : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary,
7244                 !strconcat("sha", op), "32", v4i32, v4i32, Int>;
7245  class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
7246              SDPatternOperator Int>
7247    : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary,
7248                 !strconcat("sha", op), "32", v4i32, v4i32, Int>;
7249  class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
7250    : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
7251                !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>;
7252}
7253
7254let Predicates = [HasV8, HasAES] in {
7255def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>;
7256def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>;
7257def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>;
7258def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>;
7259}
7260
7261let Predicates = [HasV8, HasSHA2] in {
7262def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, null_frag>;
7263def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>;
7264def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>;
7265def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, null_frag>;
7266def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, null_frag>;
7267def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, null_frag>;
7268def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>;
7269def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>;
7270def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>;
7271def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>;
7272}
7273
7274let Predicates = [HasNEON] in {
7275def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
7276          (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG
7277              (SHA1H (SUBREG_TO_REG (i64 0),
7278                                    (f32 (COPY_TO_REGCLASS i32:$Rn, SPR)),
7279                                    ssub_0)),
7280              ssub_0)), GPR)>;
7281
7282def : Pat<(v4i32 (int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7283          (SHA1C v4i32:$hash_abcd,
7284                 (SUBREG_TO_REG (i64 0),
7285                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7286                                ssub_0),
7287                 v4i32:$wk)>;
7288
7289def : Pat<(v4i32 (int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7290          (SHA1M v4i32:$hash_abcd,
7291                 (SUBREG_TO_REG (i64 0),
7292                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7293                                ssub_0),
7294                 v4i32:$wk)>;
7295
7296def : Pat<(v4i32 (int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk)),
7297          (SHA1P v4i32:$hash_abcd,
7298                 (SUBREG_TO_REG (i64 0),
7299                                (f32 (COPY_TO_REGCLASS i32:$hash_e, SPR)),
7300                                ssub_0),
7301                 v4i32:$wk)>;
7302}
7303
7304//===----------------------------------------------------------------------===//
7305// NEON instructions for single-precision FP math
7306//===----------------------------------------------------------------------===//
7307
7308class N2VSPat<SDNode OpNode, NeonI Inst>
7309  : NEONFPPat<(f32 (OpNode SPR:$a)),
7310              (EXTRACT_SUBREG
7311               (v2f32 (COPY_TO_REGCLASS (Inst
7312                (INSERT_SUBREG
7313                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7314                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
7315
7316class N3VSPat<SDNode OpNode, NeonI Inst>
7317  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
7318              (EXTRACT_SUBREG
7319               (v2f32 (COPY_TO_REGCLASS (Inst
7320                (INSERT_SUBREG
7321                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7322                 SPR:$a, ssub_0),
7323                (INSERT_SUBREG
7324                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7325                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
7326
7327class N3VSPatFP16<SDNode OpNode, NeonI Inst>
7328  : NEONFPPat<(f16 (OpNode HPR:$a, HPR:$b)),
7329              (EXTRACT_SUBREG
7330               (v4f16 (COPY_TO_REGCLASS (Inst
7331                (INSERT_SUBREG
7332                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
7333                 HPR:$a, ssub_0),
7334                (INSERT_SUBREG
7335                 (v4f16 (COPY_TO_REGCLASS (v4f16 (IMPLICIT_DEF)), DPR_VFP2)),
7336                 HPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
7337
7338class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
7339  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
7340              (EXTRACT_SUBREG
7341               (v2f32 (COPY_TO_REGCLASS (Inst
7342                (INSERT_SUBREG
7343                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7344                 SPR:$acc, ssub_0),
7345                (INSERT_SUBREG
7346                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7347                 SPR:$a, ssub_0),
7348                (INSERT_SUBREG
7349                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
7350                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
7351
7352class NVCVTIFPat<SDNode OpNode, NeonI Inst>
7353  : NEONFPPat<(f32 (OpNode GPR:$a)),
7354              (f32 (EXTRACT_SUBREG
7355                     (v2f32 (Inst
7356                       (INSERT_SUBREG
7357                         (v2f32 (IMPLICIT_DEF)),
7358                         (i32 (COPY_TO_REGCLASS GPR:$a, SPR)), ssub_0))),
7359                     ssub_0))>;
7360class NVCVTFIPat<SDNode OpNode, NeonI Inst>
7361  : NEONFPPat<(i32 (OpNode SPR:$a)),
7362              (i32 (EXTRACT_SUBREG
7363                     (v2f32 (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
7364                                                 SPR:$a, ssub_0))),
7365                     ssub_0))>;
7366
7367def : N3VSPat<fadd, VADDfd>;
7368def : N3VSPat<fsub, VSUBfd>;
7369def : N3VSPat<fmul, VMULfd>;
7370def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
7371      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
7372def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
7373      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
7374def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
7375      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
7376def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
7377      Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
7378def : N2VSPat<fabs, VABSfd>;
7379def : N2VSPat<fneg, VNEGfd>;
7380def : N3VSPatFP16<fmaximum, VMAXhd>, Requires<[HasFullFP16]>;
7381def : N3VSPatFP16<fminimum, VMINhd>, Requires<[HasFullFP16]>;
7382def : N3VSPat<fmaximum, VMAXfd>, Requires<[HasNEON]>;
7383def : N3VSPat<fminimum, VMINfd>, Requires<[HasNEON]>;
7384def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
7385def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
7386def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
7387def : NVCVTIFPat<uint_to_fp, VCVTu2fd>;
7388
7389// NEON doesn't have any f64 conversions, so provide patterns to make
7390// sure the VFP conversions match when extracting from a vector.
7391def : VFPPat<(f64 (sint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7392             (VSITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7393def : VFPPat<(f64 (sint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7394             (VSITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7395def : VFPPat<(f64 (uint_to_fp (extractelt (v2i32 DPR:$src), imm:$lane))),
7396             (VUITOD (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
7397def : VFPPat<(f64 (uint_to_fp (extractelt (v4i32 QPR:$src), imm:$lane))),
7398             (VUITOD (EXTRACT_SUBREG QPR:$src, (SSubReg_f32_reg imm:$lane)))>;
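// For example, sint_to_fp of lane 1 of a v2i32 held in d0 to f64 can be
// selected as roughly "vcvt.f64.s32 d1, s1", reading the lane directly as an S
// subregister instead of first extracting it to a core register (illustrative).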
7399
7400
// Prefer VMOVDRR for i32 -> f32 bitcasts, as it can write all DPR registers.
7402def : Pat<(f32 (bitconvert GPR:$a)),
7403          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
7404        Requires<[HasNEON, DontUseVMOVSR]>;
7405def : Pat<(arm_vmovsr GPR:$a),
7406          (EXTRACT_SUBREG (VMOVDRR GPR:$a, GPR:$a), ssub_0)>,
7407        Requires<[HasNEON, DontUseVMOVSR]>;
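// With DontUseVMOVSR, an i32 -> f32 bitcast of r0 is therefore selected as
// roughly "vmov d0, r0, r0" with the result read back from s0, rather than
// "vmov s0, r0" (register numbers are illustrative).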
7408
7409//===----------------------------------------------------------------------===//
// Non-Instruction Patterns or Endianness Reversal Patterns
7411//===----------------------------------------------------------------------===//
7412
7413// bit_convert
// 64-bit conversions
7415let Predicates = [HasNEON] in {
7416def : Pat<(f64   (bitconvert (v1i64 DPR:$src))), (f64   DPR:$src)>;
7417def : Pat<(v1i64 (bitconvert (f64   DPR:$src))), (v1i64 DPR:$src)>;
7418
7419def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
7420def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
7421
7422def : Pat<(v4i16 (bitconvert (v4f16 DPR:$src))), (v4i16  DPR:$src)>;
7423def : Pat<(v4f16 (bitconvert (v4i16 DPR:$src))), (v4f16  DPR:$src)>;
7424
7425def : Pat<(v4i16 (bitconvert (v4bf16 DPR:$src))), (v4i16  DPR:$src)>;
7426def : Pat<(v4bf16 (bitconvert (v4i16 DPR:$src))), (v4bf16  DPR:$src)>;
7427
// 128-bit conversions
7429def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
7430def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
7431
7432def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
7433def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
7434
7435def : Pat<(v8i16 (bitconvert (v8f16 QPR:$src))), (v8i16  QPR:$src)>;
7436def : Pat<(v8f16 (bitconvert (v8i16 QPR:$src))), (v8f16  QPR:$src)>;
7437
7438def : Pat<(v8i16 (bitconvert (v8bf16 QPR:$src))), (v8i16  QPR:$src)>;
7439def : Pat<(v8bf16 (bitconvert (v8i16 QPR:$src))), (v8bf16  QPR:$src)>;
7440}
7441
7442let Predicates = [IsLE,HasNEON] in {
  // 64-bit conversions
7444  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (f64   DPR:$src)>;
7445  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (f64   DPR:$src)>;
7446  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (f64   DPR:$src)>;
7447  def : Pat<(f64   (bitconvert (v4bf16 DPR:$src))), (f64   DPR:$src)>;
7448  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (f64   DPR:$src)>;
7449  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (f64   DPR:$src)>;
7450
7451  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
7452  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
7453  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (v1i64 DPR:$src)>;
7454  def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (v1i64 DPR:$src)>;
7455  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
7456  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (v1i64 DPR:$src)>;
7457
7458  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (v2f32 DPR:$src)>;
7459  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
7460  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (v2f32 DPR:$src)>;
7461  def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (v2f32 DPR:$src)>;
7462  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
7463  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (v2f32 DPR:$src)>;
7464
7465  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (v2i32 DPR:$src)>;
7466  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
7467  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (v2i32 DPR:$src)>;
7468  def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (v2i32 DPR:$src)>;
7469  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
7470  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (v2i32 DPR:$src)>;
7471
7472  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (v4f16 DPR:$src)>;
7473  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (v4f16 DPR:$src)>;
7474  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (v4f16 DPR:$src)>;
7475  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (v4f16 DPR:$src)>;
7476  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (v4f16 DPR:$src)>;
7477
7478  def : Pat<(v4bf16 (bitconvert (f64   DPR:$src))), (v4bf16 DPR:$src)>;
7479  def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (v4bf16 DPR:$src)>;
7480  def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (v4bf16 DPR:$src)>;
7481  def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (v4bf16 DPR:$src)>;
7482  def : Pat<(v4bf16 (bitconvert (v8i8  DPR:$src))), (v4bf16 DPR:$src)>;
7483
7484  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (v4i16 DPR:$src)>;
7485  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
7486  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
7487  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
7488  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (v4i16 DPR:$src)>;
7489
7490  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (v8i8  DPR:$src)>;
7491  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (v8i8  DPR:$src)>;
7492  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (v8i8  DPR:$src)>;
7493  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (v8i8  DPR:$src)>;
7494  def : Pat<(v8i8  (bitconvert (v4f16 DPR:$src))), (v8i8  DPR:$src)>;
7495  def : Pat<(v8i8  (bitconvert (v4bf16 DPR:$src))), (v8i8  DPR:$src)>;
7496  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (v8i8  DPR:$src)>;
7497
  // 128-bit conversions
7499  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
7500  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
7501  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (v2f64 QPR:$src)>;
7502  def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (v2f64 QPR:$src)>;
7503  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
7504  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
7505
7506  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
7507  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
7508  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (v2i64 QPR:$src)>;
7509  def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (v2i64 QPR:$src)>;
7510  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
7511  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
7512
7513  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
7514  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
7515  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (v4f32 QPR:$src)>;
7516  def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (v4f32 QPR:$src)>;
7517  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
7518  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
7519
7520  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
7521  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
7522  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (v4i32 QPR:$src)>;
7523  def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (v4i32 QPR:$src)>;
7524  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
7525  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
7526
7527  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (v8f16 QPR:$src)>;
7528  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (v8f16 QPR:$src)>;
7529  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (v8f16 QPR:$src)>;
7530  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (v8f16 QPR:$src)>;
7531  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (v8f16 QPR:$src)>;
7532
7533  def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (v8bf16 QPR:$src)>;
7534  def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (v8bf16 QPR:$src)>;
7535  def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (v8bf16 QPR:$src)>;
7536  def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (v8bf16 QPR:$src)>;
7537  def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (v8bf16 QPR:$src)>;
7538
7539  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
7540  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
7541  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
7542  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
7543  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
7544
7545  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
7546  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
7547  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
7548  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
7549  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (v16i8 QPR:$src)>;
7550  def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (v16i8 QPR:$src)>;
7551  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
7552}
7553
7554let Predicates = [IsBE,HasNEON] in {
  // 64-bit conversions
7556  def : Pat<(f64   (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
7557  def : Pat<(f64   (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
7558  def : Pat<(f64   (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
7559  def : Pat<(f64   (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>;
7560  def : Pat<(f64   (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
7561  def : Pat<(f64   (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
7562
7563  def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
7564  def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
7565  def : Pat<(v1i64 (bitconvert (v4f16 DPR:$src))), (VREV64d16 DPR:$src)>;
7566  def : Pat<(v1i64 (bitconvert (v4bf16 DPR:$src))), (VREV64d16 DPR:$src)>;
7567  def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
7568  def : Pat<(v1i64 (bitconvert (v8i8  DPR:$src))), (VREV64d8  DPR:$src)>;
7569
7570  def : Pat<(v2f32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
7571  def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
7572  def : Pat<(v2f32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
7573  def : Pat<(v2f32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>;
7574  def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
7575  def : Pat<(v2f32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;
7576
7577  def : Pat<(v2i32 (bitconvert (f64   DPR:$src))), (VREV64d32 DPR:$src)>;
7578  def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
7579  def : Pat<(v2i32 (bitconvert (v4f16 DPR:$src))), (VREV32d16 DPR:$src)>;
7580  def : Pat<(v2i32 (bitconvert (v4bf16 DPR:$src))), (VREV32d16 DPR:$src)>;
7581  def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
7582  def : Pat<(v2i32 (bitconvert (v8i8  DPR:$src))), (VREV32d8  DPR:$src)>;
7583
7584  def : Pat<(v4f16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
7585  def : Pat<(v4f16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
7586  def : Pat<(v4f16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
7587  def : Pat<(v4f16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
7588  def : Pat<(v4f16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
7589
7590  def : Pat<(v4bf16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
7591  def : Pat<(v4bf16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
7592  def : Pat<(v4bf16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
7593  def : Pat<(v4bf16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
7594  def : Pat<(v4bf16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
7595
7596  def : Pat<(v4i16 (bitconvert (f64   DPR:$src))), (VREV64d16 DPR:$src)>;
7597  def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
7598  def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
7599  def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
7600  def : Pat<(v4i16 (bitconvert (v8i8  DPR:$src))), (VREV16d8  DPR:$src)>;
7601
7602  def : Pat<(v8i8  (bitconvert (f64   DPR:$src))), (VREV64d8  DPR:$src)>;
7603  def : Pat<(v8i8  (bitconvert (v1i64 DPR:$src))), (VREV64d8  DPR:$src)>;
7604  def : Pat<(v8i8  (bitconvert (v2f32 DPR:$src))), (VREV32d8  DPR:$src)>;
7605  def : Pat<(v8i8  (bitconvert (v2i32 DPR:$src))), (VREV32d8  DPR:$src)>;
7606  def : Pat<(v8i8  (bitconvert (v4f16 DPR:$src))), (VREV16d8  DPR:$src)>;
7607  def : Pat<(v8i8  (bitconvert (v4bf16 DPR:$src))), (VREV16d8  DPR:$src)>;
7608  def : Pat<(v8i8  (bitconvert (v4i16 DPR:$src))), (VREV16d8  DPR:$src)>;
7609
  // 128-bit conversions
7611  def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
7612  def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
7613  def : Pat<(v2f64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
7614  def : Pat<(v2f64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>;
7615  def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
7616  def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
7617
7618  def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
7619  def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
7620  def : Pat<(v2i64 (bitconvert (v8f16 QPR:$src))), (VREV64q16 QPR:$src)>;
7621  def : Pat<(v2i64 (bitconvert (v8bf16 QPR:$src))), (VREV64q16 QPR:$src)>;
7622  def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
7623  def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8  QPR:$src)>;
7624
7625  def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
7626  def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
7627  def : Pat<(v4f32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
7628  def : Pat<(v4f32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>;
7629  def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
7630  def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
7631
7632  def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
7633  def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
7634  def : Pat<(v4i32 (bitconvert (v8f16 QPR:$src))), (VREV32q16 QPR:$src)>;
7635  def : Pat<(v4i32 (bitconvert (v8bf16 QPR:$src))), (VREV32q16 QPR:$src)>;
7636  def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
7637  def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8  QPR:$src)>;
7638
7639  def : Pat<(v8f16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7640  def : Pat<(v8f16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
7641  def : Pat<(v8f16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
7642  def : Pat<(v8f16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
7643  def : Pat<(v8f16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
7644
7645  def : Pat<(v8bf16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7646  def : Pat<(v8bf16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
7647  def : Pat<(v8bf16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
7648  def : Pat<(v8bf16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
7649  def : Pat<(v8bf16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
7650
7651  def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
7652  def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
7653  def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
7654  def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
7655  def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8  QPR:$src)>;
7656
7657  def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8  QPR:$src)>;
7658  def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8  QPR:$src)>;
7659  def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8  QPR:$src)>;
7660  def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8  QPR:$src)>;
7661  def : Pat<(v16i8 (bitconvert (v8f16 QPR:$src))), (VREV16q8  QPR:$src)>;
7662  def : Pat<(v16i8 (bitconvert (v8bf16 QPR:$src))), (VREV16q8  QPR:$src)>;
7663  def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8  QPR:$src)>;
7664}
7665
7666let Predicates = [HasNEON] in {
7667  // Here we match the specific SDNode type 'ARMVectorRegCastImpl'
7668  // rather than the more general 'ARMVectorRegCast' which would also
7669  // match some bitconverts. If we use the latter in cases where the
7670  // input and output types are the same, the bitconvert gets elided
7671  // and we end up generating a nonsense match of nothing.
7672
7673  foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
7674    foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
7675      def : Pat<(VT (ARMVectorRegCastImpl (VT2 QPR:$src))), (VT QPR:$src)>;
7676
7677  foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in
7678    foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, f64 ] in
7679      def : Pat<(VT (ARMVectorRegCastImpl (VT2 DPR:$src))), (VT DPR:$src)>;
7680}
7681
// Use VLD1/VST1 + VREV for non-word-aligned v2f64 loads/stores on big-endian targets.
7683let Predicates = [IsBE,HasNEON] in {
7684def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
7685          (VREV64q8 (VLD1q8 addrmode6:$addr))>;
7686def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
7687          (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>;
7688def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
7689          (VREV64q16 (VLD1q16 addrmode6:$addr))>;
7690def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
7691          (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>;
7692}
7693
7694// Fold extracting an element out of a v2i32 into a vfp register.
7695def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
7696          (f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>,
7697      Requires<[HasNEON]>;
7698
7699// Vector lengthening move with load, matching extending loads.
7700
7701// extload, zextload and sextload for a standard lengthening load. Example:
7702// Lengthen_Single<"8", "i16", "8"> =
//     Pat<(v8i16 (extloadvi8 addrmode6:$addr)),
//         (VMOVLuv8i16 (VLD1d8 addrmode6:$addr))>;
7706multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
7707  let AddedComplexity = 10 in {
7708  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7709                    (!cast<PatFrag>("extloadvi" # SrcTy) addrmode6:$addr)),
7710                  (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
7711                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
7712             Requires<[HasNEON]>;
7713
7714  def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7715                  (!cast<PatFrag>("zextloadvi" # SrcTy) addrmode6:$addr)),
7716                (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
7717                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
7718           Requires<[HasNEON]>;
7719
7720  def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7721                  (!cast<PatFrag>("sextloadvi" # SrcTy) addrmode6:$addr)),
7722                (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
7723                    (!cast<Instruction>("VLD1d" # SrcTy) addrmode6:$addr))>,
7724           Requires<[HasNEON]>;
7725  }
7726}
7727
7728// extload, zextload and sextload for a lengthening load which only uses
7729// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16"> =
7731//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
7732//         (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
7733//                                      (f64 (IMPLICIT_DEF)), (i32 0))),
7734//                         dsub_0)>;
7735multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
7736                               string InsnLanes, string InsnTy> {
7737  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7738                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7739       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7740         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7741         dsub_0)>,
7742             Requires<[HasNEON]>;
7743  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7744                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7745       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7746         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7747         dsub_0)>,
7748             Requires<[HasNEON]>;
7749  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7750                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7751       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
7752         (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7753         dsub_0)>,
7754             Requires<[HasNEON]>;
7755}
7756
// The following multiclass is basically a copy of the Lengthen_HalfSingle
// definition above, but with an additional parameter "RevLanes" to select
// the correct VREV32dXX instruction. This is needed to convert the data
// loaded by VLD1LN into the proper vector format in big-endian mode.
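//
// For example, the _Any pattern of
// Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">
// should be roughly:
//     Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv8i16
//           (VREV32d8 (VLD1LNd32 addrmode6oneL32:$addr,
//                                (f64 (IMPLICIT_DEF)), (i32 0)))),
//           dsub_0)>;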
multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy,
                                          string SrcTy, string InsnLanes,
                                          string InsnTy, string RevLanes> {
7763  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7764                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7765       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7766         (!cast<Instruction>("VREV32d" # RevLanes)
7767           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7768         dsub_0)>,
7769             Requires<[HasNEON]>;
7770  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7771                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7772       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
7773         (!cast<Instruction>("VREV32d" # RevLanes)
7774           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7775         dsub_0)>,
7776             Requires<[HasNEON]>;
7777  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7778                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7779       (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
7780         (!cast<Instruction>("VREV32d" # RevLanes)
7781           (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7782         dsub_0)>,
7783             Requires<[HasNEON]>;
7784}
7785
7786// extload, zextload and sextload for a lengthening load followed by another
7787// lengthening load, to quadruple the initial length.
7788//
7789// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32"> =
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
//         (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
//                                                   (f64 (IMPLICIT_DEF)),
//                                                   (i32 0))),
//                           dsub_0))>;
7797multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
7798                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7799                           string Insn2Ty> {
7800  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7801                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7802         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7803           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7804             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7805             dsub_0))>,
7806             Requires<[HasNEON]>;
7807  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7808                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7809         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7810           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7811             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7812             dsub_0))>,
7813             Requires<[HasNEON]>;
7814  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7815                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7816         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7817           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7818             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7819             dsub_0))>,
7820             Requires<[HasNEON]>;
7821}
7822
// The following multiclass is basically a copy of the Lengthen_Double
// definition above, but with an additional parameter "RevLanes" to select
// the correct VREV32dXX instruction. This is needed to convert the data
// loaded by VLD1LN into the proper vector format in big-endian mode.
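//
// For example, the _Any pattern of
// Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">
// should be roughly:
//     Pat<(v4i32 (extloadvi8 addrmode6oneL32:$addr)),
//         (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16
//             (VREV32d8 (VLD1LNd32 addrmode6oneL32:$addr,
//                                  (f64 (IMPLICIT_DEF)), (i32 0)))),
//             dsub_0))>;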
multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy,
                                      string SrcTy, string Insn1Lanes,
                                      string Insn1Ty, string Insn2Lanes,
                                      string Insn2Ty, string RevLanes> {
7830  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7831                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)),
7832         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7833           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7834            (!cast<Instruction>("VREV32d" # RevLanes)
7835             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7836             dsub_0))>,
7837             Requires<[HasNEON]>;
7838  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7839                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)),
7840         (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7841           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7842            (!cast<Instruction>("VREV32d" # RevLanes)
7843             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7844             dsub_0))>,
7845             Requires<[HasNEON]>;
7846  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7847                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)),
7848         (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7849           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7850            (!cast<Instruction>("VREV32d" # RevLanes)
7851             (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7852             dsub_0))>,
7853             Requires<[HasNEON]>;
7854}
7855
// extload, zextload and sextload for a lengthening load followed by another
// lengthening load, to quadruple the initial length, but which only requires
// half the available lanes (a 64-bit result instead of a 128-bit one).
7859//
7860// Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32"> =
// Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
7862//     (EXTRACT_SUBREG (VMOVLuv4i32
7863//       (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd16 addrmode6:$addr,
7864//                                               (f64 (IMPLICIT_DEF)), (i32 0))),
7865//                       dsub_0)),
7866//       dsub_0)>;
7867multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy,
7868                           string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
7869                           string Insn2Ty> {
7870  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7871                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
7872         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7873           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7874             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7875             dsub_0)),
7876          dsub_0)>,
7877             Requires<[HasNEON]>;
7878  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7879                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
7880         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7881           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7882             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7883             dsub_0)),
7884          dsub_0)>,
7885              Requires<[HasNEON]>;
7886  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7887                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
7888         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7889           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7890             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0))),
7891             dsub_0)),
7892          dsub_0)>,
7893             Requires<[HasNEON]>;
7894}
7895
// The following multiclass is basically a copy of the Lengthen_HalfDouble
// definition above, but with an additional VREV16d8 instruction to convert
// the data loaded by VLD1LN into the proper vector format in big-endian mode.
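//
// For example, the _Any pattern of
// Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">
// should be roughly:
//     Pat<(v2i32 (extloadvi8 addrmode6:$addr)),
//         (EXTRACT_SUBREG (VMOVLuv4i32
//           (EXTRACT_SUBREG (VMOVLuv8i16
//             (VREV16d8 (VLD1LNd16 addrmode6:$addr,
//                                  (f64 (IMPLICIT_DEF)), (i32 0)))),
//             dsub_0)),
//           dsub_0)>;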
multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy,
                                          string SrcTy, string Insn1Lanes,
                                          string Insn1Ty, string Insn2Lanes,
                                          string Insn2Ty> {
7903  def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7904                   (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
7905         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7906           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7907            (!cast<Instruction>("VREV16d8")
7908             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7909             dsub_0)),
7910          dsub_0)>,
7911             Requires<[HasNEON]>;
7912  def _Z   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7913                   (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
7914         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
7915           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
7916            (!cast<Instruction>("VREV16d8")
7917             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7918             dsub_0)),
7919          dsub_0)>,
7920             Requires<[HasNEON]>;
7921  def _S   : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
7922                   (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
7923         (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
7924           (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
7925            (!cast<Instruction>("VREV16d8")
7926             (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
7927             dsub_0)),
7928          dsub_0)>,
7929             Requires<[HasNEON]>;
7930}
7931
7932defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16
7933defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32
7934defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64
7935
7936let Predicates = [HasNEON,IsLE] in {
7937  defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
7938  defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
7939
7940  // Double lengthening - v4i8 -> v4i16 -> v4i32
7941  defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">;
7942  // v2i8 -> v2i16 -> v2i32
7943  defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">;
7944  // v2i16 -> v2i32 -> v2i64
7945  defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
7946}
7947
7948let Predicates = [HasNEON,IsBE] in {
7949  defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16
7950  defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32
7951
7952  // Double lengthening - v4i8 -> v4i16 -> v4i32
7953  defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">;
7954  // v2i8 -> v2i16 -> v2i32
7955  defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">;
7956  // v2i16 -> v2i32 -> v2i64
7957  defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">;
7958}
7959
7960// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
7961let Predicates = [HasNEON,IsLE] in {
7962  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
7963        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7964           (VLD1LNd16 addrmode6:$addr,
7965                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7966  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
7967        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7968           (VLD1LNd16 addrmode6:$addr,
7969                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7970  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
7971        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
7972           (VLD1LNd16 addrmode6:$addr,
7973                      (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7974}
// The following patterns are basically a copy of the patterns above,
// but with an additional VREV16d8 instruction to convert the data
// loaded by VLD1LN into the proper vector format in big-endian mode.
7978let Predicates = [HasNEON,IsBE] in {
7979  def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
7980        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7981           (!cast<Instruction>("VREV16d8")
7982             (VLD1LNd16 addrmode6:$addr,
7983                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7984  def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
7985        (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
7986           (!cast<Instruction>("VREV16d8")
7987             (VLD1LNd16 addrmode6:$addr,
7988                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7989  def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
7990        (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
7991           (!cast<Instruction>("VREV16d8")
7992             (VLD1LNd16 addrmode6:$addr,
7993                        (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
7994}
7995
7996let Predicates = [HasNEON] in {
7997def : Pat<(v2i64 (concat_vectors DPR:$Dn, DPR:$Dm)),
7998          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
7999def : Pat<(v4i32 (concat_vectors DPR:$Dn, DPR:$Dm)),
8000          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
8001def : Pat<(v8i16 (concat_vectors DPR:$Dn, DPR:$Dm)),
8002          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
8003def : Pat<(v16i8 (concat_vectors DPR:$Dn, DPR:$Dm)),
8004          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
8005def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
8006          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
8007def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
8008          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
8009def : Pat<(v8bf16 (concat_vectors DPR:$Dn, DPR:$Dm)),
8010          (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
8011}
8012
8013//===----------------------------------------------------------------------===//
8014// Assembler aliases
8015//
8016
8017def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
8018                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
8019def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
8020                    (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
8021
8022// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
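// For example, "vand.i16 d0, d1, d2" is accepted and assembles to the same
// encoding as "vand d0, d1, d2".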
8023defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
8024                         (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
8025defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
8026                         (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
8027defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
8028                         (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
8029defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
8030                         (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
8031defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
8032                         (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
8033defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
8034                         (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
8035defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
8036                         (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
8037defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
8038                         (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
8039// ... two-operand aliases
8040defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
8041                         (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
8042defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
8043                         (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
8044defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
8045                         (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
8046defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
8047                         (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
8048defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
8049                         (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
8050defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
8051                         (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
// ... immediates (vand with an immediate is encoded as vbic of the
// bitwise-inverted immediate)
8053def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
8054                    (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
8055def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
8056                    (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
8057def : NEONInstAlias<"vand${p}.i16 $Vd, $imm",
8058                    (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>;
8059def : NEONInstAlias<"vand${p}.i32 $Vd, $imm",
8060                    (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>;
8061
8062
8063// VLD1 single-lane pseudo-instructions. These need special handling for
8064// the lane index that an InstAlias can't handle, so we use these instead.
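// For example, these accept "vld1.8 {d0[2]}, [r0]" as well as the writeback
// forms "vld1.8 {d0[2]}, [r0]!" and "vld1.8 {d0[2]}, [r0], r2".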
8065def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
8066                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
8067                      pred:$p)>;
8068def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
8069                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
8070                      pred:$p)>;
8071def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
8072                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
8073                      pred:$p)>;
8074
8075def VLD1LNdWB_fixed_Asm_8 :
8076        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
8077                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
8078                      pred:$p)>;
8079def VLD1LNdWB_fixed_Asm_16 :
8080        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
8081                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
8082                      pred:$p)>;
8083def VLD1LNdWB_fixed_Asm_32 :
8084        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
8085                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
8086                      pred:$p)>;
8087def VLD1LNdWB_register_Asm_8 :
8088        NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
8089                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
8090                       rGPR:$Rm, pred:$p)>;
8091def VLD1LNdWB_register_Asm_16 :
8092        NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
8093                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
8094                       rGPR:$Rm, pred:$p)>;
8095def VLD1LNdWB_register_Asm_32 :
8096        NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
8097                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
8098                       rGPR:$Rm, pred:$p)>;
8099
8100
8101// VST1 single-lane pseudo-instructions. These need special handling for
8102// the lane index that an InstAlias can't handle, so we use these instead.
8103def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
8104                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
8105                      pred:$p)>;
8106def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
8107                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
8108                      pred:$p)>;
8109def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
8110                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
8111                      pred:$p)>;
8112
8113def VST1LNdWB_fixed_Asm_8 :
8114        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
8115                 (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
8116                      pred:$p)>;
8117def VST1LNdWB_fixed_Asm_16 :
8118        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
8119                 (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
8120                      pred:$p)>;
8121def VST1LNdWB_fixed_Asm_32 :
8122        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
8123                 (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
8124                      pred:$p)>;
8125def VST1LNdWB_register_Asm_8 :
8126        NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
8127                  (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
8128                       rGPR:$Rm, pred:$p)>;
8129def VST1LNdWB_register_Asm_16 :
8130        NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
8131                  (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
8132                       rGPR:$Rm, pred:$p)>;
8133def VST1LNdWB_register_Asm_32 :
8134        NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
8135                  (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
8136                       rGPR:$Rm, pred:$p)>;
8137
8138// VLD2 single-lane pseudo-instructions. These need special handling for
8139// the lane index that an InstAlias can't handle, so we use these instead.
8140def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
8141                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
                      pred:$p)>;
8143def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
8144                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
8145                      pred:$p)>;
8146def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
                      pred:$p)>;
8148def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
8149                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
8150                      pred:$p)>;
8151def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
8152                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
8153                      pred:$p)>;
8154
8155def VLD2LNdWB_fixed_Asm_8 :
8156        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
8157                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
8158                      pred:$p)>;
8159def VLD2LNdWB_fixed_Asm_16 :
8160        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
8161                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
8162                      pred:$p)>;
8163def VLD2LNdWB_fixed_Asm_32 :
8164        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
8165                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
8166                      pred:$p)>;
8167def VLD2LNqWB_fixed_Asm_16 :
8168        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
8169                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
8170                      pred:$p)>;
8171def VLD2LNqWB_fixed_Asm_32 :
8172        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
8173                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
8174                      pred:$p)>;
8175def VLD2LNdWB_register_Asm_8 :
8176        NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
8177                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
8178                       rGPR:$Rm, pred:$p)>;
8179def VLD2LNdWB_register_Asm_16 :
8180        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
8181                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
8182                       rGPR:$Rm, pred:$p)>;
8183def VLD2LNdWB_register_Asm_32 :
8184        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
8185                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
8186                       rGPR:$Rm, pred:$p)>;
8187def VLD2LNqWB_register_Asm_16 :
8188        NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
8189                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
8190                       rGPR:$Rm, pred:$p)>;
8191def VLD2LNqWB_register_Asm_32 :
8192        NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
8193                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
8194                       rGPR:$Rm, pred:$p)>;
8195
8196
8197// VST2 single-lane pseudo-instructions. These need special handling for
8198// the lane index that an InstAlias can't handle, so we use these instead.
8199def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
8200                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
8201                      pred:$p)>;
8202def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
8203                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
8204                      pred:$p)>;
8205def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
8206                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
8207                      pred:$p)>;
8208def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
8209                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
8210                      pred:$p)>;
8211def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
8212                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
8213                      pred:$p)>;
8214
8215def VST2LNdWB_fixed_Asm_8 :
8216        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
8217                 (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
8218                      pred:$p)>;
8219def VST2LNdWB_fixed_Asm_16 :
8220        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
8221                 (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
8222                      pred:$p)>;
8223def VST2LNdWB_fixed_Asm_32 :
8224        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
8225                 (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
8226                      pred:$p)>;
8227def VST2LNqWB_fixed_Asm_16 :
8228        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
8229                 (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
8230                      pred:$p)>;
8231def VST2LNqWB_fixed_Asm_32 :
8232        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
8233                 (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
8234                      pred:$p)>;
8235def VST2LNdWB_register_Asm_8 :
8236        NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
8237                  (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
8238                       rGPR:$Rm, pred:$p)>;
8239def VST2LNdWB_register_Asm_16 :
8240        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
8241                  (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
8242                       rGPR:$Rm, pred:$p)>;
8243def VST2LNdWB_register_Asm_32 :
8244        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
8245                  (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
8246                       rGPR:$Rm, pred:$p)>;
8247def VST2LNqWB_register_Asm_16 :
8248        NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
8249                  (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
8250                       rGPR:$Rm, pred:$p)>;
8251def VST2LNqWB_register_Asm_32 :
8252        NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
8253                  (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
8254                       rGPR:$Rm, pred:$p)>;
8255
8256// VLD3 all-lanes pseudo-instructions. These need special handling for
8257// the lane index that an InstAlias can't handle, so we use these instead.
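// For example, these accept the all-lanes syntax
// "vld3.8 {d0[], d1[], d2[]}, [r0]".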
8258def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8259               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8260                    pred:$p)>;
8261def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8262               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8263                    pred:$p)>;
8264def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8265               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8266                    pred:$p)>;
8267def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8268               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8269                    pred:$p)>;
8270def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8271               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8272                    pred:$p)>;
8273def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8274               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8275                    pred:$p)>;
8276
8277def VLD3DUPdWB_fixed_Asm_8 :
8278        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8279               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8280                    pred:$p)>;
8281def VLD3DUPdWB_fixed_Asm_16 :
8282        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8283               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8284                    pred:$p)>;
8285def VLD3DUPdWB_fixed_Asm_32 :
8286        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8287               (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8288                    pred:$p)>;
8289def VLD3DUPqWB_fixed_Asm_8 :
8290        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8291               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8292                    pred:$p)>;
8293def VLD3DUPqWB_fixed_Asm_16 :
8294        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8295               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8296                    pred:$p)>;
8297def VLD3DUPqWB_fixed_Asm_32 :
8298        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8299               (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8300                    pred:$p)>;
8301def VLD3DUPdWB_register_Asm_8 :
8302        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8303                  (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8304                       rGPR:$Rm, pred:$p)>;
8305def VLD3DUPdWB_register_Asm_16 :
8306        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8307                  (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8308                       rGPR:$Rm, pred:$p)>;
8309def VLD3DUPdWB_register_Asm_32 :
8310        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8311                  (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
8312                       rGPR:$Rm, pred:$p)>;
8313def VLD3DUPqWB_register_Asm_8 :
8314        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8315                  (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8316                       rGPR:$Rm, pred:$p)>;
8317def VLD3DUPqWB_register_Asm_16 :
8318        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8319                  (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8320                       rGPR:$Rm, pred:$p)>;
8321def VLD3DUPqWB_register_Asm_32 :
8322        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8323                  (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
8324                       rGPR:$Rm, pred:$p)>;
8325
8326
8327// VLD3 single-lane pseudo-instructions. These need special handling for
8328// the lane index that an InstAlias can't handle, so we use these instead.
8329def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8330               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8331                    pred:$p)>;
8332def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8333               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8334                    pred:$p)>;
8335def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8336               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8337                    pred:$p)>;
8338def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8339               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8340                    pred:$p)>;
8341def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8342               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8343                    pred:$p)>;
8344
8345def VLD3LNdWB_fixed_Asm_8 :
8346        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8347               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8348                    pred:$p)>;
8349def VLD3LNdWB_fixed_Asm_16 :
8350        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8351               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8352                    pred:$p)>;
8353def VLD3LNdWB_fixed_Asm_32 :
8354        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8355               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8356                    pred:$p)>;
8357def VLD3LNqWB_fixed_Asm_16 :
8358        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8359               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8360                    pred:$p)>;
8361def VLD3LNqWB_fixed_Asm_32 :
8362        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8363               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8364                    pred:$p)>;
8365def VLD3LNdWB_register_Asm_8 :
8366        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8367                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8368                       rGPR:$Rm, pred:$p)>;
8369def VLD3LNdWB_register_Asm_16 :
8370        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8371                  (ins VecListThreeDHWordIndexed:$list,
8372                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8373def VLD3LNdWB_register_Asm_32 :
8374        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8375                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8376                       rGPR:$Rm, pred:$p)>;
8377def VLD3LNqWB_register_Asm_16 :
8378        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8379                  (ins VecListThreeQHWordIndexed:$list,
8380                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8381def VLD3LNqWB_register_Asm_32 :
8382        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8383                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8384                       rGPR:$Rm, pred:$p)>;
8385
8386// VLD3 multiple structure pseudo-instructions. These need special handling for
8387// the vector operands that the normal instructions don't yet model.
8388// FIXME: Remove these when the register classes and instructions are updated.
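// For example, these handle "vld3.8 {d0, d1, d2}, [r0]" as well as the
// spaced form "vld3.8 {d0, d2, d4}, [r0]".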
8389def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8390               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8391def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8392               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8393def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8394               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8395def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
8396               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8397def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
8398               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8399def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
8400               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8401
8402def VLD3dWB_fixed_Asm_8 :
8403        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8404               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8405def VLD3dWB_fixed_Asm_16 :
8406        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8407               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8408def VLD3dWB_fixed_Asm_32 :
8409        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8410               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8411def VLD3qWB_fixed_Asm_8 :
8412        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
8413               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8414def VLD3qWB_fixed_Asm_16 :
8415        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
8416               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8417def VLD3qWB_fixed_Asm_32 :
8418        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
8419               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8420def VLD3dWB_register_Asm_8 :
8421        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8422                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8423                       rGPR:$Rm, pred:$p)>;
8424def VLD3dWB_register_Asm_16 :
8425        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8426                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8427                       rGPR:$Rm, pred:$p)>;
8428def VLD3dWB_register_Asm_32 :
8429        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8430                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8431                       rGPR:$Rm, pred:$p)>;
8432def VLD3qWB_register_Asm_8 :
8433        NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
8434                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8435                       rGPR:$Rm, pred:$p)>;
8436def VLD3qWB_register_Asm_16 :
8437        NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
8438                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8439                       rGPR:$Rm, pred:$p)>;
8440def VLD3qWB_register_Asm_32 :
8441        NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
8442                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8443                       rGPR:$Rm, pred:$p)>;
8444
8445// VST3 single-lane pseudo-instructions. These need special handling for
8446// the lane index that an InstAlias can't handle, so we use these instead.
8447def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
8448               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8449                    pred:$p)>;
8450def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8451               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8452                    pred:$p)>;
8453def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8454               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8455                    pred:$p)>;
8456def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8457               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8458                    pred:$p)>;
8459def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8460               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8461                    pred:$p)>;
8462
8463def VST3LNdWB_fixed_Asm_8 :
8464        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
8465               (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8466                    pred:$p)>;
8467def VST3LNdWB_fixed_Asm_16 :
8468        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8469               (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
8470                    pred:$p)>;
8471def VST3LNdWB_fixed_Asm_32 :
8472        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8473               (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8474                    pred:$p)>;
8475def VST3LNqWB_fixed_Asm_16 :
8476        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8477               (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
8478                    pred:$p)>;
8479def VST3LNqWB_fixed_Asm_32 :
8480        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8481               (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8482                    pred:$p)>;
8483def VST3LNdWB_register_Asm_8 :
8484        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
8485                  (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
8486                       rGPR:$Rm, pred:$p)>;
8487def VST3LNdWB_register_Asm_16 :
8488        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8489                  (ins VecListThreeDHWordIndexed:$list,
8490                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8491def VST3LNdWB_register_Asm_32 :
8492        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8493                  (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
8494                       rGPR:$Rm, pred:$p)>;
8495def VST3LNqWB_register_Asm_16 :
8496        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8497                  (ins VecListThreeQHWordIndexed:$list,
8498                       addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
8499def VST3LNqWB_register_Asm_32 :
8500        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8501                  (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
8502                       rGPR:$Rm, pred:$p)>;
8503
8504
8505// VST3 multiple structure pseudo-instructions. These need special handling for
8506// the vector operands that the normal instructions don't yet model.
8507// FIXME: Remove these when the register classes and instructions are updated.
8508def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
8509               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8510def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8511               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8512def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8513               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8514def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
8515               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8516def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
8517               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8518def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
8519               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8520
8521def VST3dWB_fixed_Asm_8 :
8522        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
8523               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8524def VST3dWB_fixed_Asm_16 :
8525        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8526               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8527def VST3dWB_fixed_Asm_32 :
8528        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8529               (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
8530def VST3qWB_fixed_Asm_8 :
8531        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
8532               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8533def VST3qWB_fixed_Asm_16 :
8534        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
8535               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8536def VST3qWB_fixed_Asm_32 :
8537        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
8538               (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
8539def VST3dWB_register_Asm_8 :
8540        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
8541                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8542                       rGPR:$Rm, pred:$p)>;
8543def VST3dWB_register_Asm_16 :
8544        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8545                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8546                       rGPR:$Rm, pred:$p)>;
8547def VST3dWB_register_Asm_32 :
8548        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8549                  (ins VecListThreeD:$list, addrmode6align64:$addr,
8550                       rGPR:$Rm, pred:$p)>;
8551def VST3qWB_register_Asm_8 :
8552        NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
8553                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8554                       rGPR:$Rm, pred:$p)>;
8555def VST3qWB_register_Asm_16 :
8556        NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
8557                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8558                       rGPR:$Rm, pred:$p)>;
8559def VST3qWB_register_Asm_32 :
8560        NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
8561                  (ins VecListThreeQ:$list, addrmode6align64:$addr,
8562                       rGPR:$Rm, pred:$p)>;
8563
8564// VLD4 all-lanes pseudo-instructions. These need special handling for
8565// the lane index that an InstAlias can't handle, so we use these instead.
8566def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
8567               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
8568                    pred:$p)>;
8569def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
8570               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
8571                    pred:$p)>;
8572def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
8573               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
8574                    pred:$p)>;
8575def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
8576               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
8577                    pred:$p)>;
8578def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
8579               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
8580                    pred:$p)>;
8581def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
8582               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
8583                    pred:$p)>;
8584
8585def VLD4DUPdWB_fixed_Asm_8 :
8586        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
8587               (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
8588                    pred:$p)>;
8589def VLD4DUPdWB_fixed_Asm_16 :
8590        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
8591               (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
8592                    pred:$p)>;
8593def VLD4DUPdWB_fixed_Asm_32 :
8594        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
8595               (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
8596                    pred:$p)>;
8597def VLD4DUPqWB_fixed_Asm_8 :
8598        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
8599               (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
8600                    pred:$p)>;
8601def VLD4DUPqWB_fixed_Asm_16 :
8602        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
8603               (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
8604                    pred:$p)>;
8605def VLD4DUPqWB_fixed_Asm_32 :
8606        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
8607               (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
8608                    pred:$p)>;
8609def VLD4DUPdWB_register_Asm_8 :
8610        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
8611                  (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
8612                       rGPR:$Rm, pred:$p)>;
8613def VLD4DUPdWB_register_Asm_16 :
8614        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
8615                  (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
8616                       rGPR:$Rm, pred:$p)>;
8617def VLD4DUPdWB_register_Asm_32 :
8618        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
8619                  (ins VecListFourDAllLanes:$list,
8620                       addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
8621def VLD4DUPqWB_register_Asm_8 :
8622        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
8623                  (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
8624                       rGPR:$Rm, pred:$p)>;
8625def VLD4DUPqWB_register_Asm_16 :
8626        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
8627                  (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
8628                       rGPR:$Rm, pred:$p)>;
8629def VLD4DUPqWB_register_Asm_32 :
8630        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
8631                  (ins VecListFourQAllLanes:$list,
8632                       addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
8633
8634
8635// VLD4 single-lane pseudo-instructions. These need special handling for
8636// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

def VLD4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VLD4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourDWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;



// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
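// For illustration only: these accept forms such as
// "vld4.16 {d0, d1, d2, d3}, [r1:256]!" (VecListFourD) and the double-spaced
// "vld4.16 {d0, d2, d4, d6}, [r1]" (VecListFourQ).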
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;

def VLD4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                pred:$p)>;
def VLD4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
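// For illustration only: these cover forms such as
// "vst4.32 {d0[1], d1[1], d2[1], d3[1]}, [r0:128], r2".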
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;

def VST4LNdWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                    pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
                    pred:$p)>;
def VST4LNdWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourDWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQWordIndexed:$list,
                       addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;


// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
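// For illustration only: these accept forms such as
// "vst4.8 {d0, d1, d2, d3}, [r0:64]!" and "vst4.8 {d0, d2, d4, d6}, [r0]".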
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;

def VST4dWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4qWB_fixed_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
               (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                    pred:$p)>;
def VST4dWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
        NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
                  (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
                       rGPR:$Rm, pred:$p)>;

// VMOV/VMVN take an optional datatype suffix.
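// For example, "vmov.i32 q0, q1" is accepted and encoded as "vorr q0, q1, q1".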
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
                         (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;

defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm",
                         (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
// D-register versions.
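// For example, "vcle.s8 d0, d1, d2" is encoded as "vcge.s8 d0, d2, d1".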
def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
                    (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
                    (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
                    (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
                    (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
                    (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                    (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                    (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                    (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
                    (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
                    (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
                    (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
                    (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                    (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                    (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
// D-register versions.
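// For example, "vclt.u16 d0, d1, d2" is encoded as "vcgt.u16 d0, d2, d1".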
def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
                    (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
                    (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
                    (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
                    (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
                    (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                    (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                    (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
// Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
                    (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
                    (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
                    (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
                    (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
                    (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
                    (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
                    (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
let Predicates = [HasNEON, HasFullFP16] in
def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm",
                    (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

// VSWP allows, but does not require, a type suffix.
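// For example, both "vswp d0, d1" and "vswp.8 d0, d1" are accepted.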
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
                         (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;

// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
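// For example, "vbsl.8 d0, d1, d2" assembles the same as "vbsl d0, d1, d2".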
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
                         (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
                         (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
                         (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;

// "vmov Vd, #imm" where only the bitwise inverse of the immediate is encodable
// can be handled via "vmvn", and vice versa.
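// For example, "vmov.i32 d0, #0xffffff00" can be encoded as
// "vmvn.i32 d0, #0x000000ff".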
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
                    (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
                    (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;

// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
// these should restrict to just the Q register variants, but the register
// classes are enough to match correctly regardless, so we keep it simple
// and just use MnemonicAlias.
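// For example, "vaddq.i32 q0, q1, q2" is treated as "vadd.i32 q0, q1, q2".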
def : NEONMnemonicAlias<"vbicq", "vbic">;
def : NEONMnemonicAlias<"vandq", "vand">;
def : NEONMnemonicAlias<"veorq", "veor">;
def : NEONMnemonicAlias<"vorrq", "vorr">;

def : NEONMnemonicAlias<"vmovq", "vmov">;
def : NEONMnemonicAlias<"vmvnq", "vmvn">;
// Explicit versions for floating point so that the FPImm variants get
// handled early. The parser gets confused otherwise.
def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;

def : NEONMnemonicAlias<"vaddq", "vadd">;
def : NEONMnemonicAlias<"vsubq", "vsub">;

def : NEONMnemonicAlias<"vminq", "vmin">;
def : NEONMnemonicAlias<"vmaxq", "vmax">;

def : NEONMnemonicAlias<"vmulq", "vmul">;

def : NEONMnemonicAlias<"vabsq", "vabs">;

def : NEONMnemonicAlias<"vshlq", "vshl">;
def : NEONMnemonicAlias<"vshrq", "vshr">;

def : NEONMnemonicAlias<"vcvtq", "vcvt">;

def : NEONMnemonicAlias<"vcleq", "vcle">;
def : NEONMnemonicAlias<"vceqq", "vceq">;

def : NEONMnemonicAlias<"vzipq", "vzip">;
def : NEONMnemonicAlias<"vswpq", "vswp">;

def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;


// Aliases for loading floating-point immediates that aren't representable
// using the vmov.f32 encoding but whose bit pattern is representable using
// the .i32 encoding.
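// For example, a 32-bit float constant such as 0.0 has no vmov.f32 encoding,
// but its bit pattern (0x00000000) is a valid .i32 modified immediate, so the
// aliases below let it be materialized with VMOVv2i32/VMOVv4i32.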
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                     (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
                     (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;

// ARMv8.6a BFloat16 instructions.
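// For reference, the assembly forms of the definitions below are "vdot.bf16",
// "vmmla.bf16", "vfmab.bf16"/"vfmat.bf16", and "vcvt.bf16.f32".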
let Predicates = [HasBF16, HasNEON] in {
class BF16VDOT<bits<5> op27_23, bits<2> op21_20, bit op6,
               dag oops, dag iops, list<dag> pattern>
   : N3Vnp<op27_23, op21_20, 0b1101, op6, 0, oops, iops,
           N3RegFrm, IIC_VDOTPROD, "", "", pattern>
{
  let DecoderNamespace = "VFPV8";
}

class BF16VDOTS<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy, ValueType InputTy>
   : BF16VDOT<0b11000, 0b00, Q, (outs RegTy:$dst),
              (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
            [(set (AccumTy RegTy:$dst),
                  (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                      (InputTy RegTy:$Vn),
                                      (InputTy RegTy:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
  let DecoderNamespace = "VFPV8";
}

multiclass BF16VDOTI<bit Q, RegisterClass RegTy, string opc, ValueType AccumTy,
                     ValueType InputTy, dag RHS> {

  def "" : BF16VDOT<0b11100, 0b00, Q, (outs RegTy:$dst),
                    (ins RegTy:$Vd, RegTy:$Vn,
                    DPR_VFP2:$Vm, VectorIndex32:$lane), []> {
    bit lane;
    let Inst{5} = lane;
    let Constraints = "$dst = $Vd";
    let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm$lane");
    let DecoderNamespace = "VFPV8";
  }

  def : Pat<
    (AccumTy (int_arm_neon_bfdot (AccumTy RegTy:$Vd),
                                 (InputTy RegTy:$Vn),
                                 (InputTy (bitconvert (AccumTy
                                          (ARMvduplane (AccumTy RegTy:$Vm),
                                                        VectorIndex32:$lane)))))),
    (!cast<Instruction>(NAME) RegTy:$Vd, RegTy:$Vn, RHS, VectorIndex32:$lane)>;
}

def BF16VDOTS_VDOTD : BF16VDOTS<0, DPR, "vdot", v2f32, v4bf16>;
def BF16VDOTS_VDOTQ : BF16VDOTS<1, QPR, "vdot", v4f32, v8bf16>;

defm BF16VDOTI_VDOTD : BF16VDOTI<0, DPR, "vdot", v2f32, v4bf16, (v2f32 DPR_VFP2:$Vm)>;
defm BF16VDOTI_VDOTQ : BF16VDOTI<1, QPR, "vdot", v4f32, v8bf16, (EXTRACT_SUBREG QPR:$Vm, dsub_0)>;

class BF16MM<bit Q, RegisterClass RegTy,
             string opc>
   : N3Vnp<0b11000, 0b00, 0b1100, Q, 0,
           (outs RegTy:$dst), (ins RegTy:$Vd, RegTy:$Vn, RegTy:$Vm),
           N3RegFrm, IIC_VDOTPROD, "", "",
                [(set (v4f32 QPR:$dst), (int_arm_neon_bfmmla (v4f32 QPR:$Vd),
                                                (v8bf16 QPR:$Vn),
                                                (v8bf16 QPR:$Vm)))]> {
   let Constraints = "$dst = $Vd";
   let AsmString = !strconcat(opc, ".bf16", "\t$Vd, $Vn, $Vm");
   let DecoderNamespace = "VFPV8";
}

def VMMLA : BF16MM<1, QPR, "vmmla">;

class VBF16MALQ<bit T, string suffix, SDPatternOperator OpNode>
  : N3VCP8<0b00, 0b11, T, 1,
           (outs QPR:$dst), (ins QPR:$Vd, QPR:$Vn, QPR:$Vm),
           NoItinerary, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm", "",
                [(set (v4f32 QPR:$dst),
                      (OpNode (v4f32 QPR:$Vd),
                              (v8bf16 QPR:$Vn),
                              (v8bf16 QPR:$Vm)))]> {
  let Constraints = "$dst = $Vd";
  let DecoderNamespace = "VFPV8";
}

def VBF16MALTQ: VBF16MALQ<1, "t", int_arm_neon_bfmlalt>;
def VBF16MALBQ: VBF16MALQ<0, "b", int_arm_neon_bfmlalb>;

multiclass VBF16MALQI<bit T, string suffix, SDPatternOperator OpNode> {
  def "" : N3VLaneCP8<0, 0b11, T, 1, (outs QPR:$dst),
              (ins QPR:$Vd, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$idx),
               IIC_VMACD, "vfma" # suffix, "bf16", "$Vd, $Vn, $Vm$idx", "", []> {
  bits<2> idx;
  let Inst{5} = idx{1};
  let Inst{3} = idx{0};
  let Constraints = "$dst = $Vd";
  let DecoderNamespace = "VFPV8";
  }

  def : Pat<
    (v4f32 (OpNode (v4f32 QPR:$Vd),
                   (v8bf16 QPR:$Vn),
                   (v8bf16 (ARMvduplane (v8bf16 QPR:$Vm),
                            VectorIndex16:$lane)))),
    (!cast<Instruction>(NAME) QPR:$Vd,
                              QPR:$Vn,
                              (EXTRACT_SUBREG QPR:$Vm,
                                (DSubReg_i16_reg VectorIndex16:$lane)),
                              (SubReg_i16_lane VectorIndex16:$lane))>;
}

defm VBF16MALTQI: VBF16MALQI<1, "t", int_arm_neon_bfmlalt>;
defm VBF16MALBQI: VBF16MALQI<0, "b", int_arm_neon_bfmlalb>;

def BF16_VCVT :  N2V<0b11, 0b11, 0b01, 0b10, 0b01100, 1, 0,
                    (outs DPR:$Vd), (ins QPR:$Vm),
                    NoItinerary, "vcvt", "bf16.f32", "$Vd, $Vm", "", []>;
}
// End of BFloat16 instructions