1//=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the scheduling model for the Fujitsu A64FX processors.
10//
11//===----------------------------------------------------------------------===//
12
// Machine model for the Fujitsu A64FX core.
def A64FXModel : SchedMachineModel {
  // Dispatch width and buffering.
  let IssueWidth = 6;          // 6 micro-ops dispatched at a time.
  let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer.
  // Determined via a mix of micro-arch details and experimentation.
  let LoopMicroOpBufferSize = 128;

  // Latency estimates.
  let LoadLatency = 5;         // Optimistic load latency.
  let MispredictPenalty = 12;  // Extra cycles for mispredicted branch.

  // Model-wide switches.
  let PostRAScheduler = 1;     // Using PostRA sched.
  let CompleteModel = 1;
  let FullInstRWOverlapCheck = 0;

  // Subtarget features this model does not describe.
  list<Predicate> UnsupportedFeatures = [
    HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
    HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1,
    HasSVE2p1, HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA,
    HasSMEF8F16, HasSMEF8F32, HasSMEFA64
  ];
}
31
32let SchedModel = A64FXModel in {
33
// Define the issue ports.

// A64FXIP*: one ProcResource per issue port (ports 0-7), each declared with
// a unit count of 1.

// Port 0
def A64FXIPFLA : ProcResource<1>;

// Port 1
def A64FXIPPR : ProcResource<1>;

// Port 2
def A64FXIPEXA : ProcResource<1>;

// Port 3
def A64FXIPFLB : ProcResource<1>;

// Port 4
def A64FXIPEXB : ProcResource<1>;

// Port 5
def A64FXIPEAGA : ProcResource<1>;

// Port 6
def A64FXIPEAGB : ProcResource<1>;

// Port 7
def A64FXIPBR : ProcResource<1>;
61
// Define groups for the functional units on each issue port.  Each group
// created will be used by a WriteRes later on.
//
// Naming: the digits after "A64FXGI" are the port numbers of the group's
// members (e.g. A64FXGI03 = ports 0 and 3, A64FXGI2456 = ports 2, 4, 5, 6).

// Single-port groups (port 7 first, then ports 0-6).
def A64FXGI7 : ProcResGroup<[A64FXIPBR]>;

def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>;

def A64FXGI1 : ProcResGroup<[A64FXIPPR]>;

def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>;

def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>;

def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>;

def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>;

def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>;

// Multi-port groups: a micro-op bound to one of these may use any member.
def A64FXGI03 : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>;

def A64FXGI01 : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>;

def A64FXGI24 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>;

def A64FXGI56 : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>;

def A64FXGI056 : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>;

def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB]>;

// All eight issue ports.
def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB,
                             A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB, A64FXIPBR]>;
95
// Fixed-latency writes.  Each name spells out the latency in cycles and the
// port group (A64FXGI*) the write is bound to.

// Port 7.
let Latency = 1 in
def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]>;

// Port 0.
let Latency = 2 in
def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]>;

let Latency = 4 in
def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]>;

let Latency = 6 in
def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]>;

let Latency = 8 in
def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]>;

let Latency = 9 in
def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]>;

// Port 1.
let Latency = 3 in
def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]>;

// Port 2.
let Latency = 5 in
def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]>;

// Port 3.
let Latency = 4 in
def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]>;

let Latency = 6 in
def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]>;

// Ports 0 and 3.
let Latency = 4 in
def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]>;

let Latency = 8 in
def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]>;

let Latency = 9 in
def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]>;

// Port 4.
let Latency = 10 in
def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]>;

let Latency = 12 in
def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]>;

let Latency = 20 in
def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]>;

// Port 5.
let Latency = 5 in
def A64FXWrite_5Cyc_GI5 : SchedWriteRes<[A64FXGI5]>;

let Latency = 11 in
def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]>;

// Port 6.
let Latency = 5 in
def A64FXWrite_5Cyc_GI6 : SchedWriteRes<[A64FXGI6]>;
171
// Writes bound to ports 2 and 4.
let Latency = 1 in
def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]>;

let Latency = 2 in
def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]>;

// Four micro-ops, four cycles.
let Latency = 4, NumMicroOps = 4 in
def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]>;

// Writes bound to ports 5 and 6.
let Latency = 1 in
def A64FXWrite_1Cyc_GI56 : SchedWriteRes<[A64FXGI56]>;

let Latency = 5 in
def A64FXWrite_5Cyc_GI56 : SchedWriteRes<[A64FXGI56]>;

let Latency = 8 in
def A64FXWrite_8Cyc_GI56 : SchedWriteRes<[A64FXGI56]>;

let Latency = 11 in
def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]>;
200
// Multi-micro-op load writes, all with a 5-cycle latency.
let Latency = 5 in {
  let NumMicroOps = 2 in
  def A64FXWrite_LDNP : SchedWriteRes<[A64FXGI56]>;

  let NumMicroOps = 3 in
  def A64FXWrite_LDP01 : SchedWriteRes<[A64FXGI2456]>;

  let NumMicroOps = 2 in
  def A64FXWrite_LDR01 : SchedWriteRes<[A64FXGI2456]>;
}
215
// A64FXWrite_LD102 .. A64FXWrite_LD115: load writes on ports 5/6.  The defs
// come in pairs sharing a micro-op count: the even-numbered one has an
// 8-cycle latency, the odd-numbered one an 11-cycle latency.
let NumMicroOps = 2 in {
  let Latency = 8  in def A64FXWrite_LD102 : SchedWriteRes<[A64FXGI56]>;
  let Latency = 11 in def A64FXWrite_LD103 : SchedWriteRes<[A64FXGI56]>;
}

let NumMicroOps = 3 in {
  let Latency = 8  in def A64FXWrite_LD104 : SchedWriteRes<[A64FXGI56]>;
  let Latency = 11 in def A64FXWrite_LD105 : SchedWriteRes<[A64FXGI56]>;
}

let NumMicroOps = 4 in {
  let Latency = 8  in def A64FXWrite_LD106 : SchedWriteRes<[A64FXGI56]>;
  let Latency = 11 in def A64FXWrite_LD107 : SchedWriteRes<[A64FXGI56]>;
}

let NumMicroOps = 2 in {
  let Latency = 8  in def A64FXWrite_LD108 : SchedWriteRes<[A64FXGI56]>;
  let Latency = 11 in def A64FXWrite_LD109 : SchedWriteRes<[A64FXGI56]>;
}

let NumMicroOps = 3 in {
  let Latency = 8  in def A64FXWrite_LD110 : SchedWriteRes<[A64FXGI56]>;
  let Latency = 11 in def A64FXWrite_LD111 : SchedWriteRes<[A64FXGI56]>;
}

let NumMicroOps = 4 in {
  let Latency = 8  in def A64FXWrite_LD112 : SchedWriteRes<[A64FXGI56]>;
  let Latency = 11 in def A64FXWrite_LD113 : SchedWriteRes<[A64FXGI56]>;
}

let NumMicroOps = 5 in {
  let Latency = 8  in def A64FXWrite_LD114 : SchedWriteRes<[A64FXGI56]>;
  let Latency = 11 in def A64FXWrite_LD115 : SchedWriteRes<[A64FXGI56]>;
}
286
// A64FXWrite_LD{1..4}I{0,1}: 8-cycle load writes on ports 0/5/6 whose
// micro-op count grows from 2 to 9 across the series.
let Latency = 8 in {
  let NumMicroOps = 2 in def A64FXWrite_LD1I0 : SchedWriteRes<[A64FXGI056]>;
  let NumMicroOps = 3 in def A64FXWrite_LD1I1 : SchedWriteRes<[A64FXGI056]>;
  let NumMicroOps = 4 in def A64FXWrite_LD2I0 : SchedWriteRes<[A64FXGI056]>;
  let NumMicroOps = 5 in def A64FXWrite_LD2I1 : SchedWriteRes<[A64FXGI056]>;
  let NumMicroOps = 6 in def A64FXWrite_LD3I0 : SchedWriteRes<[A64FXGI056]>;
  let NumMicroOps = 7 in def A64FXWrite_LD3I1 : SchedWriteRes<[A64FXGI056]>;
  let NumMicroOps = 8 in def A64FXWrite_LD4I0 : SchedWriteRes<[A64FXGI056]>;
  let NumMicroOps = 9 in def A64FXWrite_LD4I1 : SchedWriteRes<[A64FXGI056]>;
}
326
// One-cycle write on ports 2/4/5/6.
let Latency = 1 in
def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]>;

// Named writes on ports 0/3.
let Latency = 10 in
def A64FXWrite_FMOV_GV : SchedWriteRes<[A64FXGI03]>;

let Latency = 14 in
def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]>;

let Latency = 12 in
def A64FXWrite_ADDLV : SchedWriteRes<[A64FXGI03]>;

let Latency = 14 in
def A64FXWrite_MULLE : SchedWriteRes<[A64FXGI03]>;

let Latency = 14 in
def A64FXWrite_MULLV : SchedWriteRes<[A64FXGI03]>;

let Latency = 6 in
def A64FXWrite_MADDL : SchedWriteRes<[A64FXGI03]>;

let Latency = 8 in
def A64FXWrite_ABA : SchedWriteRes<[A64FXGI03]>;

let Latency = 10 in
def A64FXWrite_ABAL : SchedWriteRes<[A64FXGI03]>;

let Latency = 12, NumMicroOps = 6 in
def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]>;

let Latency = 14, NumMicroOps = 6 in
def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]>;

let Latency = 9 in
def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]>;

let Latency = 8 in
def A64FXWrite_PMUL : SchedWriteRes<[A64FXGI03]>;
380
381
// Further named writes on ports 0/3; most of them expand to several
// micro-ops.
let Latency = 8, NumMicroOps = 3 in
def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]>;

let Latency = 8, NumMicroOps = 2 in
def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]>;

let Latency = 10, NumMicroOps = 3 in
def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]>;

let Latency = 10, NumMicroOps = 2 in
def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]>;

let Latency = 10, NumMicroOps = 3 in
def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]>;

let Latency = 15, NumMicroOps = 2 in
def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]>;

let Latency = 15, NumMicroOps = 3 in
def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]>;

let Latency = 10, NumMicroOps = 3 in
def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]>;

let Latency = 10, NumMicroOps = 2 in
def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]>;

let Latency = 15, NumMicroOps = 2 in
def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]>;

let Latency = 14, NumMicroOps = 7 in
def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]>;

let Latency = 5 in
def A64FXWrite_BIF : SchedWriteRes<[A64FXGI03]>;

let Latency = 10 in
def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]>;
445
// SHA writes, port 0 only.
let Latency = 9 in
def A64FXWrite_SHA00 : SchedWriteRes<[A64FXGI0]>;

let Latency = 12 in
def A64FXWrite_SHA01 : SchedWriteRes<[A64FXGI0]>;

// Ports 0/3.
let Latency = 25 in
def A64FXWrite_SMOV : SchedWriteRes<[A64FXGI03]>;

// A64FXWrite_TBX1..4: 10-cycle writes on ports 0/3 with 3, 5, 7 and 9
// micro-ops respectively.
let Latency = 10 in {
  let NumMicroOps = 3 in def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]>;
  let NumMicroOps = 5 in def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]>;
  let NumMicroOps = 7 in def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]>;
  let NumMicroOps = 9 in def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]>;
}
477
// Prefetch, swap and store writes on ports 5/6, grouped by latency.

// Zero-latency writes.
let Latency = 0 in {
  def A64FXWrite_PREF0 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_PREF1 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_SWP   : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_STUR  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_STNP  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_STP01 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST10  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST11  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST12  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST13  : SchedWriteRes<[A64FXGI56]>;
}

// One-cycle writes.
let Latency = 1 in {
  def A64FXWrite_ST14 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST15 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST16 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST17 : SchedWriteRes<[A64FXGI56]>;
}

// Seven-cycle write.
let Latency = 7 in
def A64FXWrite_CAS : SchedWriteRes<[A64FXGI56]>;
537
// Define commonly used read types.

// No forwarding is provided for these types: every one of them gets a
// ReadAdvance of 0 cycles.
foreach R = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID,
             ReadExtrHi, ReadAdrBase, ReadST, ReadVLD] in
def : ReadAdvance<R, 0>;
551
552//===----------------------------------------------------------------------===//
553// 3. Instruction Tables.
554
555//---
556// 3.1 Branch Instructions
557//---
558
let Latency = 1 in {
  // Branch, immed
  // Branch and link, immed
  // Compare and branch
  def : WriteRes<WriteBr, [A64FXGI7]>;

  // Branch, register
  // Branch and link, register != LR
  // Branch and link, register = LR
  def : WriteRes<WriteBrReg, [A64FXGI7]>;

  // System, barrier and hint writes carry no resource list.
  def : WriteRes<WriteSys, []>;
  def : WriteRes<WriteBarrier, []>;
  def : WriteRes<WriteHint, []>;
}

// Atomic write: no resource list, 4-cycle latency.
let Latency = 4 in
def : WriteRes<WriteAtomic, []>;
580
//---
// Branch
//---
// Every branch form listed here uses the one-cycle write on port 7.
def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR, RET)>;
def : InstRW<[A64FXWrite_1Cyc_GI7],
            (instregex "^B..$", "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;
589
//---
// 3.2 Arithmetic and Logical Instructions
// 3.3 Move and Shift Instructions
//---

// ALU, basic
// Conditional compare
// Conditional select
// Address generation
// One cycle on any of ports 2/4/5/6.
def : WriteRes<WriteI,       [A64FXGI2456]> {
  let Latency = 1;
}

// NOTE(review): the instregex list below is repeated verbatim for
// WriteISReg and WriteIEReg further down, so the same opcodes are named by
// three InstRW records.  This relies on FullInstRWOverlapCheck being off for
// the model; which mapping is intended for each opcode should be confirmed.
def : InstRW<[WriteI],
            (instregex "ADD?(W|X)r(i|r|s|x)",   "ADDS?(W|X)r(i|r|s|x)(64)?",
                       "AND?(W|X)r(i|r|s|x)",   "ANDS?(W|X)r(i|r|s|x)",
                       "ADC(W|X)r",
                       "BIC?(W|X)r(i|r|s|x)",   "BICS?(W|X)r(i|r|s|x)",
                       "EON?(W|X)r(i|r|s|x)",   "ORN?(W|X)r(i|r|s|x)",
                       "ORR?(W|X)r(i|r|s|x)",   "SUB?(W|X)r(i|r|s|x)",
                       "SUBS?(W|X)r(i|r|s|x)",  "SBC(W|X)r",
                       "SBCS(W|X)r",            "CCMN(W|X)(i|r)",
                       "CCMP(W|X)(i|r)",        "CSEL(W|X)r",
                       "CSINC(W|X)r",           "CSINV(W|X)r",
                       "CSNEG(W|X)r")>;

// Register copies are scheduled as basic ALU operations.
def : InstRW<[WriteI], (instrs COPY)>;

// ALU, extend and/or shift
// Two cycles on any of ports 2/4/5/6.
def : WriteRes<WriteISReg,   [A64FXGI2456]> {
  let Latency = 2;
}

// Same opcode list as for WriteI (see the review note there).
def : InstRW<[WriteISReg],
            (instregex "ADD?(W|X)r(i|r|s|x)",   "ADDS?(W|X)r(i|r|s|x)(64)?",
                       "AND?(W|X)r(i|r|s|x)",   "ANDS?(W|X)r(i|r|s|x)",
                       "ADC(W|X)r",
                       "BIC?(W|X)r(i|r|s|x)",   "BICS?(W|X)r(i|r|s|x)",
                       "EON?(W|X)r(i|r|s|x)",   "ORN?(W|X)r(i|r|s|x)",
                       "ORR?(W|X)r(i|r|s|x)",   "SUB?(W|X)r(i|r|s|x)",
                       "SUBS?(W|X)r(i|r|s|x)",  "SBC(W|X)r",
                       "SBCS(W|X)r",            "CCMN(W|X)(i|r)",
                       "CCMP(W|X)(i|r)",        "CSEL(W|X)r",
                       "CSINC(W|X)r",           "CSINV(W|X)r",
                       "CSNEG(W|X)r")>;

// One cycle on any of ports 2/4/5/6.
def : WriteRes<WriteIEReg,   [A64FXGI2456]> {
  let Latency = 1;
}

// Same opcode list again, now bound to WriteIEReg.
def : InstRW<[WriteIEReg],
            (instregex "ADD?(W|X)r(i|r|s|x)",   "ADDS?(W|X)r(i|r|s|x)(64)?",
                       "AND?(W|X)r(i|r|s|x)",   "ANDS?(W|X)r(i|r|s|x)",
                       "ADC(W|X)r",
                       "BIC?(W|X)r(i|r|s|x)",   "BICS?(W|X)r(i|r|s|x)",
                       "EON?(W|X)r(i|r|s|x)",   "ORN?(W|X)r(i|r|s|x)",
                       "ORR?(W|X)r(i|r|s|x)",   "SUB?(W|X)r(i|r|s|x)",
                       "SUBS?(W|X)r(i|r|s|x)",  "SBC(W|X)r",
                       "SBCS(W|X)r",            "CCMN(W|X)(i|r)",
                       "CCMP(W|X)(i|r)",        "CSEL(W|X)r",
                       "CSINC(W|X)r",           "CSINV(W|X)r",
                       "CSNEG(W|X)r")>;
652
// Move immed: one cycle on any of ports 2/4/5/6.
let Latency = 1 in
def : WriteRes<WriteImm, [A64FXGI2456]>;

// MOVK/MOVN/MOVZ use the same one-cycle, ports 2/4/5/6 write.
def : InstRW<[A64FXWrite_1Cyc_GI2456],
            (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;

// Shifts by a register amount: two cycles on ports 2/4.
def : InstRW<[A64FXWrite_2Cyc_GI24],
            (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;

// Variable shift
let Latency = 1 in
def : WriteRes<WriteIS, [A64FXGI2456]>;
668
//---
// 3.4 Divide and Multiply Instructions
//---

// Divide, W-form
// The port-4 resource is held for the full latency (ReleaseAtCycles equals
// Latency).
// NOTE(review): the W-form latency (39) is larger than the X-form latency
// (23) below; confirm both values against the A64FX documentation.
def : WriteRes<WriteID32,    [A64FXGI4]> {
  let Latency = 39;
  let ReleaseAtCycles = [39];
}

// Divide, X-form
def : WriteRes<WriteID64,    [A64FXGI4]> {
  let Latency = 23;
  let ReleaseAtCycles = [23];
}

// Multiply accumulate, W-form
def : WriteRes<WriteIM32,    [A64FXGI2456]> {
  let Latency = 5;
}

// Multiply accumulate, X-form
def : WriteRes<WriteIM64,    [A64FXGI2456]> {
  let Latency = 5;
}

// W-form multiply-accumulate uses WriteIM32 and X-form uses WriteIM64.  The
// two writes have the same latency and resources in this model, so binding
// the X-form opcodes to WriteIM64 keeps the timing unchanged while matching
// the W/X split used for the divides below.
def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
def : InstRW<[WriteIM64], (instrs MADDXrrr, MSUBXrrr)>;

// Widening multiply-accumulate (SMADDL/SMSUBL/UMADDL/UMSUBL).
def : InstRW<[A64FXWrite_MADDL],
            (instregex "(S|U)(MADDL|MSUBL)rrr")>;

def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;
702
// Bitfield extract, two reg
// One cycle on any of ports 2/4/5/6.
def : WriteRes<WriteExtr,    [A64FXGI2456]> {
  let Latency = 1;
}

// Multiply high
// Five cycles, port 2 only.
def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>;

// Miscellaneous Data-Processing Instructions
// Bitfield extract
def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move - basic
def : InstRW<[A64FXWrite_1Cyc_GI24],
            (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;

// Bitfield move, insert
// NOTE(review): "(S|U)?BFM.*" also matches the opcodes covered by "^BFM" and
// by the explicit SBFM/UBFM list above; the effective mapping depends on how
// overlapping InstRW records are resolved - confirm the intent.
def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>;
def : InstRW<[A64FXWrite_1Cyc_GI24], (instregex "(S|U)?BFM.*")>;

// Count leading
// Two cycles, port 0.
def : InstRW<[A64FXWrite_2Cyc_GI0], (instregex "^CLS(W|X)r$",
                                               "^CLZ(W|X)r$")>;

// Reverse bits
// Four cycles on ports 0/3.
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>;
729
// Cryptography Extensions
// AES, PMULL and the SHA schedule-update / SHA1H forms: 8 cycles on port 0.
def : InstRW<[A64FXWrite_8Cyc_GI0],
            (instregex "^AES[DE]", "^AESI?MC", "^PMULL", "^SHA1(H|SU1)",
                       "^SHA256SU0", "^SHA256SU1")>;
def : InstRW<[A64FXWrite_SHA00], (instregex "^SHA1SU0")>;
def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA1[CMP]", "^SHA256(H|H2)")>;

// CRC Instructions
// All on port 4; latency depends on the operand width (B/H, W, X) and is the
// same for the CRC32 and CRC32C variants.
def : InstRW<[A64FXWrite_10Cyc_GI4],
            (instrs CRC32Brr, CRC32Hrr, CRC32CBrr, CRC32CHrr)>;
def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr, CRC32CWrr)>;
def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr, CRC32CXrr)>;
749
750// Reverse bits/bytes
751// NOTE: Handled by WriteI.
752
//---
// 3.6 Load Instructions
// 3.10 FP Load Instructions
//---

// Load register, literal
// Load register, unscaled immed
// Load register, immed unprivileged
// Load register, unsigned immed
// Four cycles on ports 5/6.
def : WriteRes<WriteLD,      [A64FXGI56]> {
  let Latency = 4;
}

// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
// Address write-back: one cycle on any of ports 2/4/5/6.
def : WriteRes<WriteAdr,     [A64FXGI2456]> {
  let Latency = 1;
}

// Load pair, immed offset, normal
// Load pair, immed offset, signed words, base != SP
// Load pair, immed offset signed words, base = SP
// LDP only breaks into *one* LS micro-op.  Thus
// the resources are handled by WriteLD.
// WriteLDHi therefore carries a latency but no resource list.
def : WriteRes<WriteLDHi,    []> {
  let Latency = 5;
}

// Load register offset, basic
// Load register, register offset, scale by 4/8
// Load register, register offset, scale by 2
// Load register offset, extend
// Load register, register offset, extend, scale by 4/8
// Load register, register offset, extend, scale by 2
// Both variants currently select the same write, so the scaled-index
// predicate makes no difference to the result.
def A64FXWriteLDIdx : SchedWriteVariant<[
  SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>,
  SchedVar<NoSchedPred,   [A64FXWrite_1Cyc_GI56]>]>;
def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>;

// Address-base read: both variants resolve to ReadDefault.
def A64FXReadAdrBase : SchedReadVariant<[
  SchedVar<ScaledIdxPred, [ReadDefault]>,
  SchedVar<NoSchedPred,   [ReadDefault]>]>;
def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>;
798
// Load pair, immed pre-index, normal
// Load pair, immed pre-index, signed words
// Load pair, immed post-index, normal
// Load pair, immed post-index, signed words
// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.

// Load pair (non-temporal and regular), immediate offset.
def : InstRW<[A64FXWrite_LDNP, WriteLDHi],
            (instrs LDNPDi, LDNPQi, LDNPSi, LDNPWi, LDNPXi,
                    LDPDi, LDPQi, LDPSi, LDPSWi, LDPWi, LDPXi)>;

// Load register, unsigned immediate: 5 cycles on ports 5/6.
def : InstRW<[A64FXWrite_5Cyc_GI56],
            (instrs LDRBui, LDRDui, LDRHui, LDRQui, LDRSui)>;

// Load register, literal: 5 cycles on port 6.
def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRDl, LDRQl, LDRWl, LDRXl)>;

// Load register, unprivileged: 5 cycles on ports 5/6.
def : InstRW<[A64FXWrite_5Cyc_GI56],
            (instrs LDTRBi, LDTRHi, LDTRWi, LDTRXi,
                    LDTRSBWi, LDTRSBXi, LDTRSHWi, LDTRSHXi, LDTRSWi)>;
839
// Pre- and post-indexed loads.  Each opcode is listed exactly once: the
// earlier text named LDPWpre twice where LDPXpre was meant and then repeated
// the LDP/LDR pre/post groups verbatim.

// Load pair, immed pre-index.
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPDpre, LDPQpre, LDPSpre, LDPWpre, LDPXpre)>;

// Load register, immed pre-index.
def : InstRW<[A64FXWrite_LDR01, WriteAdr],
            (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre, LDRSpre, LDRWpre,
                    LDRXpre)>;

// Sign- and zero-extending byte/halfword loads, pre- and post-index.
def : InstRW<[A64FXWrite_LDR01, WriteAdr],
            (instrs LDRSBWpre, LDRSBXpre, LDRSBWpost, LDRSBXpost,
                    LDRSHWpre, LDRSHXpre, LDRSHWpost, LDRSHXpost,
                    LDRBBpre, LDRBBpost,
                    LDRHHpre, LDRHHpost)>;

// Load pair, immed post-index.
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost)>;

// Load register, immed post-index.
def : InstRW<[A64FXWrite_LDR01, WriteI],
            (instrs LDRBpost, LDRDpost, LDRHpost, LDRQpost, LDRSpost,
                    LDRWpost, LDRXpost)>;
931
// Load register, register offset: 5 cycles on ports 5/6, reading the address
// base through ReadAdrBase.  Each opcode is listed exactly once; the earlier
// text repeated the whole roW/roX list a second time (and LDRBroW a third).
// NOTE(review): LDRBB*, LDRSB* and LDRSW* register-offset forms are not named
// here - confirm whether that is intentional.

// W-register offset.
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
            (instrs LDRBroW, LDRDroW, LDRHroW, LDRHHroW, LDRQroW, LDRSroW,
                    LDRSHWroW, LDRSHXroW, LDRWroW, LDRXroW)>;

// X-register offset.
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
            (instrs LDRBroX, LDRDroX, LDRHHroX, LDRHroX, LDRQroX, LDRSroX,
                    LDRSHWroX, LDRSHXroX, LDRWroX, LDRXroX)>;
996
// LDUR* opcodes: one anonymous InstRW record per opcode, in list order, all
// mapped to A64FXWrite_5Cyc_GI56.
foreach LdurOp = [LDURBi, LDURBBi, LDURDi, LDURHi, LDURHHi, LDURQi, LDURSi,
                  LDURXi, LDURSBWi, LDURSBXi, LDURSHWi, LDURSHXi,
                  LDURSWi] in
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LdurOp)>;
1010
1011//---
1012// Prefetch
1013//---
// PRFMl is the only prefetch opcode mapped to A64FXWrite_PREF0.
def : InstRW<[A64FXWrite_PREF0],
            (instrs PRFMl)>;
// The remaining prefetch opcodes each get their own A64FXWrite_PREF1 record.
foreach PrefetchOp = [PRFUMi, PRFMui, PRFMroW, PRFMroX] in
def : InstRW<[A64FXWrite_PREF1], (instrs PrefetchOp)>;
1019
1020//--
1021// 3.7 Store Instructions
1022// 3.11 FP Store Instructions
1023//--
1024
// Generic store write types.  All three use a latency of one cycle, so the
// field is set once through an enclosing top-level `let`.
let Latency = 1 in {

// Store register, unscaled immed
// Store register, immed unprivileged
// Store register, unsigned immed
def : WriteRes<WriteST, [A64FXGI56]>;

// Store register, immed post-index
// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase

// Store register, immed pre-index
// NOTE: Handled by WriteAdr, WriteST

// Store register, register offset, basic
// Store register, register offset, scaled by 4/8
// Store register, register offset, scaled by 2
// Store register, register offset, extend
// Store register, register offset, extend, scale by 4/8
// Store register, register offset, extend, scale by 1
def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]>;

// Store pair, immed offset, W-form
// Store pair, immed offset, X-form
def : WriteRes<WriteSTP, [A64FXGI56]>;

} // let Latency = 1
1053
1054// Store pair, immed post-index, W-form
1055// Store pair, immed post-index, X-form
1056// Store pair, immed pre-index, W-form
1057// Store pair, immed pre-index, X-form
1058// NOTE: Handled by WriteAdr, WriteSTP.
1059
// STUR* opcodes: one A64FXWrite_STUR record per opcode.
foreach SturOp = [STURBi, STURBBi, STURDi, STURHi, STURHHi, STURQi, STURSi,
                  STURWi, STURXi] in
def : InstRW<[A64FXWrite_STUR], (instrs SturOp)>;

// STTR* opcodes: WriteAdr followed by A64FXWrite_STUR.
foreach SttrOp = [STTRBi, STTRHi, STTRWi, STTRXi] in
def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs SttrOp)>;

// STNP* opcodes, then the immediate-offset STP* opcodes; both groups use
// A64FXWrite_STNP.
foreach StPairOp = [STNPDi, STNPQi, STNPXi, STNPWi,
                    STPDi, STPQi, STPXi, STPWi] in
def : InstRW<[A64FXWrite_STNP], (instrs StPairOp)>;
1084
// Store register, unsigned scaled immediate (STR*ui).
// Each opcode is listed exactly once.  The previous revision repeated every
// entry twice with identical contents; the copies had no effect and were
// accepted only because this model sets FullInstRWOverlapCheck = 0.
def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>;
1097
// Store pair, immed pre-/post-index.
// The previous revision listed every opcode pair four times, alternating
// between [A64FXWrite_STP01] and [A64FXWrite_STP01, ReadAdrBase].  Only the
// first InstRW recorded for an instruction in a given model is used when the
// scheduling tables are emitted, so the later copies were shadowed and the
// two RW lists contradicted each other on paper.  The effective (first)
// mapping is kept, once per pair.
// NOTE(review): no WriteAdr is listed for the base-register writeback here,
// unlike the STR pre/post entries below -- confirm that this is intended.
def : InstRW<[A64FXWrite_STP01],
            (instrs STPDpre, STPDpost)>;
def : InstRW<[A64FXWrite_STP01],
            (instrs STPQpre, STPQpost)>;
def : InstRW<[A64FXWrite_STP01],
            (instrs STPSpre, STPSpost)>;
def : InstRW<[A64FXWrite_STP01],
            (instrs STPWpre, STPWpost)>;
def : InstRW<[A64FXWrite_STP01],
            (instrs STPXpre, STPXpost)>;
1138
// Store register, immed pre-/post-index.
// The previous revision listed every opcode pair four times, alternating
// between [WriteAdr, A64FXWrite_STP01] and
// [WriteAdr, A64FXWrite_STP01, ReadAdrBase].  Only the first InstRW recorded
// for an instruction in a given model is used when the scheduling tables are
// emitted, so the later copies were shadowed.  The effective (first) mapping
// is kept, once per pair: WriteAdr for the updated base register, then
// A64FXWrite_STP01.
def : InstRW<[WriteAdr, A64FXWrite_STP01],
            (instrs STRBpre, STRBpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
            (instrs STRBBpre, STRBBpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
            (instrs STRDpre, STRDpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
            (instrs STRHpre, STRHpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
            (instrs STRHHpre, STRHHpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
            (instrs STRQpre, STRQpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
            (instrs STRSpre, STRSpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
            (instrs STRWpre, STRWpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
            (instrs STRXpre, STRXpost)>;
1211
// Store register, register offset (W- and X-register index forms).
// Each opcode pair is listed exactly once.  The previous revision repeated
// every entry twice with identical contents; the copies had no effect and
// were accepted only because this model sets FullInstRWOverlapCheck = 0.
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
            (instrs STRBroW, STRBroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
            (instrs STRBBroW, STRBBroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
            (instrs STRDroW, STRDroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
            (instrs STRHroW, STRHroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
            (instrs STRHHroW, STRHHroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
            (instrs STRQroW, STRQroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
            (instrs STRSroW, STRSroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
            (instrs STRWroW, STRWroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
            (instrs STRXroW, STRXroX)>;
1248
1249//---
1250// 3.8 FP Data Processing Instructions
1251//---
1252
// FP absolute value
// FP min/max
// FP negate
let Latency = 4, ReleaseAtCycles = [2] in
def : WriteRes<WriteF, [A64FXGI03]>;

// FP arithmetic: explicit overrides for the D- and H-register add/subtract.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instrs FADDDrr, FADDHrr)>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instrs FSUBDrr, FSUBHrr)>;

// FP compare
let Latency = 4, ReleaseAtCycles = [2] in
def : WriteRes<WriteFCmp, [A64FXGI03]>;

// FP Div, Sqrt: generic write type.
let Latency = 43 in
def : WriteRes<WriteFDiv, [A64FXGI0]>;

// Named divide / square-root writes on A64FXGI0, distinguished by latency.
let Latency = 38 in
def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]>;

let Latency = 29 in
def A64FXXWriteFDivSP : SchedWriteRes<[A64FXGI0]>;

let Latency = 43 in
def A64FXXWriteFDivDP : SchedWriteRes<[A64FXGI0]>;

let Latency = 29 in
def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]>;

let Latency = 43 in
def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]>;
1296
// FP divide, S-form
// FP square root, S-form
// Maps single-precision divide / square root to the 29-cycle
// A64FXXWriteFDivSP / A64FXXWriteFSqrtSP writes defined above.
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>;
def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>;
// NOTE(review): "^.*SQRT.*32$" is not limited to FSQRT; it accepts any opcode
// name that contains "SQRT" and ends in "32".  That would include names
// beginning with FRSQRTE/URSQRTE/FRSQRTS, which later entries in this file
// try to map separately -- confirm this breadth is intended.
def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
// The two regex entries below name the same opcodes as the `instrs` entries
// at the top of this group.
def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVSrr")>;
def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^FSQRTSr")>;

// FP divide, D-form
// FP square root, D-form
// Maps double-precision divide / square root to the 43-cycle
// A64FXXWriteFDivDP / A64FXXWriteFSqrtDP writes defined above.
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>;
def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>;
// NOTE(review): same breadth concern as the 32-bit pattern above; this
// accepts any opcode name containing "SQRT" and ending in "64".
def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
// Regex restatements of the `instrs` entries at the top of this group.
def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVDrr")>;
def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^FSQRTDr")>;

// FP round to integral
// Covers the scalar S- and D-register FRINT{A,I,M,N,P,X,Z} forms.
def : InstRW<[A64FXWrite_9Cyc_GI03],
            (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;

// FP select
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>;
1321
1322//---
1323// 3.9 FP Miscellaneous Instructions
1324//---
1325
// FP convert, from vec to vec reg
// FP convert, from gen to vec reg
// FP convert, from vec to gen reg
let Latency = 9, ReleaseAtCycles = [2] in
def : WriteRes<WriteFCvt, [A64FXGI03]>;

// WriteFImm and WriteFCopy share the same latency, release cycle and
// resource, so both are defined under one top-level `let`.
let Latency = 4, ReleaseAtCycles = [2] in {

// FP move, immed
// FP move, register
def : WriteRes<WriteFImm, [A64FXGI0]>;

// FP transfer, from gen to vec reg
// FP transfer, from vec to gen reg
def : WriteRes<WriteFCopy, [A64FXGI0]>;

} // let Latency = 4, ReleaseAtCycles = [2]

// Explicit overrides for the two "High" FMOV opcodes.
def : InstRW<[A64FXWrite_FMOV_GV],
            (instrs FMOVXDHighr)>;
def : InstRW<[A64FXWrite_FMOV_VG14],
            (instrs FMOVDXHighr)>;
1350
1351//---
1352// 3.12 ASIMD Integer Instructions
1353//---
1354
// Generic ASIMD write types.  WriteVd and WriteVq are given the same
// resource (A64FXGI03) and the same 4-cycle latency; they are the fallback
// for the categories below unless an InstRW entry overrides them.
//
// ASIMD absolute diff, D-form
// ASIMD absolute diff, Q-form
// ASIMD absolute diff accum, D-form
// ASIMD absolute diff accum, Q-form
// ASIMD absolute diff accum long
// ASIMD absolute diff long
// ASIMD arith, basic
// ASIMD arith, complex
// ASIMD compare
// ASIMD logical (AND, BIC, EOR)
// ASIMD max/min, basic
// ASIMD max/min, reduce, 4H/4S
// ASIMD max/min, reduce, 8B/8H
// ASIMD max/min, reduce, 16B
// ASIMD multiply, D-form
// ASIMD multiply, Q-form
// ASIMD multiply accumulate long
// ASIMD multiply accumulate saturating long
// ASIMD multiply long
// ASIMD pairwise add and accumulate
// ASIMD shift accumulate
// ASIMD shift by immed, basic
// ASIMD shift by immed and insert, basic, D-form
// ASIMD shift by immed and insert, basic, Q-form
// ASIMD shift by immed, complex
// ASIMD shift by register, basic, D-form
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
let Latency = 4 in {
  def : WriteRes<WriteVd, [A64FXGI03]>;
  def : WriteRes<WriteVq, [A64FXGI03]>;
}
1390
// ASIMD arith, reduce, 4H/4S
// ASIMD arith, reduce, 8B/8H
// ASIMD arith, reduce, 16B
// (handled by the "ASIMD arith, reduce" entry below)

// ASIMD logical (AND, BIC, EOR, ORR, ORN, NOT)
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;

// ASIMD arith, reduce
// NOTE(review): later entries in this file ("^[SU]?ADDL?V...", "^SADDLV",
// "^UADDLV", "^ADDVv") name the same opcodes with different writes; confirm
// which mapping is meant to take effect.
def : InstRW<[A64FXWrite_ADDLV],
            (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;

// ASIMD integer multiply long / multiply-accumulate long (signed, unsigned,
// saturating doubling).
// NOTE(review): the second pattern's "(S|U|SQD)MULLv" alternative is already
// covered by "^(S|U|SQD)MULL" in the first entry.
def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>;
def : InstRW<[A64FXWrite_MULLV],
            (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
// ASIMD polynomial multiply long: 8-bit element forms, then 64-bit forms.
def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v8i8|v16i8)")>;
def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v1i64|v2i64)")>;
1409
// ASIMD absolute diff accum, D-form
def : InstRW<[A64FXWrite_ABA],
            (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
// ASIMD absolute diff accum, Q-form
def : InstRW<[A64FXWrite_ABA],
            (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
// ASIMD absolute diff accum long
def : InstRW<[A64FXWrite_ABAL],
            (instregex "^[SU]ABAL")>;
// ASIMD arith, reduce, 8B/4H/2S source arrangements
// NOTE(review): the ADDV / SADDLV / UADDLV names are also listed by the
// earlier "ASIMD arith, reduce" entry (A64FXWrite_ADDLV); confirm which
// mapping is meant to win.
def : InstRW<[A64FXWrite_ADDLV1],
            (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
// ASIMD arith, reduce, 8H/4S source arrangements
def : InstRW<[A64FXWrite_ADDLV1],
            (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
// ASIMD arith, reduce, 16B source arrangement
def : InstRW<[A64FXWrite_ADDLV1],
            (instregex "^[SU]?ADDL?Vv16i8v$")>;
// ASIMD max/min, reduce, 4H/4S
def : InstRW<[A64FXWrite_MINMAXV],
            (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
// ASIMD max/min, reduce, 8B/8H
def : InstRW<[A64FXWrite_MINMAXV],
            (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
// ASIMD max/min, reduce, 16B
def : InstRW<[A64FXWrite_MINMAXV],
            (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
// ASIMD multiply, D-form and scalar element forms (MUL, PMUL, SQDMUL*,
// SQRDMUL*), optionally indexed
def : InstRW<[A64FXWrite_PMUL],
            (instregex "^(P?MUL|SQR?DMUL)" #
                       "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
                       "(_indexed)?$")>;

// ASIMD multiply, Q-form (MUL, PMUL), optionally indexed
def : InstRW<[A64FXWrite_PMUL],
            (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD saturating (rounding) doubling multiply high, Q-form
def : InstRW<[A64FXWrite_SQRDMULH],
            (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD multiply accumulate, D-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
            (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
// ASIMD multiply accumulate, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
            (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
// ASIMD shift accumulate: rounding forms first, then the plain forms
def : InstRW<[A64FXWrite_SRSRAV],
            (instregex "SRSRAv", "URSRAv")>;
def : InstRW<[A64FXWrite_SSRAV],
            (instregex "SSRAv", "USRAv")>;
1462
// ASIMD shift by immed, basic
// Narrowing right shifts: rounding variants, then non-rounding variants.
def : InstRW<[A64FXWrite_RSHRN],
            (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>;
def : InstRW<[A64FXWrite_SHRN],
            (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>;

// ASIMD extract narrow, saturating.
// NOTE(review): the same three names are listed again, with the same write,
// under "ASIMD extract narrow, saturating" in section 3.14.
def : InstRW<[A64FXWrite_6Cyc_GI3],
            (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>;

// ASIMD shift by immed, complex
// NOTE(review): "^[SU]?(Q|R){1,2}SHR" also accepts the RSHRN / SQRSHRN /
// SQSHRN / UQSHRN style names already listed in the two entries at the top
// of this group; confirm which mapping is meant to apply to them.
def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>;
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^SQSHLU")>;
// ASIMD shift by register, basic, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3],
            (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
// ASIMD shift by register, complex, D-form (and scalar b/h/s/d forms)
def : InstRW<[A64FXWrite_6Cyc_GI3],
            (instregex "^[SU][QR]{1,2}SHL" #
                       "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
// ASIMD shift by register, complex, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3],
            (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
1485
// ASIMD Arithmetic
// Several patterns in this group repeat or are subsumed by patterns that
// appear earlier; the repeats are called out individually below.

// Basic add/sub, D-form then Q-form arrangements.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
// Add/sub returning high narrow: plain forms, then rounding forms.
def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>;
def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>;
// Saturating / rounding-halving arithmetic.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
                       "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
// Pairwise add, Q-form arrangements.
def : InstRW<[A64FXWrite_ADDP],
            (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
// Logical operations.
// NOTE(review): besides Q-form vector suffixes, this pattern also lists
// Xr[rsi] / Xr[rs] suffixes, which do not look like ASIMD opcode names --
// confirm they belong in this section.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
                       "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
// Count leading sign/zero bits and population count, Q-form arrangements.
def : InstRW<[A64FXWrite_4Cyc_GI0],
            (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
// Pairwise add long, with and without accumulate.
def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>;
def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>;
// NOTE(review): "^SADDLV" / "^UADDLV" and "^ADDVv" below name opcodes that
// earlier "arith, reduce" entries already list; confirm the intended write.
def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>;
def : InstRW<[A64FXWrite_MINMAXV],
             (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv", "^UMINVv")>;
// NOTE(review): repeats the names covered by the "^[SU]ABA..." and
// "^[SU]ABAL" entries earlier in section 3.12.
def : InstRW<[A64FXWrite_ABA],
             (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
// NOTE(review): the next two entries are subsumed by the "^SQADD", "^SQSUB",
// "^UQADD", "^UQSUB", "^SUQADD", "^USQADD" patterns listed above.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^SQADDv", "^SQSUBv", "^UQADDv", "^UQSUBv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^SUQADDv", "^USQADDv")>;
// NOTE(review): anchored restatements of the "(ADD|SUB)HNv.*" and
// "(RADD|RSUB)HNv.*" entries above, with the same writes.
def : InstRW<[A64FXWrite_SHRN],
            (instregex "^ADDHNv", "^SUBHNv")>;
def : InstRW<[A64FXWrite_RSHRN],
            (instregex "^RADDHNv", "^RSUBHNv")>;
// NOTE(review): same list as the saturating-arithmetic entry above, with
// "^SQABS" added.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
                       "^SRHADD", "^SUQADD", "^UQADD", "^UQSUB",
                      "^URHADD", "^USQADD")>;

// ASIMD compare
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^CMEQv", "^CMGEv", "^CMGTv",
                       "^CMLEv", "^CMLTv", "^CMHIv", "^CMHSv")>;
// ASIMD max/min, basic
def : InstRW<[A64FXWrite_MINMAXV],
            (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
// ASIMD max/min, pairwise
def : InstRW<[A64FXWrite_ADDP],
            (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
// ASIMD absolute diff
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^SABDv", "^UABDv")>;
// ASIMD absolute diff long
def : InstRW<[A64FXWrite_TBX1],
            (instregex "^SABDLv", "^UABDLv")>;
1533
1534//---
1535// 3.13 ASIMD Floating-point Instructions
1536//---
1537
// Generic FP multiply write type: 9-cycle latency on A64FXGI03.
def : WriteRes<WriteFMul, [A64FXGI03]> {
  let Latency = 9;
}

// ASIMD FP absolute value
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FABSv")>;

// ASIMD FP arith, normal, D-form
// ASIMD FP arith, normal, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
            (instregex "^FABDv", "^FADDv", "^FSUBv")>;

// ASIMD FP arith, pairwise, D-form
// ASIMD FP arith, pairwise, Q-form
def : InstRW<[A64FXWrite_FADDPV], (instregex "^FADDPv")>;

// ASIMD FP compare, D-form
// ASIMD FP compare, Q-form
// Absolute compares first, then the ordinary compares; both use the same
// write.
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FACGEv", "^FACGTv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCMEQv", "^FCMGEv",
                                                 "^FCMGTv", "^FCMLEv",
                                                 "^FCMLTv")>;
// ASIMD FP round, D-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
            (instregex "^FRINT[AIMNPXZ](v2f32)")>;
// ASIMD FP round, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
            (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
1566
// ASIMD FP convert, long
// ASIMD FP convert, narrow
// ASIMD FP convert, other, D-form
// ASIMD FP convert, other, Q-form
// All of the conversion entries below use the same write,
// A64FXWrite_FCVTXNV.

// ASIMD FP convert, long and narrow
def : InstRW<[A64FXWrite_FCVTXNV], (instregex "^FCVT(L|N|XN)v")>;
// ASIMD FP convert, other, D-form
def : InstRW<[A64FXWrite_FCVTXNV],
      (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
// ASIMD FP convert, other, Q-form
def : InstRW<[A64FXWrite_FCVTXNV],
      (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;

// Each FDIV arrangement below is listed twice, once via `instrs` and once
// via `instregex`, with the same write.
// NOTE(review): the "^FDIVv.*32$" / "^FDIVv.*64$" entries in section 3.8
// also name these opcodes; confirm which mapping is meant to take effect
// (in particular for FDIVv4f32, which is given A64FXXWriteFDiv here but
// A64FXXWriteFDivSP there).

// ASIMD FP divide, D-form, F32
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "FDIVv2f32")>;

// ASIMD FP divide, Q-form, F32
def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>;
def : InstRW<[A64FXXWriteFDiv], (instregex "FDIVv4f32")>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "FDIVv2f64")>;
1592
// ASIMD FP max/min, normal, D-form
// ASIMD FP max/min, normal, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMAXv", "^FMAXNMv",
                                               "^FMINv", "^FMINNMv")>;

// ASIMD FP max/min, pairwise, D-form
// ASIMD FP max/min, pairwise, Q-form
def : InstRW<[A64FXWrite_ADDP], (instregex "^FMAXPv", "^FMAXNMPv",
                                           "^FMINPv", "^FMINNMPv")>;

// ASIMD FP max/min, reduce
def : InstRW<[A64FXWrite_FMAXVVH], (instregex "^FMAXVv", "^FMAXNMVv",
                                              "^FMINVv", "^FMINNMVv")>;

// ASIMD FP multiply, D-form, FZ
// ASIMD FP multiply, D-form, no FZ
// ASIMD FP multiply, Q-form, FZ
// ASIMD FP multiply, Q-form, no FZ
// NOTE(review): every "v..." alternative of the two A64FXWrite_FMULXE
// patterns is also accepted by "^FMULv" / "^FMULXv" in the first entry; only
// the bare "32" / "64" suffixes are new.  Confirm which write is intended
// for the vector forms.
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>;
def : InstRW<[A64FXWrite_FMULXE],
            (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
def : InstRW<[A64FXWrite_FMULXE],
            (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP multiply accumulate, Dform, FZ
// ASIMD FP multiply accumulate, Dform, no FZ
// ASIMD FP multiply accumulate, Qform, FZ
// ASIMD FP multiply accumulate, Qform, no FZ
// NOTE(review): as with FMUL above, the two A64FXWrite_FMULXE patterns only
// name opcodes that "^FMLAv" / "^FMLSv" in the first entry already accept.
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>;
def : InstRW<[A64FXWrite_FMULXE],
            (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
def : InstRW<[A64FXWrite_FMULXE],
            (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP negate
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>;
1629
1630//--
1631// 3.14 ASIMD Miscellaneous Instructions
1632//--
1633
// ASIMD bit reverse
// NOTE(review): this is the only entry in section 3.14 that uses a
// *_GI2456 write -- confirm that is intended for a vector opcode.
def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>;

// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
def : InstRW<[A64FXWrite_BIF],
            (instregex "^BIFv", "^BITv", "^BSLv")>;

// ASIMD count, D-form
// ASIMD count, Q-form
// NOTE(review): the Q-form arrangements are also named by the
// "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)" entry in section 3.12 (same write).
def : InstRW<[A64FXWrite_4Cyc_GI0],
            (instregex "^CLSv", "^CLZv", "^CNTv")>;

// ASIMD duplicate, gen reg
// ASIMD duplicate, element
// NOTE(review): "^DUPv.+gpr" only names opcodes that the first entry's
// "^DUPv" already accepts; confirm which write the gpr forms should get.
def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>;
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUP(i8|i16|i32|i64)$")>;
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>;

// ASIMD extract
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>;

// ASIMD extract narrow
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>;

// ASIMD extract narrow, saturating
// NOTE(review): the same names are listed (without "^") with the same write
// among the shift entries in section 3.12.
def : InstRW<[A64FXWrite_6Cyc_GI3],
            (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;

// ASIMD insert, element to element
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;

// ASIMD transfer, element to gen reg
def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;

// ASIMD move, integer immed
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv")>;

// ASIMD move, FP immed
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMOVv")>;
1674
// ASIMD table lookup, D-form
// One entry per table-register count (One..Four).  For the same count, TBX
// is given the write that TBL uses for one more table register.
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv8i8Two")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv8i8Three")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv8i8Four")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv8i8One")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv8i8Two")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv8i8Three")>;
def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv8i8Four")>;

// ASIMD table lookup, Q-form
// Same write assignment as the D-form entries above.
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv16i8Two")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv16i8Three")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv16i8Four")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv16i8One")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv16i8Two")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv16i8Three")>;
def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv16i8Four")>;
1694
// ASIMD unzip/zip
def : InstRW<[A64FXWrite_6Cyc_GI0],
            (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;

// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
// NOTE(review): names starting with FRSQRTE / URSQRTE that end in "32" or
// "64" are also accepted by the "^.*SQRT.*32$" / "^.*SQRT.*64$" entries in
// section 3.8; confirm which mapping is meant to take effect for them.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
                       "^FRSQRTEv", "^URSQRTEv")>;

// ASIMD reciprocal step, D-form, FZ
// ASIMD reciprocal step, D-form, no FZ
// ASIMD reciprocal step, Q-form, FZ
// ASIMD reciprocal step, Q-form, no FZ
// NOTE(review): FRSQRTS names ending in "32" / "64" are likewise accepted by
// the broad SQRT patterns in section 3.8.
def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex "^FRECPSv", "^FRSQRTSv")>;

// ASIMD reverse
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^REV16v", "^REV32v", "^REV64v")>;

// ASIMD table lookup, D-form
// ASIMD table lookup, Q-form
// NOTE(review): catch-all for TBL/TBX; the per-register-count entries
// earlier in this section already list the One..Four forms with different
// writes.
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>;

// ASIMD transfer, element to gen reg (word or doubleword)
// NOTE(review): identical to the "^[SU]MOVv" entry earlier in this section.
def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;

// ASIMD transfer, element to gen reg
// NOTE(review): names the same opcodes as the entry directly above.
def : InstRW<[A64FXWrite_SMOV], (instregex "(S|U)MOVv.*")>;

// ASIMD transfer gen reg to element
// NOTE(review): identical to the "^INSv" entry earlier in this section.
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;

// ASIMD transpose
// NOTE(review): "^UZP1v" / "^UZP2v" are already accepted by "^UZP1" /
// "^UZP2" in the unzip/zip entry at the top of this group (same write).
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1v", "^TRN2v",
                                                 "^UZP1v", "^UZP2v")>;

// ASIMD unzip/zip
// NOTE(review): already accepted by "^ZIP1" / "^ZIP2" at the top of this
// group (same write).
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^ZIP1v", "^ZIP2v")>;
1734
1735//--
1736// 3.15 ASIMD Load Instructions
1737//--
1738
// LD1 (multiple structures), one to four registers.  Each register count
// has four entries: two arrangement groups, each with and without
// post-index.  The post-index (_POST) entries add WriteAdr for the updated
// base register.
// NOTE(review): in every group below the "2d" arrangement is listed together
// with the D-form arrangements (8b|4h|2s|1d) rather than with the Q-form
// ones (16b|8h|4s) -- confirm that this split is intended.

// ASIMD load, 1 element, multiple, 1 reg, D-form
// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[A64FXWrite_8Cyc_GI56],
            (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_11Cyc_GI56],
            (instregex "^LD1Onev(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
            (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD109, WriteAdr],
            (instregex "^LD1Onev(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, D-form
// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[A64FXWrite_LD102],
            (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD103],
            (instregex "^LD1Twov(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
            (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
            (instregex "^LD1Twov(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, D-form
// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[A64FXWrite_LD104],
            (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD105],
            (instregex "^LD1Threev(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
            (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
            (instregex "^LD1Threev(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, D-form
// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[A64FXWrite_LD106],
            (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD107],
            (instregex "^LD1Fourv(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
            (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
            (instregex "^LD1Fourv(16b|8h|4s)_POST$")>;
1782
// ASIMD load, 1 element, one lane (B/H/S and D element sizes).
def : InstRW<[A64FXWrite_LD1I0], (instregex "^LD1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD1I1, WriteAdr],
      (instregex "^LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes (every D-form and Q-form arrangement).
def : InstRW<[A64FXWrite_8Cyc_GI03],
      (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
      (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1796
// ASIMD load, 2 element, multiple (D-form B/H/S and Q-form arrangements).
def : InstRW<[A64FXWrite_LD103],
      (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
      (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane (B/H, S and D element sizes).
def : InstRW<[A64FXWrite_LD2I0], (instregex "^LD2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD2I1, WriteAdr],
      (instregex "^LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes (every D-form and Q-form arrangement).
def : InstRW<[A64FXWrite_LD102],
      (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
      (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1818
// ASIMD load, 3 element, multiple (D-form B/H/S and Q-form B/H/S/D).
def : InstRW<[A64FXWrite_LD105],
      (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
      (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, one lane (B/H, S and D element sizes).
def : InstRW<[A64FXWrite_LD3I0], (instregex "^LD3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD3I1, WriteAdr],
      (instregex "^LD3i(8|16|32|64)_POST$")>;

// ASIMD load, 3 element, all lanes (every D-form and Q-form arrangement).
def : InstRW<[A64FXWrite_LD104],
      (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
      (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1842
// ASIMD load, 4 element, multiple (D-form B/H/S and Q-form B/H/S/D).
def : InstRW<[A64FXWrite_LD107],
      (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
      (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane (B/H, S and D element sizes).
def : InstRW<[A64FXWrite_LD4I0], (instregex "^LD4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD4I1, WriteAdr],
      (instregex "^LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes (every D-form and Q-form arrangement).
def : InstRW<[A64FXWrite_LD106],
      (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
      (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1866
1867//--
1868// 3.16 ASIMD Store Instructions
1869//--
1870
// ASIMD store, 1 element, multiple, 1 reg (D-form and Q-form).
// _POST opcodes list WriteAdr as a second write.
def : InstRW<[A64FXWrite_ST10],
      (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST14, WriteAdr],
      (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg (D-form and Q-form).
def : InstRW<[A64FXWrite_ST11],
      (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
      (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg (D-form and Q-form).
def : InstRW<[A64FXWrite_ST12],
      (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
      (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg (D-form and Q-form).
def : InstRW<[A64FXWrite_ST13],
      (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
      (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane (B/H/S and D element sizes).
def : InstRW<[A64FXWrite_ST10], (instregex "^ST1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST14, WriteAdr],
      (instregex "^ST1i(8|16|32|64)_POST$")>;
1905
// ASIMD store, 2 element, multiple (D-form B/H/S and Q-form B/H/S/D).
def : InstRW<[A64FXWrite_ST11],
      (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
      (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane (B/H/S and D element sizes).
def : InstRW<[A64FXWrite_ST11], (instregex "^ST2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
      (instregex "^ST2i(8|16|32|64)_POST$")>;
1920
// ASIMD store, 3 element, multiple (D-form B/H/S and Q-form B/H/S/D).
def : InstRW<[A64FXWrite_ST12],
      (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
      (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 3 element, one lane (B/H, S and D element sizes).
def : InstRW<[A64FXWrite_ST12],
      (instregex "^ST3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
      (instregex "^ST3i(8|16|32|64)_POST$")>;
1935
// ASIMD store, 4 element, multiple (D-form B/H/S and Q-form B/H/S/D).
def : InstRW<[A64FXWrite_ST13],
      (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
      (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 4 element, one lane (B/H, S and D element sizes).
def : InstRW<[A64FXWrite_ST13],
      (instregex "^ST4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
      (instregex "^ST4i(8|16|32|64)_POST$")>;
1950
// V8.1a Atomics (LSE)

// CAS, CASA, CASL and CASAL at B/H/W/X sizes all use the same write pair.
def : InstRW<[A64FXWrite_CAS, WriteAtomic],
      (instrs CASB,   CASH,   CASW,   CASX,
              CASAB,  CASAH,  CASAW,  CASAX,
              CASLB,  CASLH,  CASLW,  CASLX,
              CASALB, CASALH, CASALW, CASALX)>;
1963
// LDLAR and the LDADD/LDCLR/LDEOR/LDSET atomics: every listed opcode is
// scheduled with [A64FXWrite_5Cyc_GI5, WriteAtomic]. One def per mnemonic
// family, covering the plain, A, L and AL variants at B/H/W/X sizes.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDADDB,   LDADDH,   LDADDW,   LDADDX,
              LDADDAB,  LDADDAH,  LDADDAW,  LDADDAX,
              LDADDLB,  LDADDLH,  LDADDLW,  LDADDLX,
              LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDCLRB,   LDCLRH,   LDCLRW,   LDCLRX,
              LDCLRAB,  LDCLRAH,  LDCLRAW,  LDCLRAX,
              LDCLRLB,  LDCLRLH,  LDCLRLW,  LDCLRLX,
              LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDEORB,   LDEORH,   LDEORW,   LDEORX,
              LDEORAB,  LDEORAH,  LDEORAW,  LDEORAX,
              LDEORLB,  LDEORLH,  LDEORLW,  LDEORLX,
              LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDSETB,   LDSETH,   LDSETW,   LDSETX,
              LDSETAB,  LDSETAH,  LDSETAW,  LDSETAX,
              LDSETLB,  LDSETLH,  LDSETLW,  LDSETLX,
              LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;
2014
// Signed/unsigned atomic max/min: one def per mnemonic family, each covering
// the plain, A, L and AL variants at B/H/W/X sizes.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDSMAXB,   LDSMAXH,   LDSMAXW,   LDSMAXX,
              LDSMAXAB,  LDSMAXAH,  LDSMAXAW,  LDSMAXAX,
              LDSMAXLB,  LDSMAXLH,  LDSMAXLW,  LDSMAXLX,
              LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDSMINB,   LDSMINH,   LDSMINW,   LDSMINX,
              LDSMINAB,  LDSMINAH,  LDSMINAW,  LDSMINAX,
              LDSMINLB,  LDSMINLH,  LDSMINLW,  LDSMINLX,
              LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDUMAXB,   LDUMAXH,   LDUMAXW,   LDUMAXX,
              LDUMAXAB,  LDUMAXAH,  LDUMAXAW,  LDUMAXAX,
              LDUMAXLB,  LDUMAXLH,  LDUMAXLW,  LDUMAXLX,
              LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDUMINB,   LDUMINH,   LDUMINW,   LDUMINX,
              LDUMINAB,  LDUMINAH,  LDUMINAW,  LDUMINAX,
              LDUMINLB,  LDUMINLH,  LDUMINLW,  LDUMINLX,
              LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;
2038
// SWP, SWPA, SWPL and SWPAL at B/H/W/X sizes all use the same write pair.
def : InstRW<[A64FXWrite_SWP, WriteAtomic],
      (instrs SWPB,   SWPH,   SWPW,   SWPX,
              SWPAB,  SWPAH,  SWPAW,  SWPAX,
              SWPLB,  SWPLH,  SWPLW,  SWPLX,
              SWPALB, SWPALH, SWPALW, SWPALX)>;
2050
// STLLR at B/H/W/X sizes is scheduled with the STUR write plus WriteAtomic.
def : InstRW<[A64FXWrite_STUR, WriteAtomic],
      (instrs STLLRB,
              STLLRH,
              STLLRW,
              STLLRX)>;
2053
2054// SVE instructions
2055
2056// The modeling method for SVE instructions is more accurate than others.
2057// TODO: modify the model of other instructions similarly.
2058
// SVE opcodes scheduled with A64FXWrite_4Cyc_GI0.
def : InstRW<[A64FXWrite_4Cyc_GI0],
      (instregex "^AND_ZI", "^CL[SZ]_Z", "^CPY_ZP[mz]I",
                 "^DUP_ZZ?I", "^DUPM_Z", "^EOR_ZI", "^ORR_ZI",
                 "^FCM(EQ|GT|GE|LT|LE|NE|UO)_P", "^FCPY_Z",
                 "^F(MAX|MIN).*I_", "^NEG_Z",
                 "^[SU](MAX|MIN)_ZI", "^SUBR?_ZI")>;

// SVE opcodes scheduled with A64FXWrite_6Cyc_GI0.
def : InstRW<[A64FXWrite_6Cyc_GI0],
      (instregex "^CLAST[AB]_[VZ]", "^COMPACT_Z", "^CPY_ZPmV",
                 "^DUP_ZR", "^EXT_Z", "^FDUP_Z", "^INSR_ZV",
                 "^LAST[AB]_V", "^REV_Z", "^SPLICE_Z",
                 "^[SU]UNPK(HI|LO)_Z", "^TBL_Z", "^TRN[12]_Z")>;

// SVE opcodes scheduled with A64FXWrite_9Cyc_GI0.
def : InstRW<[A64FXWrite_9Cyc_GI0],
      (instregex "^F(ADD|SUBR?)_.*I_",
                 "^FRECPS_Z", "^FRSQRTS_Z",
                 "^INDEX_II_[SD]", "^MUL_ZI")>;
2073
// SVE opcodes scheduled with A64FXWrite_4Cyc_GI3.
def : InstRW<[A64FXWrite_4Cyc_GI3], (instregex "^CNT_Z")>;

// SVE opcodes scheduled with A64FXWrite_4Cyc_GI03.
def : InstRW<[A64FXWrite_4Cyc_GI03],
      (instregex "^ABS_Z", "^ADD_Z", "^AND_Z[^I]",
                 "^ASRR?_(WIDE_)?Z", "^BIC_Z", "^ADR_[SU]XTW_Z",
                 "^CNOT_Z", "^DEC[BHWD]_Z", "^EOR_Z[^I]",
                 "^INC[BHWD]_Z", "^ORR_Z[^I]", "^FABS_Z",
                 "^FACG[ET]_P", "^FEXPA_Z", "^F(MAX|MIN)[^V]*Z_",
                 "^FNEG_Z", "^FRECP[EX]_Z", "^FRSQRTE_Z",
                 "^FTSSEL_Z", "^LS[LR]R?(_WIDE)?_Z", "^NOT_Z",
                 "^RBIT_Z", "^REV[BHW]_Z", "^SABD_Z",
                 "^SEL_Z", "^[SU](MAX|MIN)_ZP",
                 "^[SU]Q(INC|DEC)[^P]_Z", "^SUBR?_Z[^I]",
                 "^[SU]XT._Z", "^UABD_Z")>;

// SVE opcodes scheduled with A64FXWrite_9Cyc_GI03.
def : InstRW<[A64FXWrite_9Cyc_GI03],
      (instregex "^FABD_Z", "^F(ADD|SUBR?)_.*Z_",
                 "^FN?(MAD|MLA|MLS|MSB)_ZP", "^FMUL_(ZP|ZZZ_)",
                 "^FMULX_Z", "^FCVT(ZS|ZU)?_Z", "^FRINT._Z",
                 "^FSCALE_Z", "^FTMAD_Z", "^FTSMUL_Z",
                 "^MAD_Z", "^MLA_Z", "^MLS_Z", "^MSB_Z", "^MUL_ZP",
                 "^[SU]CVTF_Z", "^[SU]DOT_ZZZ_", "^[SU]MULH_Z")>;
2093
// SVE predicate opcodes scheduled with A64FXWrite_3Cyc_GI1.
def : InstRW<[A64FXWrite_3Cyc_GI1],
      (instregex "^ANDS?_P", "^BICS?_P", "^BRK.*_P",
                 "^EORS?_P", "^ORRS?_P", "^NANDS?_P",
                 "^NORS?_P", "^ORNS?_P", "^PFALSE",
                 "^PNEXT", "^PFIRST", "^PTEST",
                 "^PTRUES?", "^PUNPK(HI|LO)", "^RDFFRS?",
                 "^REV_P", "^SEL_P", "^TRN[12]_P")>;

// SVE opcodes scheduled with A64FXWrite_1Cyc_GI24.
def : InstRW<[A64FXWrite_1Cyc_GI24],
      (instregex "^ADD[PV]L",
                 "^CNT[BHWD]_X", "^DEC[BHWD]_X", "^INC[BHWD]_X",
                 "^RDVLI")>;
2103
// LDR (predicate/vector register fill) uses the GI5-only 11-cycle write.
def : InstRW<[A64FXWrite_11Cyc_GI5], (instregex "^LDR_[PZ]XI")>;

// LD1 family (optionally NF/FF/NT, replicating "R" and sign-extending "S"
// spellings) uses the GI56 11-cycle write.
def : InstRW<[A64FXWrite_11Cyc_GI56],
      (instregex "^LD(NF|FF|NT)?1R?S?[BHSWDQ]")>;
2109
// Write that names no processor resources; used for SETFFR and MOVPRFX.
def A64FXWrite_None : SchedWriteRes<[]>;
def : InstRW<[A64FXWrite_None],
      (instregex "^SETFFR",
                 "^MOVPRFX")>;
2113
// Indexed FMLA/FMLS/FMUL: 2 micro-ops, 15-cycle latency, A64FXGI03 released
// at cycle 2.
def A64FXWrite_FMAIndexed : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
  let Latency = 15;
}
def : InstRW<[A64FXWrite_FMAIndexed],
      (instregex "^F(MLA|MLS|MUL)_ZZZI")>;
2120
// ADR with LSL: 2 micro-ops, 5-cycle latency, A64FXGI0 released at cycle 2.
def A64FXWrite_ADR_LSL_Z : SchedWriteRes<[A64FXGI0]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
  let Latency = 5;
}
def : InstRW<[A64FXWrite_ADR_LSL_Z],
      (instregex "^ADR_LSL_Z")>;
2127
// ASRD: 2 micro-ops across A64FXGI0 and A64FXGI01, 8-cycle latency.
def A64FXWrite_ASRD : SchedWriteRes<[A64FXGI0, A64FXGI01]> {
  let NumMicroOps = 2;
  let Latency = 8;
}
def : InstRW<[A64FXWrite_ASRD],
      (instregex "^ASRD_Z")>;
2133
// Integer/logical across-vector reductions on A64FXGI03, one write per
// element size. Each step from B to D drops one micro-op (10 -> 7) and four
// cycles of latency (46 -> 34); ReleaseAtCycles equals the micro-op count.

// B elements.
def A64FXWrite_Reduction4CycB : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps = 10;
  let ReleaseAtCycles = [10];
  let Latency = 46;
}
def : InstRW<[A64FXWrite_Reduction4CycB],
  (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_B")>;

// H elements.
def A64FXWrite_Reduction4CycH : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps = 9;
  let ReleaseAtCycles = [9];
  let Latency = 42;
}
def : InstRW<[A64FXWrite_Reduction4CycH],
  (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_H")>;

// S elements.
def A64FXWrite_Reduction4CycS : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps = 8;
  let ReleaseAtCycles = [8];
  let Latency = 38;
}
def : InstRW<[A64FXWrite_Reduction4CycS],
  (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_S")>;

// D elements.
def A64FXWrite_Reduction4CycD : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps = 7;
  let ReleaseAtCycles = [7];
  let Latency = 34;
}
def : InstRW<[A64FXWrite_Reduction4CycD],
  (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_D")>;
2165
// CLASTA/CLASTB to a general register: A64FXGI0 + A64FXGI56, 29 cycles.
def A64FXWrite_CLAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> { let Latency = 29; }
def : InstRW<[A64FXWrite_CLAST_R],
      (instregex "^CLAST[AB]_R")>;

// Vector compares producing a predicate: A64FXGI0 + A64FXGI1, 4 cycles.
def A64FXWrite_CMP : SchedWriteRes<[A64FXGI0, A64FXGI1]> { let Latency = 4; }
def : InstRW<[A64FXWrite_CMP],
      (instregex "^CMP.*_P")>;

// CNTP: A64FXGI1 + A64FXGI2, 6 cycles.
def A64FXWrite_CNTP : SchedWriteRes<[A64FXGI1, A64FXGI2]> { let Latency = 6; }
def : InstRW<[A64FXWrite_CNTP],
      (instregex "^CNTP_X")>;

// CPY from a general register (merging): A64FXGI0 + A64FXGI2, 8 cycles.
def A64FXWrite_CPYScalar : SchedWriteRes<[A64FXGI0, A64FXGI2]> { let Latency = 8; }
def : InstRW<[A64FXWrite_CPYScalar],
      (instregex "^CPY_ZPmR")>;
2185
// CTERM: A64FXGI24 released at cycle 2, 2-cycle latency.
def A64FXWrite_CTERM : SchedWriteRes<[A64FXGI24]> {
  let ReleaseAtCycles = [2];
  let Latency = 2;
}
def : InstRW<[A64FXWrite_CTERM],
      (instregex "^CTERM")>;

// INCP/DECP on a scalar register: 2 micro-ops over A64FXGI1, A64FXGI2 and
// A64FXGI4, 7-cycle latency.
def A64FXWrite_INCPScalar : SchedWriteRes<[A64FXGI1, A64FXGI2, A64FXGI4]> {
  let NumMicroOps = 2;
  let Latency = 7;
}
def : InstRW<[A64FXWrite_INCPScalar],
      (instregex "^DECP_X", "^INCP_X")>;

// INCP/DECP on a vector register: A64FXGI0 + A64FXGI1, 12-cycle latency.
def A64FXWrite_INCPVector : SchedWriteRes<[A64FXGI0, A64FXGI1]> { let Latency = 12; }
def : InstRW<[A64FXWrite_INCPVector],
      (instregex "^DECP_Z", "^INCP_Z")>;
2202
// FADDV on A64FXGI03, one write per element size; ReleaseAtCycles equals the
// micro-op count in each case.

// H elements: 11 micro-ops, 75 cycles.
def A64FXWrite_FADDVH : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps = 11;
  let ReleaseAtCycles = [11];
  let Latency = 75;
}
def : InstRW<[A64FXWrite_FADDVH],
      (instrs FADDV_VPZ_H)>;

// S elements: 9 micro-ops, 60 cycles.
def A64FXWrite_FADDVS : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps = 9;
  let ReleaseAtCycles = [9];
  let Latency = 60;
}
def : InstRW<[A64FXWrite_FADDVS],
      (instrs FADDV_VPZ_S)>;

// D elements: 7 micro-ops, 45 cycles.
def A64FXWrite_FADDVD : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps = 7;
  let ReleaseAtCycles = [7];
  let Latency = 45;
}
def : InstRW<[A64FXWrite_FADDVD],
      (instrs FADDV_VPZ_D)>;
2223
// FADDA on A64FXGI03, one write per element size; ReleaseAtCycles equals the
// micro-op count in each case.

// H elements: 63 micro-ops, 468 cycles.
def A64FXWrite_FADDAH : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps = 63;
  let ReleaseAtCycles = [63];
  let Latency = 468;
}
def : InstRW<[A64FXWrite_FADDAH],
      (instrs FADDA_VPZ_H)>;

// S elements: 31 micro-ops, 228 cycles.
def A64FXWrite_FADDAS : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps = 31;
  let ReleaseAtCycles = [31];
  let Latency = 228;
}
def : InstRW<[A64FXWrite_FADDAS],
      (instrs FADDA_VPZ_S)>;

// D elements: 15 micro-ops, 108 cycles.
def A64FXWrite_FADDAD : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps = 15;
  let ReleaseAtCycles = [15];
  let Latency = 108;
}
def : InstRW<[A64FXWrite_FADDAD],
      (instrs FADDA_VPZ_D)>;
2244
// FCADD: 2 micro-ops over A64FXGI0 and A64FXGI3, 15-cycle latency.
def A64FXWrite_FCADDZ : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
  let NumMicroOps = 2;
  let Latency = 15;
}
def : InstRW<[A64FXWrite_FCADDZ],
      (instregex "^FCADD_Z")>;

// FCMLA: 3 micro-ops on A64FXGI03 (released at cycle 3), 15-cycle latency.
def A64FXWrite_FCMLAZ : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
  let Latency = 15;
}
def : InstRW<[A64FXWrite_FCMLAZ],
      (instregex "^FCMLA_Z")>;
2257
// FDIV/FDIVR/FSQRT on A64FXGI0. The resource is held for the full latency
// (ReleaseAtCycles == Latency) in each element-size variant.

// H elements: 134 cycles.
def A64FXWrite_FDIVH : SchedWriteRes<[A64FXGI0]> {
  let ReleaseAtCycles = [134];
  let Latency = 134;
}
def : InstRW<[A64FXWrite_FDIVH],
      (instregex "^F(DIVR?|SQRT)_Z.*_H")>;

// S elements: 98 cycles.
def A64FXWrite_FDIVS : SchedWriteRes<[A64FXGI0]> {
  let ReleaseAtCycles = [98];
  let Latency = 98;
}
def : InstRW<[A64FXWrite_FDIVS],
      (instregex "^F(DIVR?|SQRT)_Z.*_S")>;

// D elements: 154 cycles.
def A64FXWrite_FDIVD : SchedWriteRes<[A64FXGI0]> {
  let ReleaseAtCycles = [154];
  let Latency = 154;
}
def : InstRW<[A64FXWrite_FDIVD],
      (instregex "^F(DIVR?|SQRT)_Z.*_D")>;
2275
// FMAXV/FMINV/FMAXNMV/FMINNMV, H elements: 11 micro-ops on A64FXGI03
// (released at cycle 11), 54-cycle latency.
def A64FXWrite_FMAXVH : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps = 11;
  let ReleaseAtCycles = [11];
  let Latency = 54;
}
def : InstRW<[A64FXWrite_FMAXVH],
      (instregex "^F(MAX|MIN)(NM)?V_VPZ_H")>;

// Same reductions, S elements: 9 micro-ops (released at cycle 9), 44-cycle
// latency.
def A64FXWrite_FMAXVS : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps = 9;
  let ReleaseAtCycles = [9];
  let Latency = 44;
}
def : InstRW<[A64FXWrite_FMAXVS],
      (instregex "^F(MAX|MIN)(NM)?V_VPZ_S")>;
2289
// FMAXV/FMINV/FMAXNMV/FMINNMV, D elements: 7 micro-ops on A64FXGI03
// (released at cycle 7), 34-cycle latency.
def A64FXWrite_FMAXVD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 34;
  let NumMicroOps = 7;
  let ReleaseAtCycles = [7];
}
// The D-element reductions must be bound to the D-element write; binding
// them to A64FXWrite_FMAXVH would model them with the H-element cost and
// leave A64FXWrite_FMAXVD unused.
def : InstRW<[A64FXWrite_FMAXVD], (instregex "^F(MAX|MIN)(NM)?V_VPZ_D")>;
2296
// INDEX with one register and one immediate operand (RI or IR), B/H
// elements: 2 micro-ops, 17-cycle latency.
def A64FXWrite_INDEX_RI_BH : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2, 2];
  let Latency = 17;
}
def : InstRW<[A64FXWrite_INDEX_RI_BH],
      (instregex "^INDEX_(RI|IR)_[BH]")>;

// INDEX RI/IR, S/D elements: 1 micro-op, 13-cycle latency.
def A64FXWrite_INDEX_RI_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let NumMicroOps = 1;
  let Latency = 13;
}
def : InstRW<[A64FXWrite_INDEX_RI_SD],
      (instregex "^INDEX_(RI|IR)_[SD]")>;

// INDEX with two immediates, B/H elements: 2 micro-ops on A64FXGI0,
// 13-cycle latency.
def A64FXWrite_INDEX_II_BH : SchedWriteRes<[A64FXGI0]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
  let Latency = 13;
}
def : InstRW<[A64FXWrite_INDEX_II_BH],
      (instregex "^INDEX_II_[BH]")>;

// INDEX with two registers, B/H elements: 3 micro-ops, 17-cycle latency.
def A64FXWrite_INDEX_RR_BH : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI3]> {
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2, 2, 1];
  let Latency = 17;
}
def : InstRW<[A64FXWrite_INDEX_RR_BH],
      (instregex "^INDEX_RR_[BH]")>;

// INDEX with two registers, S/D elements: 2 micro-ops, 17-cycle latency.
def A64FXWrite_INDEX_RR_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2, 1];
  let Latency = 17;
}
def : InstRW<[A64FXWrite_INDEX_RR_SD],
      (instregex "^INDEX_RR_[SD]")>;
2330
// INSR from a general register: A64FXGI0 + A64FXGI2, 10-cycle latency.
def A64FXWrite_INSR_ZR : SchedWriteRes<[A64FXGI0, A64FXGI2]> { let Latency = 10; }
def : InstRW<[A64FXWrite_INSR_ZR],
      (instregex "^INSR_ZR")>;
2335
// LASTA/LASTB to a general register: A64FXGI0 + A64FXGI56, 25-cycle latency.
def A64FXWrite_LAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 25;
}
// Bind LAST[AB]_R to the write defined just above. Binding it to
// A64FXWrite_CLAST_R (the conditional CLAST[AB]_R write, 29-cycle latency)
// would leave A64FXWrite_LAST_R unused and apply the wrong latency.
def : InstRW<[A64FXWrite_LAST_R], (instregex "^LAST[AB]_R")>;
2340
// Gather loads. The *_ZI writes cover the "_IMM" opcodes, the *_RZ writes
// the remaining ones; the latter also occupy A64FXGI2.

// _S / 1W gathers, "_IMM" opcodes: 19-cycle latency.
def A64FXWrite_GLD_S_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let ReleaseAtCycles = [2, 4, 4];
  let Latency = 19;
}
def : InstRW<[A64FXWrite_GLD_S_ZI],
  (instregex "^GLD(FF)?1W_IMM",
             "^GLD(FF)?1S?[BHW]_S_IMM")>;

// _D / 1D gathers, "_IMM" opcodes: 16-cycle latency.
def A64FXWrite_GLD_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let ReleaseAtCycles = [1, 2, 2];
  let Latency = 16;
}
def : InstRW<[A64FXWrite_GLD_D_ZI],
  (instregex "^GLD(FF)?1D_IMM",
             "^GLD(FF)?1S?[BHW]_D_IMM")>;

// _S / 1W gathers, non-"_IMM" opcodes: 23-cycle latency.
def A64FXWrite_GLD_S_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let ReleaseAtCycles = [2, 1, 4, 4];
  let Latency = 23;
}
def : InstRW<[A64FXWrite_GLD_S_RZ],
  (instregex "^GLD(FF)?1W_[^DI]",
             "^GLD(FF)?1S?[BHW]_S_[^I]")>;

// _D / 1D gathers, non-"_IMM" opcodes: 20-cycle latency.
def A64FXWrite_GLD_D_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let ReleaseAtCycles = [1, 1, 2, 2];
  let Latency = 20;
}
def : InstRW<[A64FXWrite_GLD_D_RZ],
  (instregex "^GLD(FF)?1D_[^I]",
             "^GLD(FF)?1D$",
             "^GLD(FF)?1S?[BHW]_D_[^I]",
             "^GLD(FF)?1S?[BHW]_D$")>;
2369
// SVE LD2 on A64FXGI56.

// B/H elements (all addressing forms): 3 micro-ops, 15-cycle latency,
// resource released at cycle 9.
def A64FXWrite_LD2_BH : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps = 3;
  let ReleaseAtCycles = [9];
  let Latency = 15;
}
def : InstRW<[A64FXWrite_LD2_BH],
      (instregex "^LD2[BH]")>;

// W/D elements, "_IMM" opcodes: 2 micro-ops, 11-cycle latency.
def A64FXWrite_LD2_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
  let Latency = 11;
}
def : InstRW<[A64FXWrite_LD2_WD_IMM],
      (instregex "^LD2[WD]_IMM")>;

// W/D elements, unsuffixed opcodes: 3 micro-ops, 12-cycle latency.
def A64FXWrite_LD2_WD : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
  let Latency = 12;
}
def : InstRW<[A64FXWrite_LD2_WD],
      (instregex "^LD2[WD]$")>;
2390
// SVE LD3 on A64FXGI56.

// B/H elements (all addressing forms): 4 micro-ops, 15-cycle latency,
// resource released at cycle 13.
def A64FXWrite_LD3_BH : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps = 4;
  let ReleaseAtCycles = [13];
  let Latency = 15;
}
def : InstRW<[A64FXWrite_LD3_BH],
      (instregex "^LD3[BH]")>;

// W/D elements, "_IMM" opcodes: 3 micro-ops, 11-cycle latency.
def A64FXWrite_LD3_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
  let Latency = 11;
}
def : InstRW<[A64FXWrite_LD3_WD_IMM],
      (instregex "^LD3[WD]_IMM")>;

// W/D elements, unsuffixed opcodes: 4 micro-ops, 12-cycle latency.
def A64FXWrite_LD3_WD : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4];
  let Latency = 12;
}
def : InstRW<[A64FXWrite_LD3_WD],
      (instregex "^LD3[WD]$")>;
2411
// SVE LD4 on A64FXGI56.

// B/H elements (all addressing forms): 5 micro-ops, 15-cycle latency,
// resource released at cycle 17.
def A64FXWrite_LD4_BH : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps = 5;
  let ReleaseAtCycles = [17];
  let Latency = 15;
}
def : InstRW<[A64FXWrite_LD4_BH],
      (instregex "^LD4[BH]")>;

// W/D elements, "_IMM" opcodes: 4 micro-ops, 11-cycle latency.
def A64FXWrite_LD4_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4];
  let Latency = 11;
}
def : InstRW<[A64FXWrite_LD4_WD_IMM],
      (instregex "^LD4[WD]_IMM")>;

// W/D elements, unsuffixed opcodes: 5 micro-ops, 12-cycle latency.
def A64FXWrite_LD4_WD : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps = 5;
  let ReleaseAtCycles = [5];
  let Latency = 12;
}
def : InstRW<[A64FXWrite_LD4_WD],
      (instregex "^LD4[WD]$")>;
2432
// SVE prefetches. None of these writes sets Latency or NumMicroOps; only
// the resources and their release cycles differ.

// "_PR..." opcodes: A64FXGI56 only.
def A64FXWrite_PRF : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_PRF],
      (instregex "^PRF._PR")>;

// "_S_" opcodes other than "_S_P...".
def A64FXWrite_PRF_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
  let ReleaseAtCycles = [2, 1, 4];
}
def : InstRW<[A64FXWrite_PRF_W_RZ],
      (instregex "^PRF._S_[^P]")>;

// "_S_PZI" opcodes.
def A64FXWrite_PRF_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let ReleaseAtCycles = [2, 4];
}
def : InstRW<[A64FXWrite_PRF_W_ZI],
      (instregex "^PRF._S_PZI")>;

// "_D_" opcodes other than "_D_P...".
def A64FXWrite_PRF_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
  let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[A64FXWrite_PRF_D_RZ],
      (instregex "^PRF._D_[^P]")>;

// "_D_PZI" opcodes.
def A64FXWrite_PRF_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let ReleaseAtCycles = [1, 2];
}
def : InstRW<[A64FXWrite_PRF_D_ZI],
      (instregex "^PRF._D_PZI")>;
2456
// SDIV/UDIV (and the reversed forms) on A64FXGI0; the resource is held for
// the full latency in both element sizes.

// S elements: 114 cycles.
def A64FXWrite_SDIV_S : SchedWriteRes<[A64FXGI0]> {
  let ReleaseAtCycles = [114];
  let Latency = 114;
}
def : InstRW<[A64FXWrite_SDIV_S],
      (instregex "^[SU]DIVR?.*_S")>;

// D elements: 178 cycles.
def A64FXWrite_SDIV_D : SchedWriteRes<[A64FXGI0]> {
  let ReleaseAtCycles = [178];
  let Latency = 178;
}
def : InstRW<[A64FXWrite_SDIV_D],
      (instregex "^[SU]DIVR?.*_D")>;
2468
// Indexed SDOT/UDOT: 2 micro-ops over A64FXGI0 and A64FXGI3, 15-cycle
// latency.
def A64FXWrite_SDOT_I : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
  let NumMicroOps = 2;
  let Latency = 15;
}
def : InstRW<[A64FXWrite_SDOT_I],
      (instregex "^[SU]DOT_ZZZI")>;
2474
// Saturating INC/DEC by element count on a W/X register: A64FXGI24 released
// at cycle 2, 2-cycle latency.
def A64FXWrite_SQINC_Scalar : SchedWriteRes<[A64FXGI24]> {
  let ReleaseAtCycles = [2];
  let Latency = 2;
}
def : InstRW<[A64FXWrite_SQINC_Scalar],
      (instregex "^[SU]Q(INC|DEC)[BHWD]_[WX]")>;

// Saturating INCP/DECP on a W/X register: 2 micro-ops, 6-cycle latency.
def A64FXWrite_SQINCP_X : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
  let NumMicroOps = 2;
  let ReleaseAtCycles = [3, 1];
  let Latency = 6;
}
def : InstRW<[A64FXWrite_SQINCP_X],
      (instregex "^[SU]Q(INC|DEC)P_[WX]")>;

// Saturating INCP/DECP on a vector register: 12-cycle latency.
def A64FXWrite_SQINCP_Z : SchedWriteRes<[A64FXGI24, A64FXGI3]> { let Latency = 12; }
def : InstRW<[A64FXWrite_SQINCP_Z],
      (instregex "^[SU]Q(INC|DEC)P_Z")>;
2492
// SVE ST1/STNT1 (B/H/W/D): A64FXGI0 + A64FXGI56, 11-cycle latency.
def A64FXWrite_ST1 : SchedWriteRes<[A64FXGI0, A64FXGI56]> { let Latency = 11; }
def : InstRW<[A64FXWrite_ST1],
      (instregex "^ST(NT)?1[BHWD]")>;
2497
// Scatter stores. The *_RZ writes cover opcodes without an "_I..." suffix
// and also occupy A64FXGI2; the *_ZI writes cover the "_I..." opcodes.

// _S / 1W scatters, non-immediate opcodes: 8 micro-ops, 20-cycle latency.
def A64FXWrite_SST1_W_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let NumMicroOps = 8;
  let ReleaseAtCycles = [8, 8, 8, 8];
  let Latency = 20;
}
def : InstRW<[A64FXWrite_SST1_W_RZ],
  (instregex "^SST1[BH]_S(_[^I]|$)",
             "^SST1W(_[^ID]|$)")>;

// _D / 1D scatters, non-immediate opcodes: 4 micro-ops, 20-cycle latency.
def A64FXWrite_SST1_D_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4, 4, 4, 4];
  let Latency = 20;
}
def : InstRW<[A64FXWrite_SST1_D_RZ],
  (instregex "^SST1[BHW]_D(_[^I]|$)",
             "^SST1D(_[^I]|$)")>;

// _S / 1W scatters, "_I..." opcodes: 8 micro-ops, 16-cycle latency.
def A64FXWrite_SST1_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let NumMicroOps = 8;
  let ReleaseAtCycles = [12, 8, 8];
  let Latency = 16;
}
def : InstRW<[A64FXWrite_SST1_W_ZI],
  (instregex "^SST1[BH]_S_I",
             "^SST1W_I")>;

// _D / 1D scatters, "_I..." opcodes: 4 micro-ops, 16-cycle latency.
def A64FXWrite_SST1_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4, 4, 4];
  let Latency = 16;
}
def : InstRW<[A64FXWrite_SST1_D_ZI],
  (instregex "^SST1[BHW]_D_I",
             "^SST1D_I")>;
2529
// SVE ST2, B/H elements (all addressing forms): 3 micro-ops over A64FXGI0
// and A64FXGI56, 12-cycle latency.
def A64FXWrite_ST2_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let NumMicroOps = 3;
  let ReleaseAtCycles = [8, 9];
  let Latency = 12;
}
def : InstRW<[A64FXWrite_ST2_BH],
      (instregex "^ST2[BH]")>;
2536
// SVE ST2W/ST2D. The register+immediate write (RI) is bound to the "_I..."
// (immediate-offset) opcodes and the register+register write (RR) to the
// unsuffixed opcodes. This mirrors how A64FXWrite_LD2_WD_IMM / _LD2_WD are
// bound for the loads; attaching the two patterns the other way round
// contradicts the write names.
// NOTE(review): confirm the per-form numbers against the A64FX
// microarchitecture manual.

// Register + immediate: 2 micro-ops, 11-cycle latency.
def A64FXWrite_ST2_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2, 2];
}
def : InstRW<[A64FXWrite_ST2_WD_RI], (instregex "^ST2[WD]_I")>;

// Register + register: 3 micro-ops, 12-cycle latency.
def A64FXWrite_ST2_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2, 3];
}
def : InstRW<[A64FXWrite_ST2_WD_RR], (instregex "^ST2[WD]$")>;
2550
// SVE ST3, B/H elements (all addressing forms): 4 micro-ops over A64FXGI0
// and A64FXGI56, 15-cycle latency.
def A64FXWrite_ST3_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let NumMicroOps = 4;
  let ReleaseAtCycles = [12, 13];
  let Latency = 15;
}
def : InstRW<[A64FXWrite_ST3_BH],
      (instregex "^ST3[BH]")>;
2557
// SVE ST3W/ST3D. The register+immediate write (RI) is bound to the "_I..."
// (immediate-offset) opcodes and the register+register write (RR) to the
// unsuffixed opcodes. This mirrors how A64FXWrite_LD3_WD_IMM / _LD3_WD are
// bound for the loads; attaching the two patterns the other way round
// contradicts the write names.
// NOTE(review): confirm the per-form numbers against the A64FX
// microarchitecture manual.

// Register + immediate: 3 micro-ops, 11-cycle latency.
def A64FXWrite_ST3_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3, 3];
}
def : InstRW<[A64FXWrite_ST3_WD_RI], (instregex "^ST3[WD]_I")>;

// Register + register: 4 micro-ops, 12-cycle latency.
def A64FXWrite_ST3_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3, 4];
}
def : InstRW<[A64FXWrite_ST3_WD_RR], (instregex "^ST3[WD]$")>;
2571
// SVE ST4, B/H elements (all addressing forms): 5 micro-ops over A64FXGI0
// and A64FXGI56, 15-cycle latency.
def A64FXWrite_ST4_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let NumMicroOps = 5;
  let ReleaseAtCycles = [16, 17];
  let Latency = 15;
}
def : InstRW<[A64FXWrite_ST4_BH],
      (instregex "^ST4[BH]")>;
2578
// SVE ST4W/ST4D. The register+immediate write (RI) is bound to the "_I..."
// (immediate-offset) opcodes and the register+register write (RR) to the
// unsuffixed opcodes. This mirrors how A64FXWrite_LD4_WD_IMM / _LD4_WD are
// bound for the loads; attaching the two patterns the other way round
// contradicts the write names.
// NOTE(review): confirm the per-form numbers against the A64FX
// microarchitecture manual.

// Register + immediate: 4 micro-ops, 11-cycle latency.
def A64FXWrite_ST4_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4, 4];
}
def : InstRW<[A64FXWrite_ST4_WD_RI], (instregex "^ST4[WD]_I")>;

// Register + register: 5 micro-ops, 12-cycle latency.
def A64FXWrite_ST4_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [4, 5];
}
def : InstRW<[A64FXWrite_ST4_WD_RR], (instregex "^ST4[WD]$")>;
2592
// STR of a predicate register: A64FXGI3 + A64FXGI5, 11-cycle latency.
def A64FXWrite_STR_P : SchedWriteRes<[A64FXGI3, A64FXGI5]> { let Latency = 11; }
def : InstRW<[A64FXWrite_STR_P],
      (instrs STR_PXI)>;

// STR of a vector register: A64FXGI0 + A64FXGI5, 11-cycle latency.
def A64FXWrite_STR_Z : SchedWriteRes<[A64FXGI0, A64FXGI5]> { let Latency = 11; }
def : InstRW<[A64FXWrite_STR_Z],
      (instrs STR_ZXI)>;
2602
// WHILEL* predicate generation: A64FXGI3 + A64FXGI5, 4-cycle latency.
def A64FXWrite_WHILE : SchedWriteRes<[A64FXGI3, A64FXGI5]> { let Latency = 4; }
def : InstRW<[A64FXWrite_WHILE],
      (instregex "^WHILEL._P")>;
2607
// WRFFR: 2 micro-ops over A64FXGI3 and A64FXGI5, 3-cycle latency.
def A64FXWrite_WRFFR : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let NumMicroOps = 2;
  let Latency = 3;
}
def : InstRW<[A64FXWrite_WRFFR],
      (instrs WRFFR)>;
2613
2614} // SchedModel = A64FXModel
2615