//==- AArch64SchedCortexA55.td - ARM Cortex-A55 Scheduling Definitions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for the ARM Cortex-A55 processors. Note
// that this schedule is currently used as the default for -mcpu=generic. As a
// result, some of the modelling decisions made do not precisely model the
// Cortex-A55, instead aiming to be a good compromise between different cpus.
//
//===----------------------------------------------------------------------===//

// ===---------------------------------------------------------------------===//
// The following definitions describe the per-operand machine model.
// This works with MachineScheduler. See MCSchedModel.h for details.

// Cortex-A55 machine model for scheduling and other instruction cost heuristics.
def CortexA55Model : SchedMachineModel {
  let MicroOpBufferSize = 0;  // The Cortex-A55 is an in-order processor
  let IssueWidth = 2;         // It dual-issues under most circumstances
  let LoadLatency = 4;        // Cycles for loads to access the cache. The
                              // optimisation guide shows that most loads have
                              // a latency of 3, but some have a latency of 4
                              // or 5. Setting it to 4 looked to be a good
                              // trade-off.
  let MispredictPenalty = 8;  // A branch direction mispredict.
  let PostRAScheduler = 1;    // Enable PostRA scheduler pass.
  let CompleteModel = 0;      // Covers instructions applicable to Cortex-A55.

  // Instructions guarded by these predicates are not described by this model.
  list<Predicate> UnsupportedFeatures = [HasSVE, HasMTE];

  // FIXME: Remove when all errors have been fixed.
  // While this is 0, InstRW definitions in this model whose instruction sets
  // overlap are not diagnosed, so keep the regexes below disjoint by hand.
  let FullInstRWOverlapCheck = 0;
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available.

// Modeling each pipeline as a ProcResource using the BufferSize = 0 since the
// Cortex-A55 is in-order.

// Integer, load/store and branch pipelines. The template argument is the
// number of units of that kind.
def CortexA55UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
def CortexA55UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC, 64-bit wide
def CortexA55UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division, not pipelined
def CortexA55UnitLd  : ProcResource<1> { let BufferSize = 0; } // Load pipe
def CortexA55UnitSt  : ProcResource<1> { let BufferSize = 0; } // Store pipe
def CortexA55UnitB   : ProcResource<1> { let BufferSize = 0; } // Branch

// The FP DIV/SQRT instructions execute totally differently from the FP ALU
// instructions, which can mostly be dual-issued; that's why for now we model
// them with 2 resources.
def CortexA55UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU
def CortexA55UnitFPMAC : ProcResource<2> { let BufferSize = 0; } // FP MAC
def CortexA55UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP Div/SQRT, 64/128

//===----------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types

// Everything from here on is bound to CortexA55Model; the closing brace of
// this scope is further down the file.
let SchedModel = CortexA55Model in {

// These latencies are modeled without taking into account forwarding paths
// (the software optimisation guide lists latencies taking into account
// typical forwarding paths).
// Integer ALU: every generic ALU write uses one ALU unit with latency 3.
def : WriteRes<WriteImm,   [CortexA55UnitALU]> { let Latency = 3; } // MOVN, MOVZ
def : WriteRes<WriteI,     [CortexA55UnitALU]> { let Latency = 3; } // ALU
def : WriteRes<WriteISReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Shifted-Reg
def : WriteRes<WriteIEReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Extended-Reg
def : WriteRes<WriteExtr,  [CortexA55UnitALU]> { let Latency = 3; } // EXTR from a reg pair
def : WriteRes<WriteIS,    [CortexA55UnitALU]> { let Latency = 3; } // Shift/Scale

// MAC
def : WriteRes<WriteIM32, [CortexA55UnitMAC]> { let Latency = 4; } // 32-bit Multiply
def : WriteRes<WriteIM64, [CortexA55UnitMAC]> { let Latency = 4; } // 64-bit Multiply

// Div
// The divider is held for the full 8 cycles (ReleaseAtCycles equals the
// latency), matching the "not pipelined" note on CortexA55UnitDiv.
def : WriteRes<WriteID32, [CortexA55UnitDiv]> {
  let Latency = 8; let ReleaseAtCycles = [8];
}
def : WriteRes<WriteID64, [CortexA55UnitDiv]> {
  let Latency = 8; let ReleaseAtCycles = [8];
}

// Load
def : WriteRes<WriteLD,    [CortexA55UnitLd]> { let Latency = 3; }
def : WriteRes<WriteLDIdx, [CortexA55UnitLd]> { let Latency = 4; }
def : WriteRes<WriteLDHi,  [CortexA55UnitLd]> { let Latency = 5; }

// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVLD
// below, choosing the median of 3 which makes the latency 6.
// An extra cycle is needed to get the swizzling right.
def : WriteRes<WriteVLD, [CortexA55UnitLd]> { let Latency = 6;
                                              let ReleaseAtCycles = [3]; }
// CortexA55WriteVLD<N>: the load pipe is occupied for N cycles and the result
// latency is N + 3. VLD1SI is VLD1 with SingleIssue set.
def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; }
def CortexA55WriteVLD1SI : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; let SingleIssue = 1; }
def CortexA55WriteVLD2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5;
                                                            let ReleaseAtCycles = [2]; }
def CortexA55WriteVLD3 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 6;
                                                            let ReleaseAtCycles = [3]; }
def CortexA55WriteVLD4 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 7;
                                                            let ReleaseAtCycles = [4]; }
def CortexA55WriteVLD5 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 8;
                                                            let ReleaseAtCycles = [5]; }
def CortexA55WriteVLD6 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 9;
                                                            let ReleaseAtCycles = [6]; }
def CortexA55WriteVLD7 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 10;
                                                            let ReleaseAtCycles = [7]; }
def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11;
                                                            let ReleaseAtCycles = [8]; }

// Writes used for the second destination register of load-pair instructions.
// LDP1 consumes no resource; LDP2 uses the load pipe once.
// NOTE(review): LDP4 lists CortexA55UnitLd five times although its name says
// 4 - confirm whether five units is intended.
def CortexA55WriteLDP1 : SchedWriteRes<[]> { let Latency = 4; }
def CortexA55WriteLDP2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5; }
def CortexA55WriteLDP4 : SchedWriteRes<[CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd, CortexA55UnitLd]> { let Latency = 6; }

// Pre/Post Indexing - Performed as part of address generation
def : WriteRes<WriteAdr, []> { let Latency = 0; }

// Store
let RetireOOO = 1 in {
def : WriteRes<WriteST,    [CortexA55UnitSt]> { let Latency = 1; }
def : WriteRes<WriteSTP,   [CortexA55UnitSt]> { let Latency = 1; }
def : WriteRes<WriteSTIdx, [CortexA55UnitSt]> { let Latency = 1; }
}
def : WriteRes<WriteSTX, [CortexA55UnitSt]> { let Latency = 4; }

// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
def : WriteRes<WriteVST, [CortexA55UnitSt]> { let Latency = 5;
                                              let ReleaseAtCycles = [2]; }
def CortexA55WriteVST1 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 4; }
def CortexA55WriteVST2 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
                                                            let ReleaseAtCycles = [2]; }
def CortexA55WriteVST3 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 6;
                                                            let ReleaseAtCycles = [3]; }
// NOTE(review): VST1..VST3 follow Latency = ReleaseAtCycles + 3, but VST4 has
// Latency 5 with ReleaseAtCycles 4 - confirm this is intended and not a typo
// for 7.
def CortexA55WriteVST4 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
                                                            let ReleaseAtCycles = [4]; }

// Atomics are not described by this model.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }

// Branch
// No Latency is given here, so these use the WriteRes default.
def : WriteRes<WriteBr,      [CortexA55UnitB]>;
def : WriteRes<WriteBrReg,   [CortexA55UnitB]>;
def : WriteRes<WriteSys,     [CortexA55UnitB]>;
def : WriteRes<WriteBarrier, [CortexA55UnitB]>;
def : WriteRes<WriteHint,    [CortexA55UnitB]>;

// FP ALU
// As WriteF result is produced in F5 and it can be mostly forwarded
// to consumer at F1, the effective latency is set as 4.
def : WriteRes<WriteF,     [CortexA55UnitFPALU]> { let Latency = 4; }
def : WriteRes<WriteFCmp,  [CortexA55UnitFPALU]> { let Latency = 3; }
def : WriteRes<WriteFCvt,  [CortexA55UnitFPALU]> { let Latency = 4; }
def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
def : WriteRes<WriteFImm,  [CortexA55UnitFPALU]> { let Latency = 3; }

// NEON
// CortexA55WriteVd<n, res>: uses one unit of `res`, latency n.
class CortexA55WriteVd<int n, ProcResourceKind res> : SchedWriteRes<[res]> {
  let Latency = n;
}
// CortexA55WriteVq<n, res>: uses two units of `res`, latency n, and sets
// BeginGroup.
class CortexA55WriteVq<int n, ProcResourceKind res> : SchedWriteRes<[res, res]> {
  let Latency = n;
  let BeginGroup = 1;
}
// Named instances; the trailing _<n> in each name is the latency passed to
// the class.
def CortexA55WriteDotScVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
def CortexA55WriteDotVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
def CortexA55WriteDotVd_4 : CortexA55WriteVd<4, CortexA55UnitFPALU>;
def CortexA55WriteMlaLVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
def CortexA55WriteMlaIxVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
def CortexA55WriteMlaVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
def CortexA55WriteMlaVd_4 : CortexA55WriteVd<4, CortexA55UnitFPALU>;
def CortexA55WriteAluVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
def CortexA55WriteAluVd_3 : CortexA55WriteVd<3, CortexA55UnitFPALU>;
def CortexA55WriteAluVq_3 : CortexA55WriteVq<3, CortexA55UnitFPALU>;
def CortexA55WriteAluVd_2 : CortexA55WriteVd<2, CortexA55UnitFPALU>;
def CortexA55WriteAluVq_2 : CortexA55WriteVq<2, CortexA55UnitFPALU>;
def CortexA55WriteAluVd_1 : CortexA55WriteVd<1, CortexA55UnitFPALU>;
def CortexA55WriteAluVq_1 : CortexA55WriteVq<1, CortexA55UnitFPALU>;
// Default for the generic WriteVd/WriteVq when no InstRW overrides them:
// latency 4 on the FP ALU.
def : SchedAlias<WriteVd, CortexA55WriteVd<4, CortexA55UnitFPALU>>;
def : SchedAlias<WriteVq, CortexA55WriteVq<4, CortexA55UnitFPALU>>;

// FP ALU specific new schedwrite definitions
def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2; }
def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3; }
def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4; }
def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5; }

// FP Mul, Div, Sqrt. Div/Sqrt are not pipelined
def : WriteRes<WriteFMul, [CortexA55UnitFPMAC]> { let Latency = 4; }

let RetireOOO = 1 in {
// NOTE(review): the generic WriteFDiv holds the divider for 29 cycles with a
// latency of 22, whereas CortexA55WriteFDivDP below uses 19 for the same
// latency - confirm 29 is intended.
def : WriteRes<WriteFDiv, [CortexA55UnitFPDIV]> { let Latency = 22;
                                                  let ReleaseAtCycles = [29]; }
def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> { let Latency = 4; }
// Per-precision divide/sqrt writes (HP/SP/DP by name).
def CortexA55WriteFDivHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
                                                                 let ReleaseAtCycles = [5]; }
def CortexA55WriteFDivSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 13;
                                                                 let ReleaseAtCycles = [10]; }
def CortexA55WriteFDivDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
                                                                 let ReleaseAtCycles = [19]; }
def CortexA55WriteFSqrtHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
                                                                  let ReleaseAtCycles = [5]; }
def CortexA55WriteFSqrtSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 12;
                                                                  let ReleaseAtCycles = [9]; }
def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
                                                                  let ReleaseAtCycles = [19]; }
}
//===----------------------------------------------------------------------===//
// Subtarget-specific SchedRead types.

def : ReadAdvance<ReadVLD, 0>;
def : ReadAdvance<ReadExtrHi, 1>;
def : ReadAdvance<ReadAdrBase, 1>;
def : ReadAdvance<ReadST, 1>;

// ALU - ALU input operands are generally needed in EX1. An operand produced
// in, say, EX2 can be forwarded for consumption to ALU in EX1, thereby
// allowing back-to-back ALU operations such as add. If an operand requires
// a shift, it will, however, be required in ISS stage.
//
// Every ReadAdvance below that takes a write list uses the same set of
// integer producers (WriteExtr is not in that list).
def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
                             WriteISReg, WriteIEReg,WriteIS,
                             WriteID32,WriteID64,
                             WriteIM32,WriteIM64]>;
// Shifted operand
def CortexA55ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI,
                                                WriteISReg, WriteIEReg,WriteIS,
                                                WriteID32,WriteID64,
                                                WriteIM32,WriteIM64]>;
def CortexA55ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI,
                                                   WriteISReg, WriteIEReg,WriteIS,
                                                   WriteID32,WriteID64,
                                                   WriteIM32,WriteIM64]>;
// ReadISReg: advance of 1 when RegShiftedPred holds, otherwise 2.
def CortexA55ReadISReg : SchedReadVariant<[
        SchedVar<RegShiftedPred, [CortexA55ReadShifted]>,
        SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
def : SchedAlias<ReadISReg, CortexA55ReadISReg>;

// ReadIEReg: advance of 1 when RegExtendedPred holds, otherwise 2.
def CortexA55ReadIEReg : SchedReadVariant<[
        SchedVar<RegExtendedPred, [CortexA55ReadShifted]>,
        SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
def : SchedAlias<ReadIEReg, CortexA55ReadIEReg>;

// MUL
def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
                              WriteISReg, WriteIEReg,WriteIS,
                              WriteID32,WriteID64,
                              WriteIM32,WriteIM64]>;
def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI,
                               WriteISReg, WriteIEReg,WriteIS,
                               WriteID32,WriteID64,
                               WriteIM32,WriteIM64]>;

// Div
def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
                              WriteISReg, WriteIEReg,WriteIS,
                              WriteID32,WriteID64,
                              WriteIM32,WriteIM64]>;

//===----------------------------------------------------------------------===//
// Subtarget-specific InstRWs.

//---
// Miscellaneous
//---
// Load pair. The two writes cover the two destination registers; the
// pre/post-indexed forms prepend WriteAdr for the address write-back.
def : InstRW<[CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?Wi")>;
def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPSi")>;
def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)i")>;
def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQi")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?W(pre|post)")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPS(pre|post)")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)(pre|post)")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQ(pre|post)")>;
def : InstRW<[WriteI], (instrs COPY)>;
//---
// Vector Loads - 64-bit per cycle
//---
// Each group has a plain form (regex ends in "$") and a write-back form
// (regex ends in "_POST$", WriteAdr prepended). The two regex sets must stay
// disjoint: FullInstRWOverlapCheck is disabled for this model, so an overlap
// would not be diagnosed.

// 1-element structures
def : InstRW<[CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)$")>;                // single element
def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // replicate
def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)$")>;            // multiple structures
def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;

def : InstRW<[WriteAdr, CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;

// 2-element structures
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;

// Write-back forms only. These regexes previously ended in "(_POST)?$", which
// also matched the plain LD2 opcodes already described just above and would
// have given them a [WriteAdr, ...] write list.
def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;

// 3-element structures
def : InstRW<[CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
def : InstRW<[CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>;

def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;

// 4-element structures
def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>;                // load single 4-el structure to one lane of 4 regs.
def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs.
def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>;           // load multiple 4-el structures to 4 regs.
def : InstRW<[CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;

def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;

//---
// Vector Stores
//---
// As for the loads: a plain form ("$") and a write-back form ("_POST$", with
// WriteAdr prepended) for every group.
def : InstRW<[CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;

def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)_POST$")>;
// The arrangement list mirrors the plain ST3Threev row above; it previously
// read "(8b|4h|2s|1d|2d|16b|8h|4s|4d)", where "4d" is not a vector arrangement.
def : InstRW<[WriteAdr, CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
def : InstRW<[CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)_POST$")>;
def : InstRW<[WriteAdr, CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

//---
// Floating Point Conversions, MAC, DIV, SQRT
//---
def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^DUP(v2i64|v4i32|v8i16|v16i8)")>;
def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^XTN")>;
def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>;

def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(S|U)(W|X)(H|S|D)")>;
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(h|s|d)")>;
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTFv")>;

def : InstRW<[CortexA55WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
def : InstRW<[CortexA55WriteFMAC], (instregex "^FML(A|S).*")>;
// Divide and square root are split by element size (16/32/64 name suffix)
// onto the HP/SP/DP writes.
def : InstRW<[CortexA55WriteFDivHP], (instrs FDIVHrr)>;
def : InstRW<[CortexA55WriteFDivSP], (instrs FDIVSrr)>;
def : InstRW<[CortexA55WriteFDivDP], (instrs FDIVDrr)>;
def : InstRW<[CortexA55WriteFDivHP], (instregex "^FDIVv.*16$")>;
def : InstRW<[CortexA55WriteFDivSP], (instregex "^FDIVv.*32$")>;
def : InstRW<[CortexA55WriteFDivDP], (instregex "^FDIVv.*64$")>;
def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;

// 4.15. Advanced SIMD integer instructions
// In the rows below the Vd writes are paired with the 64-bit arrangements and
// the Vq writes with the 128-bit (or long/wide/narrow) ones.
// ASIMD absolute diff
def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]ABDv(2i32|4i16|8i8)")>;
def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]ABDv(16i8|4i32|8i16)")>;
// ASIMD absolute diff accum
def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]ABAL?v")>;
// ASIMD absolute diff long
def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]ABDLv")>;
// ASIMD arith #1
def : InstRW<[CortexA55WriteAluVd_2], (instregex "(ADD|SUB|NEG)v(1i64|2i32|4i16|8i8)",
  "[SU]R?HADDv(2i32|4i16|8i8)", "[SU]HSUBv(2i32|4i16|8i8)")>;
def : InstRW<[CortexA55WriteAluVq_2], (instregex "(ADD|SUB|NEG)v(2i64|4i32|8i16|16i8)",
  "[SU]R?HADDv(8i16|4i32|16i8)", "[SU]HSUBv(8i16|4i32|16i8)")>;
// ASIMD arith #2
def : InstRW<[CortexA55WriteAluVd_3], (instregex "ABSv(1i64|2i32|4i16|8i8)$",
  "[SU]ADDLPv(2i32_v1i64|4i16_v2i32|8i8_v4i16)$",
  "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(1i16|1i32|1i64|1i8|2i32|4i16|8i8)$",
  "ADDPv(2i32|4i16|8i8)$")>;
def : InstRW<[CortexA55WriteAluVq_3], (instregex "ABSv(2i64|4i32|8i16|16i8)$",
  "[SU]ADDLPv(16i8_v8i16|4i32_v2i64|8i16_v4i32)$",
  "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(16i8|2i64|4i32|8i16)$",
  "ADDPv(16i8|2i64|4i32|8i16)$")>;
// ASIMD arith #3
def : InstRW<[CortexA55WriteAluVq_3], (instregex "SADDLv", "UADDLv", "SADDWv",
  "UADDWv", "SSUBLv", "USUBLv", "SSUBWv", "USUBWv", "ADDHNv", "SUBHNv")>;
// ASIMD arith #5
def : InstRW<[CortexA55WriteAluVq_4], (instregex "RADDHNv", "RSUBHNv")>;
// ASIMD arith, reduce
def : InstRW<[CortexA55WriteAluVq_3], (instregex "ADDVv", "SADDLVv", "UADDLVv")>;
// ASIMD compare #1
// 64-bit arrangements (plus the 1i64 scalar form) use the Vd write, 128-bit
// arrangements the Vq write; the same split applies to the pairs below.
def : InstRW<[CortexA55WriteAluVd_2], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(1i64|2i32|4i16|8i8)")>;
def : InstRW<[CortexA55WriteAluVq_2], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(2i64|4i32|8i16|16i8)")>;
// ASIMD compare #2
def : InstRW<[CortexA55WriteAluVd_3], (instregex "CMTSTv(1i64|2i32|4i16|8i8)")>;
def : InstRW<[CortexA55WriteAluVq_3], (instregex "CMTSTv(2i64|4i32|8i16|16i8)")>;
// ASIMD logical #1
def : InstRW<[CortexA55WriteAluVd_1], (instregex "(AND|EOR|NOT|ORN)v8i8",
  "(ORR|BIC)v(2i32|4i16|8i8)$", "MVNIv(2i|2s|4i16)")>;
def : InstRW<[CortexA55WriteAluVq_1], (instregex "(AND|EOR|NOT|ORN)v16i8",
  "(ORR|BIC)v(16i8|4i32|8i16)$", "MVNIv(4i32|4s|8i16)")>;
// ASIMD max/min, basic
// The 128-bit list must name the "4i32" arrangement so that the v4i32 forms
// of [SU]MIN/[SU]MAX (and their pairwise P variants) match this record, as the
// 2i32 forms match the 64-bit record above.
def : InstRW<[CortexA55WriteAluVd_2], (instregex "[SU](MIN|MAX)P?v(2i32|4i16|8i8)")>;
def : InstRW<[CortexA55WriteAluVq_2], (instregex "[SU](MIN|MAX)P?v(16i8|4i32|8i16)")>;
// ASIMD max/min, reduce
def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU](MAX|MIN)Vv")>;
// ASIMD multiply, by element
def : InstRW<[CortexA55WriteAluVq_4], (instregex "MULv(2i32|4i16|4i32|8i16)_indexed$",
  "SQR?DMULHv(1i16|1i32|2i32|4i16|4i32|8i16)_indexed$")>;
// ASIMD multiply
def : InstRW<[CortexA55WriteAluVd_3], (instrs PMULv8i8)>;
def : InstRW<[CortexA55WriteAluVq_3], (instrs PMULv16i8)>;

// ASIMD multiply accumulate
// Plain vector forms are split by register width; every by-element
// ("_indexed") form shares the single MlaIx write.
def : InstRW<[CortexA55WriteMlaVd_4],
             (instregex "ML[AS]v(2i32|4i16|8i8)$")>;
def : InstRW<[CortexA55WriteMlaVq_4],
             (instregex "ML[AS]v(16i8|4i32|8i16)$")>;
def : InstRW<[CortexA55WriteMlaIxVq_4],
             (instregex "ML[AS]v(2i32|4i16|4i32|8i16)_indexed$")>;

// ASIMD multiply accumulate half
def : InstRW<[CortexA55WriteAluVq_4],
             (instregex "SQRDML[AS]H[vi]")>;

// ASIMD multiply accumulate long
def : InstRW<[CortexA55WriteMlaLVq_4],
             (instregex "[SU]ML[AS]Lv")>;

// ASIMD multiply accumulate long #2
def : InstRW<[CortexA55WriteAluVq_4],
             (instregex "SQDML[AS]L[iv]")>;

// ASIMD dot product
def : InstRW<[CortexA55WriteDotVd_4],
             (instregex "[SU]DOTv8i8")>;
def : InstRW<[CortexA55WriteDotVq_4],
             (instregex "[SU]DOTv16i8")>;

// ASIMD dot product, by scalar
def : InstRW<[CortexA55WriteDotScVq_4],
             (instregex "[SU]DOTlanev")>;

// ASIMD multiply long
def : InstRW<[CortexA55WriteAluVq_4],
             (instregex "[SU]MULLv",
                        "SQDMULL[iv]")>;

// ASIMD polynomial (8x8) multiply long
def : InstRW<[CortexA55WriteAluVq_3], (instrs PMULLv8i8, PMULLv16i8)>;

// ASIMD pairwise add and accumulate
def : InstRW<[CortexA55WriteAluVq_4],
             (instregex "[SU]ADALPv")>;

// ASIMD shift accumulate
// The "d" alternative selects the opcode whose name ends the mnemonic with
// "d" rather than a "v<arrangement>" suffix.
def : InstRW<[CortexA55WriteAluVd_3],
             (instregex "[SU]SRA(d|v2i32|v4i16|v8i8)")>;
def : InstRW<[CortexA55WriteAluVq_3],
             (instregex "[SU]SRAv(16i8|2i64|4i32|8i16)")>;

// ASIMD shift accumulate #2
def : InstRW<[CortexA55WriteAluVq_4],
             (instregex "[SU]RSRA[vd]")>;

// ASIMD shift by immed
def : InstRW<[CortexA55WriteAluVd_2],
             (instregex "SHLd$",
                        "SHLv",
                        "SLId$",
                        "SRId$",
                        "[SU]SHR[vd]",
                        "SHRNv")>;

// ASIMD shift by immed
// SXTL and UXTL are aliases for SHLL
def : InstRW<[CortexA55WriteAluVq_2],
             (instregex "[US]?SHLLv")>;

// ASIMD shift by immed #2
def : InstRW<[CortexA55WriteAluVd_3],
             (instregex "[SU]RSHR(d|v2i32|v4i16|v8i8)",
                        "RSHRNv(2i32|4i16|8i8)")>;
def : InstRW<[CortexA55WriteAluVq_3],
             (instregex "[SU]RSHRv(16i8|2i64|4i32|8i16)",
                        "RSHRNv(16i8|4i32|8i16)")>;

// ASIMD shift by register
def : InstRW<[CortexA55WriteAluVd_2],
             (instregex "[SU]SHLv(1i64|2i32|4i16|8i8)")>;
def : InstRW<[CortexA55WriteAluVq_2],
             (instregex "[SU]SHLv(2i64|4i32|8i16|16i8)")>;

// ASIMD shift by register #2
def : InstRW<[CortexA55WriteAluVd_3],
             (instregex "[SU]RSHLv(1i64|2i32|4i16|8i8)")>;
def : InstRW<[CortexA55WriteAluVq_3],
             (instregex "[SU]RSHLv(2i64|4i32|8i16|16i8)")>;

}