//==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
//
//===----------------------------------------------------------------------===//

// ===---------------------------------------------------------------------===//
// The Cortex-R52 is an in-order pipelined superscalar microprocessor with
// an 8-stage pipeline. It can issue a maximum of two instructions per cycle.
// There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
// A number of forwarding paths enable results of computations to be input
// to subsequent operations before they are written to registers.
// This scheduler is a MachineScheduler. See TargetSchedule.td for details.

// Top-level machine model record; the WriteRes/InstRW definitions below are
// attached to it.
def CortexR52Model : SchedMachineModel {
  let MicroOpBufferSize = 0;  // R52 is in-order processor
  let IssueWidth = 2;         // 2 micro-ops dispatched per cycle
  let LoadLatency = 1;        // Optimistic, assuming no misses
  let MispredictPenalty = 8;  // A branch direction mispredict, including PFU
  let CompleteModel = 0;      // Covers instructions applicable to cortex-r52.
}


//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available.

// Modeling each pipeline as a ProcResource using the BufferSize = 0 since
// Cortex-R52 is an in-order processor.
def R52UnitALU    : ProcResource<2> { let BufferSize = 0; } // Int ALU
def R52UnitMAC    : ProcResource<1> { let BufferSize = 0; } // Int MAC
def R52UnitDiv    : ProcResource<1> { let BufferSize = 0; } // Int Division
def R52UnitLd     : ProcResource<1> { let BufferSize = 0; } // Load/Store
def R52UnitB      : ProcResource<1> { let BufferSize = 0; } // Branch
def R52UnitFPALU  : ProcResource<2> { let BufferSize = 0; } // FP ALU
def R52UnitFPMUL  : ProcResource<2> { let BufferSize = 0; } // FP MUL
def R52UnitFPDIV  : ProcResource<1> { let BufferSize = 0; } // FP DIV

// Cortex-R52 specific SchedReads
def R52Read_ISS   : SchedRead;
def R52Read_EX1   : SchedRead;
def R52Read_EX2   : SchedRead;
def R52Read_WRI   : SchedRead;
def R52Read_F0    : SchedRead; // F0 maps to ISS stage of integer pipe
def R52Read_F1    : SchedRead;
def R52Read_F2    : SchedRead;


//===----------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types which map ProcResources and set latency.

let SchedModel = CortexR52Model in {

// ALU - Write occurs in Late EX2 (independent of whether shift was required)
def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; }
def : WriteRes<WriteALUsi, [R52UnitALU]> { let Latency = 3; }
def : WriteRes<WriteALUsr, [R52UnitALU]> { let Latency = 3; }
def : WriteRes<WriteALUSsr, [R52UnitALU]> { let Latency = 3; }

// Compares
def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }

// Multiply - aliased to sub-target specific later

// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
def : WriteRes<WriteDIV, [R52UnitDiv]> {
  let Latency = 8; let ResourceCycles = [8]; // non-pipelined
}

// Branches - LR written in Late EX2
def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }

// Misc
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }

// Integer pipeline by-passes
def : ReadAdvance<ReadALU, 1>;   // Operand needed in EX1 stage
def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
def : ReadAdvance<ReadMUL, 0>;
def : ReadAdvance<ReadMAC, 0>;

// Floating-point. Map target-defined SchedReadWrites to subtarget
def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]> { let Latency = 6; }

def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]> {
  let Latency = 6;
}

def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]> {
  let Latency = 11; // as it is internally two insns (MUL then ADD)
}

def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
                              R52UnitFPALU, R52UnitFPALU]> {
  let Latency = 11;
}

def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
  let Latency = 7;          // FP div takes fixed #cycles
  let ResourceCycles = [7]; // is not pipelined
}

def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
  let Latency = 17;
  let ResourceCycles = [17];
}

def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> { let Latency = 7; }
def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> { let Latency = 17; }

// Overridden via InstRW for this processor.
def : WriteRes<WriteVST1, []>;
def : WriteRes<WriteVST2, []>;
def : WriteRes<WriteVST3, []>;
def : WriteRes<WriteVST4, []>;

def : ReadAdvance<ReadFPMUL, 1>; // mul operand read in F1
def : ReadAdvance<ReadFPMAC, 1>; // fp-mac operand read in F1

//===----------------------------------------------------------------------===//
// Subtarget-specific SchedReadWrites.

// Forwarding information - based on when an operand is read
def : ReadAdvance<R52Read_ISS, 0>;
def : ReadAdvance<R52Read_EX1, 1>;
def : ReadAdvance<R52Read_EX2, 2>;
def : ReadAdvance<R52Read_F0, 0>;
def : ReadAdvance<R52Read_F1, 1>;
def : ReadAdvance<R52Read_F2, 2>;


// Cortex-R52 specific SchedWrites for use with InstRW
def R52WriteMAC        : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
def R52WriteMACHi      : SchedWriteRes<[R52UnitMAC]> {
  let Latency = 4; let NumMicroOps = 0;
}
def R52WriteDIV        : SchedWriteRes<[R52UnitDiv]> {
  let Latency = 8; let ResourceCycles = [8]; // not pipelined
}
def R52WriteLd         : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
def R52WriteST         : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
def R52WriteAdr        : SchedWriteRes<[]> { let Latency = 0; }
def R52WriteCC         : SchedWriteRes<[]> { let Latency = 0; }
def R52WriteALU_EX1    : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
def R52WriteALU_EX2    : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
def R52WriteALU_WRI    : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }

def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }

// Alias generics to sub-target specific
def : SchedAlias<WriteMUL16, R52WriteMAC>;
def : SchedAlias<WriteMUL32, R52WriteMAC>;
def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;
def : SchedAlias<WriteMAC16, R52WriteMAC>;
def : SchedAlias<WriteMAC32, R52WriteMAC>;
def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;
def : SchedAlias<WritePreLd, R52WriteLd>;
def : SchedAlias<WriteLd, R52WriteLd>;
def : SchedAlias<WriteST, R52WriteST>;

// FP/Neon writes. The _F3/_F4/_F5 suffix tracks the latency (4/5/6); the
// "2" variants list each unit twice.
def R52WriteFPALU_F3   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
def R52Write2FPALU_F3  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
  let Latency = 4;
}
def R52WriteFPALU_F4   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; }
def R52Write2FPALU_F4  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
  let Latency = 5;
}
def R52WriteFPALU_F5   : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; }
def R52Write2FPALU_F5  : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
  let Latency = 6;
}
def R52WriteFPMUL_F5   : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; }
def R52Write2FPMUL_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
  let Latency = 6;
}
def R52WriteFPMAC_F5   : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
  let Latency = 11; // as it is internally two insns (MUL then ADD)
}
def R52Write2FPMAC_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
                                        R52UnitFPALU, R52UnitFPALU]> {
  let Latency = 11;
}

def R52WriteFPLd_F4    : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
def R52WriteFPST_F4    : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }

//===----------------------------------------------------------------------===//
// Floating-point. Map target defined SchedReadWrites to processor specific ones
//
def : SchedAlias<WriteFPCVT, R52WriteFPALU_F5>;
def : SchedAlias<WriteFPMOV, R52WriteFPALU_F3>;
def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;

//===----------------------------------------------------------------------===//
// Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
//
def : InstRW<[WriteALU], (instrs COPY)>;

def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
      (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
      "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;

def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
      (instregex "MOVCCi32imm", "MOVi32imm", "t2MOVCCi", "t2MOVi")>;
def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
      (instregex "MOV_ga_pcrel$")>;
def : InstRW<[R52WriteLd,R52Read_ISS],
      (instregex "MOV_ga_pcrel_ldr")>;

def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;

def : InstRW<[R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
      (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
      "(t|t2)UBFX", "(t|t2)SBFX")>;

// Saturating arithmetic
def : InstRW<[R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
      (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
      "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
      "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
      "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
      "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
      "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;

// Parallel arithmetic
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
      (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
      "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
      "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
      "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;

// Flag setting.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
      (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
      "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
      "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
      "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
      "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
      "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;

// Sum of Absolute Difference
def : InstRW<[R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
      (instregex "USAD8", "t2USAD8", "USADA8", "t2USADA8")>;

// Integer Multiply
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
      (instregex "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
      "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDX", "t2MUL",
      "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
      "t2SMULWB", "t2SMULWT", "t2SMUSD")>;

// Multiply Accumulate
// Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
// The store pipeline is used partly for 64-bit operations.
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
      (instregex "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
      "t2MLA", "t2MLS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
      "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
      "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
      "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
      "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
      "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
      "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
      "SMLAL", "UMLAL", "SMLALBT",
      "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
      "UMAAL", "t2SMLAL", "t2UMLAL",
      "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
      "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;

def : InstRW<[R52WriteDIV, R52Read_ISS, R52Read_ISS],
      (instregex "t2SDIV", "t2UDIV")>;

// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
// However, that's non-trivial to specify, so we keep it uniform
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
      (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
      "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
      "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
      "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
      "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
      "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
      (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
      "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
      "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
      "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
      "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)?",
      "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;

def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;

def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri", "ANDS?ri",
      "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
      "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
      "t2ORRri", "t2RSBS?ri", "t2SBCri")>;

def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr",
      "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
      "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;

def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
      "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
      "t2AD(C|D)S?rs", "t2ANDS?rs", "t2BICS?rs", "t2EORrs", "t2ORRrs", "t2RSBrs", "t2SBCrs")>;

def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
      (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
      "ORRrsr", "RSBrsr", "RSCrsr", "SBCrsr")>;

def : InstRW<[R52WriteALU_EX1],
      (instregex "ADR", "MOVsi", "MVNS?s?i", "t2MOVS?si")>;

def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>;
def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
      (instregex "ASRr", "RORS?r", "LSR", "LSL")>;

def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;

def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
      (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;

def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;

def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;

// Integer Load, Multiple.
// For every latency 3..25 define a write that occupies the load/store unit
// (…Cy) and a resource-free, zero-micro-op twin (…CyNo).
foreach Lat = 3-25 in {
  def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
    let Latency = Lat;
  }
  def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
    let Latency = Lat;
    let NumMicroOps = 0;
  }
}
// One predicate per possible LDM/STM address count (1..16).
foreach NAddr = 1-16 in {
  def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
}
def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;
// Variadic write for LDM: one SchedVar per address count 2..16, each listing
// as many writes as there are addresses, with latencies 4, 5, 6, ...
def R52WriteILDM : SchedWriteVariant<[
  SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,

  SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy]>,
  SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy]>,

  SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy]>,
  SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy]>,

  SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy]>,
  SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy]>,

  SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy]>,
  SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy]>,

  SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy]>,
  SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy]>,

  SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy]>,
  SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy]>,

  SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy,
                              R52WriteILDM18Cy]>,
  // 16 addresses: 16 writes, so this entry must be keyed on the 16-address
  // predicate (the 15-address case is the entry just above).
  SchedVar<R52ILDMAddr16Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6Cy, R52WriteILDM7Cy,
                              R52WriteILDM8Cy, R52WriteILDM9Cy,
                              R52WriteILDM10Cy, R52WriteILDM11Cy,
                              R52WriteILDM12Cy, R52WriteILDM13Cy,
                              R52WriteILDM14Cy, R52WriteILDM15Cy,
                              R52WriteILDM16Cy, R52WriteILDM17Cy,
                              R52WriteILDM18Cy, R52WriteILDM19Cy]>,

// Unknown number of registers, just use resources for two registers.
// NOTE(review): the last two entries below are the resource-consuming
// R52WriteILDM18Cy/19Cy rather than the ...CyNo forms, so four writes use
// R52UnitLd here, not two - confirm whether that is intended.
  SchedVar<NoSchedPred,      [R52WriteILDM4Cy, R52WriteILDM5Cy,
                              R52WriteILDM6CyNo, R52WriteILDM7CyNo,
                              R52WriteILDM8CyNo, R52WriteILDM9CyNo,
                              R52WriteILDM10CyNo, R52WriteILDM11CyNo,
                              R52WriteILDM12CyNo, R52WriteILDM13CyNo,
                              R52WriteILDM14CyNo, R52WriteILDM15CyNo,
                              R52WriteILDM16CyNo, R52WriteILDM17CyNo,
                              R52WriteILDM18Cy, R52WriteILDM19Cy]>
]> { let Variadic=1; }

// Integer Store, Multiple
def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
  let Latency = 4;
  let NumMicroOps = 2;
}
// R52WriteISTM<N> repeats the per-address store write N times.
foreach NumAddr = 1-16 in {
  def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
}
def R52WriteISTM : SchedWriteVariant<[
  SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>,
  SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>,
  SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>,
  SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>,
  SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>,
  SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>,
  SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>,
  SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>,
  SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>,
  SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>,
  SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>,
  SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>,
  SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>,
  SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>,
  SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>,
  // Unknown number of registers, just use resources for two registers.
  SchedVar<NoSchedPred,      [R52WriteISTM2]>
]>;

def : InstRW<[R52WriteILDM, R52Read_ISS],
      (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
      "(t|sys)LDM(IA|DA|DB|IB)$")>;
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
      (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
      (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "tPOP")>;

// Integer Store, Single Element
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
      (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
      "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
      "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;

def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
      (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
      "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
      "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
      "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;

// Integer Store, Dual
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
      (instregex "STRD$", "t2STRDi8", "STL", "t2STL")>;
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
      (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;

def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
      (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
      (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
      "tPUSH")>;

// LDRLIT pseudo instructions, they expand to LDR + PICADD
def : InstRW<[R52WriteLd],
      (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel$")>;
// LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;



//===----------------------------------------------------------------------===//
// VFP, Floating Point Support
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;

def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;

def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)$")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;

def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;


//===----------------------------------------------------------------------===//
// Neon Support

// vector multiple load stores
foreach NumAddr = 1-16 in {
  def R52LMAddrPred#NumAddr :
    SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
}
foreach Lat = 1-32 in {
  def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
    let Latency = Lat;
  }
}
foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
  def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
    let Latency = 0;
    let NumMicroOps = Num;
    let ResourceCycles = [Num];
  }
}
// Variadic write for VLDM. Operand-count predicates come in pairs, one pair
// per D-register count; each entry lists one latency write per D register
// (5, 6, 7, ...) followed by a load/store-unit reservation whose cycle count
// equals the last listed latency.
def R52WriteVLDM : SchedWriteVariant<[
  // 1 D reg
  SchedVar<R52LMAddrPred1,  [R52WriteLM5Cy,
                              R52ReserveLd5Cy]>,
  SchedVar<R52LMAddrPred2,  [R52WriteLM5Cy,
                              R52ReserveLd5Cy]>,

  // 2 D reg
  SchedVar<R52LMAddrPred3,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52ReserveLd6Cy]>,
  SchedVar<R52LMAddrPred4,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52ReserveLd6Cy]>,

  // 3 D reg
  SchedVar<R52LMAddrPred5,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,
                              R52ReserveLd7Cy]>,
  SchedVar<R52LMAddrPred6,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy,
                              R52ReserveLd7Cy]>,

  // 4 D reg
  SchedVar<R52LMAddrPred7,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52ReserveLd8Cy]>,
  SchedVar<R52LMAddrPred8,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52ReserveLd8Cy]>,

  // 5 D reg
  SchedVar<R52LMAddrPred9,  [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy,
                              R52ReserveLd9Cy]>,
  SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy,
                              R52ReserveLd9Cy]>,

  // 6 D reg
  SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52ReserveLd10Cy]>,
  SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52ReserveLd10Cy]>,

  // 7 D reg
  SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy,
                              R52ReserveLd11Cy]>,
  SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy,
                              R52ReserveLd11Cy]>,

  // 8 D reg (operand counts 15 and 16, matching R52WriteSTM below)
  SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy, R52WriteLM12Cy,
                              R52ReserveLd12Cy]>,
  SchedVar<R52LMAddrPred16, [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy, R52WriteLM12Cy,
                              R52ReserveLd12Cy]>,
  // unknown number of reg.
  SchedVar<NoSchedPred,     [R52WriteLM5Cy, R52WriteLM6Cy,
                              R52WriteLM7Cy, R52WriteLM8Cy,
                              R52WriteLM9Cy, R52WriteLM10Cy,
                              R52WriteLM11Cy, R52WriteLM12Cy,
                              R52ReserveLd5Cy]>
]> { let Variadic=1;}

// variable stores. Cannot dual-issue
def R52WriteSTM5 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 5;
  let NumMicroOps = 2;
  let ResourceCycles = [1];
}
def R52WriteSTM6 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 6;
  let NumMicroOps = 4;
  let ResourceCycles = [2];
}
def R52WriteSTM7 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 7;
  let NumMicroOps = 6;
  let ResourceCycles = [3];
}
def R52WriteSTM8 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 8;
  let NumMicroOps = 8;
  let ResourceCycles = [4];
}
def R52WriteSTM9 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 9;
  let NumMicroOps = 10;
  let ResourceCycles = [5];
}
def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 10;
  let NumMicroOps = 12;
  let ResourceCycles = [6];
}
def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 11;
  let NumMicroOps = 14;
  let ResourceCycles = [7];
}
def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 12;
  let NumMicroOps = 16;
  let ResourceCycles = [8];
}
def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 13;
  let NumMicroOps = 18;
  let ResourceCycles = [9];
}
def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 14;
  let NumMicroOps = 20;
  let ResourceCycles = [10];
}
def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 15;
  let NumMicroOps = 22;
  let ResourceCycles = [11];
}

def R52WriteSTM : SchedWriteVariant<[
  SchedVar<R52LMAddrPred1,  [R52WriteSTM5]>,
  SchedVar<R52LMAddrPred2,  [R52WriteSTM5]>,
  SchedVar<R52LMAddrPred3,  [R52WriteSTM6]>,
  SchedVar<R52LMAddrPred4,  [R52WriteSTM6]>,
  SchedVar<R52LMAddrPred5,  [R52WriteSTM7]>,
  SchedVar<R52LMAddrPred6,  [R52WriteSTM7]>,
  SchedVar<R52LMAddrPred7,  [R52WriteSTM8]>,
  SchedVar<R52LMAddrPred8,  [R52WriteSTM8]>,
  SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,
  SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
  SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
  SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
  SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
  SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
  SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
  SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,
  // unknown number of registers, just use resources for two
  SchedVar<NoSchedPred,     [R52WriteSTM6]>
]>;

// Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
// another instruction in slot-1, but only in the last issue.
def : WriteRes<WriteVLD1, [R52UnitLd]> { let Latency = 5;}
def : WriteRes<WriteVLD2, [R52UnitLd]> {
  let Latency = 6;
  let NumMicroOps = 3;
  let ResourceCycles = [2];
  let SingleIssue = 1;
}
def : WriteRes<WriteVLD3, [R52UnitLd]> {
  let Latency = 7;
  let NumMicroOps = 5;
  let ResourceCycles = [3];
  let SingleIssue = 1;
}
def : WriteRes<WriteVLD4, [R52UnitLd]> {
  let Latency = 8;
  let NumMicroOps = 7;
  let ResourceCycles = [4];
  let SingleIssue = 1;
}
def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 5;
  let NumMicroOps = 1;
  let ResourceCycles = [1];
}
def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 6;
  let NumMicroOps = 3;
  let ResourceCycles = [2];
}
def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 7;
  let NumMicroOps = 5;
  let ResourceCycles = [3];
}
def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 8;
  let NumMicroOps = 7;
  let ResourceCycles = [4];
}
def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> {
  let Latency = 9;
  let NumMicroOps = 9;
  let ResourceCycles = [5];
}


def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;

def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
// NOTE(review): VABAL above lists (v8i16|v4i32|v2i64) while this VABDL entry
// lists (v16i8|v8i16|v4i32) - confirm the intended type suffixes.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v16i8|v8i16|v4i32)")>;

def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>;

def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
      (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
      (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
      (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;

def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
      (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
      (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
      (instregex "VCVT", "VSITO", "VUITO", "VTO")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)q")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)q")>;

def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTd(8|16|32)", "VSEL")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTq(8|16|32|64)")>;

def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)d")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)q")>;

def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;

def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VORR", "VORN", "VREV")>;
def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
      (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
      (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMLAL", "VQDMLSL")>;
def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMUL","VQRDMUL")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
      (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;

//---
// VSTx. Vector Stores
//---
// 1-element structure store
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)T$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Q$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudo$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudo$")>;

def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo$")>;

def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Twb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex
"VST1d(8|16|32|64)Qwb")>; 852def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudoWB")>; 853def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudoWB")>; 854 855def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)_UPD")>; 856def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>; 857def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo_UPD")>; 858 859// 2-element structure store 860def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)$")>; 861def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)$")>; 862def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)Pseudo$")>; 863 864def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)$")>; 865def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNdAsm_(8|16|32)$")>; 866def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo$")>; 867def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)$")>; 868def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNqAsm_(16|32)$")>; 869def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo$")>; 870 871def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)wb")>; 872def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)wb")>; 873def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)PseudoWB")>; 874 875def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)_UPD")>; 876def : InstRW<[R52WriteVST1Mem, R52WriteAdr, 
R52Read_ISS, R52Read_F2], (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>; 877def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo_UPD")>; 878def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)_UPD")>; 879def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>; 880def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo_UPD")>; 881 882// 3-element structure store 883def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>; 884def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>; 885def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3d(8|16|32)(oddP|P)seudo$")>; 886 887def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>; 888def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>; 889def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>; 890def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>; 891def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>; 892def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>; 893 894def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>; 895def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>; 896def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; 897 898def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>; 899def : InstRW<[R52WriteVST2Mem, 
R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>; 900def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>; 901def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)_UPD$")>; 902def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>; 903def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo_UPD$")>; 904 905// 4-element structure store 906def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)$")>; 907def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)Asm_(8|16|32)$")>; 908def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4d(8|16|32)Pseudo$")>; 909 910def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)$")>; 911def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNdAsm_(8|16|32)$")>; 912def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo$")>; 913def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)$")>; 914def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNqAsm_(16|32)$")>; 915def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo$")>; 916 917def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)_UPD")>; 918def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; 919def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>; 920 921def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)_UPD")>; 922def : InstRW<[R52WriteVST2Mem, 
R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>; 923def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo_UPD")>; 924def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)_UPD")>; 925def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>; 926def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo_UPD")>; 927 928} // R52 SchedModel 929