//==- ARMScheduleM4.td - Cortex-M4 Scheduling Definitions -*- tablegen -*-====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the SchedRead/Write data for the ARM Cortex-M4 processor.
//
//===----------------------------------------------------------------------===//

// Machine model for the Cortex-M4: a single-issue, in-order core.
def CortexM4Model : SchedMachineModel {
  let IssueWidth        = 1; // Only IT can be dual-issued, so assume single-issue
  let MicroOpBufferSize = 0; // In-order
  let LoadLatency       = 2; // Latency when not pipelined, not pc-relative
  let MispredictPenalty = 2; // Best case branch taken cost
  let PostRAScheduler   = 1; // Run the post-register-allocation scheduler

  // The model does not claim to describe every instruction.
  let CompleteModel = 0;

  // Predicates for features that this model does not cover.
  let UnsupportedFeatures = [IsARM, HasNEON, HasDotProd, HasZCZ, HasMVEInt,
                             IsNotMClass, HasDPVFP, HasFPARMv8, HasFullFP16,
                             Has8MSecExt, HasV8, HasV8_3a, HasTrustZone,
                             HasDFB, IsWindows];
}

// We model the entire cpu as a single pipeline with a BufferSize = 0 since
// Cortex-M4 is in-order.
def M4Unit : ProcResource<1> { let BufferSize = 0; }

let SchedModel = CortexM4Model in {

// NOTE(review): the SchedWrite/SchedRead arguments and the ReadAdvance cycle
// counts in this block name the generic ARM scheduling types (expected to be
// declared in ARMSchedule.td). Confirm them against that file.

// Some definitions of latencies we apply to different instructions.

// Bind a SchedWrite to the single M4Unit pipeline with a fixed latency.
class M4UnitL1<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 1;
}
class M4UnitL2<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 2;
}
class M4UnitL3<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 3;
}
class M4UnitL14<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 14;
}

// Stand-alone write resources used by the per-instruction overrides below.
def M4UnitL1_wr : SchedWriteRes<[M4Unit]> { let Latency = 1; }
def M4UnitL2_wr : SchedWriteRes<[M4Unit]> { let Latency = 2; }

// Give the instructions selected by `instr` a latency of 1 or 2 on M4Unit.
class M4UnitL1I<dag instr> : InstRW<[M4UnitL1_wr], instr>;
class M4UnitL2I<dag instr> : InstRW<[M4UnitL2_wr], instr>;

// Loads, MAC's and DIV all get a higher latency of 2.
def : M4UnitL2<WriteLd>;
def : M4UnitL2<WriteMAC32>;
def : M4UnitL2<WriteMAC64Hi>;
def : M4UnitL2<WriteMAC64Lo>;
def : M4UnitL2<WriteMAC16>;
def : M4UnitL2<WriteDIV>;

def : M4UnitL2I<(instregex "(t|t2)LDM")>;
def : M4UnitL2I<(instregex "(t|t2)LDR")>;

// For stores we use a latency of 1 as they have no outputs.
def : M4UnitL1<WriteST>;
def : M4UnitL1I<(instregex "(t|t2)STM")>;

// Everything else has a latency of 1.
def : M4UnitL1<WriteALU>;
def : M4UnitL1<WriteALUsi>;
def : M4UnitL1<WriteALUsr>;
def : M4UnitL1<WriteALUSsr>;
def : M4UnitL1<WriteBr>;
def : M4UnitL1<WriteBrL>;
def : M4UnitL1<WriteBrTbl>;
def : M4UnitL1<WriteCMP>;
def : M4UnitL1<WriteCMPsi>;
def : M4UnitL1<WriteCMPsr>;
def : M4UnitL1<WriteMUL16>;
def : M4UnitL1<WriteMUL32>;
def : M4UnitL1<WriteMUL64Hi>;
def : M4UnitL1<WriteMUL64Lo>;
def : M4UnitL1<WriteNoop>;
def : M4UnitL1<WritePreLd>;

def : M4UnitL1I<(instregex "(t|t2)MOV")>;
def : M4UnitL1I<(instrs COPY)>;
def : M4UnitL1I<(instregex "t2IT", "t2MSR", "t2MRS")>;
def : M4UnitL1I<(instregex "t2CLREX")>;
def : M4UnitL1I<(instregex "t2SEL", "t2USAD8", "t2SML[AS]",
                           "t2(S|Q|SH|U|UQ|UH|QD)(ADD|ASX|SAX|SUB)",
                           "t2USADA8", "(t|t2)REV")>;

// These instructions are not of much interest to scheduling as they will not
// be generated or it is not very useful to schedule them. They are here to make
// the model more complete.
def : M4UnitL1I<(instregex "t2CDP", "t2LDC", "t2MCR", "t2MRC", "t2MRRC",
                           "t2STC")>;
def : M4UnitL1I<(instregex "tCPS", "t2ISB", "t2DSB", "t2DMB", "t2?HINT$")>;
def : M4UnitL1I<(instregex "t2?UDF$", "tBKPT", "t2DBG")>;
def : M4UnitL1I<(instregex "t?2?Int_eh_sjlj_", "tADDframe", "t?ADJCALL")>;
def : M4UnitL1I<(instregex "CMP_SWAP", "JUMPTABLE", "MEMCPY")>;
def : M4UnitL1I<(instregex "VSETLNi32", "VGETLNi32")>;

// Integer operand reads are advanced by 0 cycles (no forwarding is modelled).
def : ReadAdvance<ReadALU, 0>;
def : ReadAdvance<ReadALUsr, 0>;
def : ReadAdvance<ReadMUL, 0>;
def : ReadAdvance<ReadMAC, 0>;

// Most FP instructions are single-cycle latency, except MAC's, Div's and Sqrt's.
// Loads still take 2 cycles.
def : M4UnitL1<WriteFPCVT>;
def : M4UnitL1<WriteFPMOV>;
def : M4UnitL1<WriteFPALU32>;
def : M4UnitL1<WriteFPALU64>;
def : M4UnitL1<WriteFPMUL32>;
def : M4UnitL1<WriteFPMUL64>;
def : M4UnitL2I<(instregex "VLD")>;
def : M4UnitL1I<(instregex "VST")>;
def : M4UnitL3<WriteFPMAC32>;
def : M4UnitL3<WriteFPMAC64>;
def : M4UnitL14<WriteFPDIV32>;
def : M4UnitL14<WriteFPDIV64>;
def : M4UnitL14<WriteFPSQRT32>;
def : M4UnitL14<WriteFPSQRT64>;
def : M4UnitL1<WriteVLD1>;
def : M4UnitL1<WriteVLD2>;
def : M4UnitL1<WriteVLD3>;
def : M4UnitL1<WriteVLD4>;
def : M4UnitL1<WriteVST1>;
def : M4UnitL1<WriteVST2>;
def : M4UnitL1<WriteVST3>;
def : M4UnitL1<WriteVST4>;

def : M4UnitL1I<(instregex "VMOVS", "FCONSTS", "VCMP", "VNEG", "VABS")>;
def : M4UnitL2I<(instregex "VMOVD")>;
def : M4UnitL1I<(instregex "VMRS", "VMSR", "FMSTAT")>;

// FP operand reads are likewise advanced by 0 cycles.
def : ReadAdvance<ReadFPMUL, 0>;
def : ReadAdvance<ReadFPMAC, 0>;

} // SchedModel = CortexM4Model