;; Scheduling description for Niagara-7
;; Copyright (C) 2016-2020 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_automaton "niagara7_0")

;; The S4 core has a dual-issue queue, divided into two slots.  One
;; instruction can be issued to each slot every cycle, and up to 2
;; instructions are committed each cycle.  Each slot feeds several
;; execution units, as depicted below:
;;
;;
;;                    n7_slot0 - Integer unit.
;;                             - Load/Store unit.
;;    === QUEUE ==>
;;
;;                    n7_slot1 - Integer unit.
;;                             - Branch unit.
;;                             - Floating-point and graphics unit.
;;                             - 3-cycles crypto unit.

(define_cpu_unit
  "n7_slot0,n7_slot1"
  "niagara7_0")

;; Some instructions stall the pipeline and prevent any other
;; instruction from being issued in the same cycle.  We assume the
;; same for multi-instruction insns.  This is modelled by reserving
;; both slots at once.

(define_reservation "n7_single_issue"
  "n7_slot0 + n7_slot1")

(define_insn_reservation "n7_single" 1
  (and
    (eq_attr "cpu" "niagara7")
    (eq_attr "type" "multi,savew,flushw,trap"))
  "n7_single_issue")

;; Most of the instructions executing in the integer unit have a
;; latency of 1.
;; Simple integer operations can issue from either slot.

(define_insn_reservation "n7_integer" 1
  (and
    (eq_attr "cpu" "niagara7")
    (eq_attr "type" "ialu,ialuX,shift,cmove,compare"))
  "(n7_slot0 | n7_slot1)")

;; Flushing the instruction memory takes 27 cycles.  Only the first
;; cycle occupies an issue slot.

(define_insn_reservation "n7_iflush" 27
  (and
    (eq_attr "cpu" "niagara7")
    (eq_attr "type" "iflush"))
  "(n7_slot0 | n7_slot1), nothing*26")

;; Integer multiplications execute in the integer unit and have a
;; latency of 12 cycles.
;;
;; The array*, edge* and pdistn instructions are modelled the same way.

(define_insn_reservation "n7_imul" 12
  (and
    (eq_attr "cpu" "niagara7")
    (eq_attr "type" "imul,array,edge,edgen,pdistn"))
  "(n7_slot0 | n7_slot1), nothing*11")

;; Integer divisions execute in the integer unit and have a latency
;; of 35 cycles.

(define_insn_reservation "n7_idiv" 35
  (and
    (eq_attr "cpu" "niagara7")
    (eq_attr "type" "idiv"))
  "(n7_slot0 | n7_slot1), nothing*34")

;; Integer and floating-point loads alike have a latency of 5 cycles
;; and execute in slot0.
;;
;; The prefetch instruction also executes in the load/store unit, but
;; its latency is only 1 cycle.

(define_insn_reservation "n7_load" 5
  (and
    (eq_attr "cpu" "niagara7")
    (ior
      (eq_attr "type" "fpload,sload")
      (and
        (eq_attr "type" "load")
        (eq_attr "subtype" "regular"))))
  "n7_slot0, nothing*4")

(define_insn_reservation "n7_prefetch" 1
  (and
    (eq_attr "cpu" "niagara7")
    (eq_attr "type" "load")
    (eq_attr "subtype" "prefetch"))
  "n7_slot0")

;; Integer and floating-point stores alike have a latency of 1 cycle
;; and execute in the load/store unit in slot0.

(define_insn_reservation "n7_store" 1
  (and
    (eq_attr "cpu" "niagara7")
    (eq_attr "type" "store,fpstore"))
  "n7_slot0")

;; Control-transfer instructions execute in the Branch Unit in
;; slot1.
;; Every kind of branch, call and return reserves slot1 for one cycle.

(define_insn_reservation "n7_cti" 1
  (and
    (eq_attr "cpu" "niagara7")
    (eq_attr "type" "cbcond,uncond_cbcond,branch,call,sibcall,call_no_delay_slot,uncond_branch,return"))
  "n7_slot1")

;; Many instructions executing in the Floating-point and Graphics
;; unit in slot1 have a latency of 11 cycles.

(define_insn_reservation "n7_fp" 11
  (and
    (eq_attr "cpu" "niagara7")
    (ior
      (eq_attr "type" "fpmove,fpcmove,fpcrmove,fp,fpcmp,fpmul,fgm_pack,fgm_mul,pdist")
      (and
        (eq_attr "type" "fga")
        (eq_attr "subtype" "fpu,maxmin"))))
  "n7_slot1, nothing*10")

;; Floating-point division and square-root instructions have high
;; latencies: 24 cycles in single precision and 37 cycles in double
;; precision.  They execute in the floating-point and graphics unit
;; in slot1.

(define_insn_reservation "n7_fpdivs" 24
  (and
    (eq_attr "cpu" "niagara7")
    (eq_attr "type" "fpdivs,fpsqrts"))
  "n7_slot1, nothing*23")

(define_insn_reservation "n7_fpdivd" 37
  (and
    (eq_attr "cpu" "niagara7")
    (eq_attr "type" "fpdivd,fpsqrtd"))
  "n7_slot1, nothing*36")

;; SIMD VIS instructions executing in the Floating-point and graphics
;; unit (FPG) in slot1 usually have a latency of either 11 or 12
;; cycles.
;;
;; However, the latency for many instructions is only 3 cycles if the
;; consumer can also be executed in 3 cycles.  We model this with a
;; bypass.  In these cases the instructions are executed in the
;; 3-cycle crypto unit which also serves slot1.
;; VIS instructions with a default latency of 11 cycles, issued from
;; slot1.

(define_insn_reservation "n7_vis_11cycles" 11
  (and (eq_attr "cpu" "niagara7")
       (ior (and (eq_attr "type" "fga")
                 (eq_attr "subtype" "addsub64,other"))
            (and (eq_attr "type" "vismv")
                 (eq_attr "subtype" "double,single"))
            (and (eq_attr "type" "visl")
                 (eq_attr "subtype" "double,single"))))
  "n7_slot1, nothing*10")

;; VIS instructions with a default latency of 12 cycles, issued from
;; slot1.

(define_insn_reservation "n7_vis_12cycles" 12
  (and (eq_attr "cpu" "niagara7")
       (ior (eq_attr "type" "bmask,viscmp")
            (and (eq_attr "type" "fga")
                 (eq_attr "subtype" "cmask"))
            (and (eq_attr "type" "vismv")
                 (eq_attr "subtype" "movstouw"))))
  "n7_slot1, nothing*11")

;; When both producer and consumer match one of the "n7_vis_*"
;; reservations, the result is available after only 3 cycles.

(define_bypass 3 "n7_vis_*" "n7_vis_*")

;; Some other VIS instructions have a latency of 12 cycles, and won't
;; be executed in the 3-cycle crypto pipe.  The reservation name
;; deliberately does not match "n7_vis_*", so the bypass does not
;; apply to it.

(define_insn_reservation "n7_lzd" 12
  (and (eq_attr "cpu" "niagara7")
       (ior (eq_attr "type" "lzd")
            (and (eq_attr "type" "gsr")
                 (eq_attr "subtype" "alignaddr"))))
  "n7_slot1, nothing*11")

;; A couple of VIS instructions feature very low latencies in the M7.

(define_insn_reservation "n7_single_vis" 1
  (and (eq_attr "cpu" "niagara7")
       (eq_attr "type" "vismv")
       (eq_attr "subtype" "movxtod"))
  "n7_slot1")

(define_insn_reservation "n7_double_vis" 2
  (and (eq_attr "cpu" "niagara7")
       (eq_attr "type" "vismv")
       (eq_attr "subtype" "movdtox"))
  "n7_slot1, nothing")

;; Reading and writing to the gsr register takes a high number of
;; cycles that is not documented in the PRM.  Let's use the same value
;; as the M8.
;;
;; The reservation is one issue cycle in slot1 followed by 69 idle
;; cycles, so that its total length equals the declared latency of 70,
;; like the other reservations above.

(define_insn_reservation "n7_gsr_reg" 70
  (and (eq_attr "cpu" "niagara7")
       (eq_attr "type" "gsr")
       (eq_attr "subtype" "reg"))
  "n7_slot1, nothing*69")