;; Cavium ThunderX pipeline description
;; Copyright (C) 2014-2022 Free Software Foundation, Inc.
;;
;; Written by Andrew Pinski  <apinski@cavium.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.

;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.


;; Thunder is a dual-issue processor that can issue all instructions on
;; pipe0 and a subset on pipe1.


;; One automaton holds the two issue pipes; the multiplier, the divider
;; and the SIMD unit each get an automaton of their own.
(define_automaton "thunderx_main, thunderx_mult, thunderx_divide, thunderx_simd")

(define_cpu_unit "thunderx_pipe0" "thunderx_main")
(define_cpu_unit "thunderx_pipe1" "thunderx_main")
(define_cpu_unit "thunderx_mult" "thunderx_mult")
(define_cpu_unit "thunderx_divide" "thunderx_divide")
(define_cpu_unit "thunderx_simd" "thunderx_simd")

;; Add/subtract, logical, move, address and extend operations: one cycle
;; on either pipe.
(define_insn_reservation "thunderx_add" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "adc_imm,adc_reg,adr,alu_imm,alu_sreg,alus_imm,alus_sreg,extend,logic_imm,logic_reg,logics_imm,logics_reg,mov_imm,mov_reg"))
  "thunderx_pipe0 | thunderx_pipe1")

;; Bitfield, rotate, shift and bit/byte reverse operations: one cycle on
;; either pipe.  The "extend" type is matched by thunderx_add above, which
;; has the same latency and reservation, so it is not repeated here.
(define_insn_reservation "thunderx_shift" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "bfm,bfx,rotate_imm,shift_imm,shift_reg,rbit,rev"))
  "thunderx_pipe0 | thunderx_pipe1")


;; Arithmetic instructions with an extra shift or extend take two cycles.
;; FIXME: This needs more attributes on aarch64 than what is currently there;
;; this is conservative for now.
;; Except this is not correct as this is only for !(LSL && shift by 0/1/2/3)
;; Except this is not correct as this is only for !(zero extend)

;; ALU and logical operations with a shifted or extended operand: two
;; cycles on either pipe.
(define_insn_reservation "thunderx_arith_shift" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "alu_ext,alu_shift_imm_lsl_1to4,alu_shift_imm_other,alu_shift_reg,alus_ext,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg,alus_shift_imm"))
  "thunderx_pipe0 | thunderx_pipe1")

;; Conditional select: two cycles on either pipe.
(define_insn_reservation "thunderx_csel" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "csel"))
  "thunderx_pipe0 | thunderx_pipe1")

;; Multiply, multiply-accumulate and count-leading-zeros can only issue on
;; pipe 1, and they occupy the multiplier.

(define_insn_reservation "thunderx_mul" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
  "thunderx_pipe1 + thunderx_mult")

;; crcb, crch and crcw are 4 cycles and can only issue on pipe 1.
;; This reservation matches every "crc" insn because the mode-specific
;; variant below is disabled.

(define_insn_reservation "thunderx_crc32" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crc"))
  "thunderx_pipe1 + thunderx_mult")

;; crcx is 5 cycles and can only issue on pipe 1.
;(define_insn_reservation "thunderx_crc64" 5
;  (and (eq_attr "tune" "thunderx")
;       (eq_attr "type" "crc")
;       (eq_attr "mode" "DI"))
;  "thunderx_pipe1 + thunderx_mult")

;; Integer divide: issues on pipe 1 and then keeps the divider busy for a
;; further 21 cycles.  This reservation matches every udiv/sdiv because the
;; mode-specific variant below is disabled.
(define_insn_reservation "thunderx_div32" 22
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "udiv,sdiv"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide * 21")

;(define_insn_reservation "thunderx_div64" 38
;  (and (eq_attr "tune" "thunderx")
;       (eq_attr "type" "udiv,sdiv")
;       (eq_attr "mode" "DI"))
;  "thunderx_pipe1 + thunderx_divide, thunderx_divide * 34")

;; Stores take one cycle in pipe 0.
(define_insn_reservation "thunderx_store" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "store_4"))
  "thunderx_pipe0")

;; Store pairs are single issued: they claim both pipes for one cycle.
(define_insn_reservation "thunderx_storepair" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "store_8,store_16"))
  "thunderx_pipe0 + thunderx_pipe1")

;; Prefetches are single issued.
;(define_insn_reservation "thunderx_prefetch" 1
;  (and (eq_attr "tune" "thunderx")
;       (eq_attr "type" "prefetch"))
;  "thunderx_pipe0 + thunderx_pipe1")

;; Loads (and load pairs) from L1 take 3 cycles in pipe 0.
(define_insn_reservation "thunderx_load" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "load_4, load_8, load_16"))
  "thunderx_pipe0")

;; Branches, traps and calls: one cycle, pipe 1 only.
(define_insn_reservation "thunderx_brj" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "branch,trap,call"))
  "thunderx_pipe1")

;; FPU
;; Every scalar floating-point reservation below issues on pipe 1.

;; FP add/subtract: 4 cycles.
(define_insn_reservation "thunderx_fadd" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "faddd,fadds"))
  "thunderx_pipe1")

;; FP constant materialisation: 1 cycle.
(define_insn_reservation "thunderx_fconst" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fconsts,fconstd"))
  "thunderx_pipe1")

;; Moves between FP registers, including min/max, are 2 cycles.
(define_insn_reservation "thunderx_fmov" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fmov,f_minmaxs,f_minmaxd"))
  "thunderx_pipe1")

;; ABS and NEG are 1 cycle.
(define_insn_reservation "thunderx_fabs" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "ffariths,ffarithd"))
  "thunderx_pipe1")

;; FP conditional select: 3 cycles.
(define_insn_reservation "thunderx_fcsel" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fcsel"))
  "thunderx_pipe1")

;; Transfers between the general and FP register files: 2 cycles.
(define_insn_reservation "thunderx_fmovgpr" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_mrc, f_mcr"))
  "thunderx_pipe1")

;; FP compare and conditional compare: 3 cycles.
(define_insn_reservation "thunderx_fcmp" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd"))
  "thunderx_pipe1")

;; FP multiply and multiply-accumulate: 6 cycles.
(define_insn_reservation "thunderx_fmul" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
  "thunderx_pipe1")

;; FP divide and square root issue on pipe 1 together with the divider and
;; then keep the divider reserved for the number of further cycles given in
;; each reservation string.

(define_insn_reservation "thunderx_fdivs" 12
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fdivs"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*8")

(define_insn_reservation "thunderx_fdivd" 22
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fdivd"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*18")

(define_insn_reservation "thunderx_fsqrts" 17
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fsqrts"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*13")

(define_insn_reservation "thunderx_fsqrtd" 31
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fsqrtd"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*27")

;; The rounding conversion inside fp is 4 cycles.
(define_insn_reservation "thunderx_frint" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_cvt,f_rints,f_rintd"))
  "thunderx_pipe1")

;; Float to integer with a move from int to/from float is 6 cycles.
(define_insn_reservation "thunderx_f_cvt" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_cvtf2i,f_cvti2f"))
  "thunderx_pipe1")

;; FP/SIMD loads and stores happen in pipe 0.
;; 64bit loads of registers/pairs are 4 cycles from L1.
(define_insn_reservation "thunderx_64simd_fp_load" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_loadd,f_loads,neon_load1_1reg,\
			neon_load1_1reg_q,neon_load1_2reg"))
  "thunderx_pipe0")

;; A 128bit load pair is single issued and 4 cycles from L1.
(define_insn_reservation "thunderx_128simd_pair_load" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_load1_2reg_q"))
  "thunderx_pipe0+thunderx_pipe1")

;; FP/SIMD stores take one cycle in pipe 0.
;; ST1 with one register, either multiple structures or single structure, is
;; also one cycle.
;; Scalar FP stores and single-register / single-lane ST1 forms: one cycle
;; in pipe 0.
(define_insn_reservation "thunderx_simd_fp_store" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q, \
			neon_store1_one_lane, neon_store1_one_lane_q"))
  "thunderx_pipe0")

;; 64bit neon store pairs are single issued for one cycle.
(define_insn_reservation "thunderx_64neon_storepair" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_store1_2reg"))
  "thunderx_pipe0 + thunderx_pipe1")

;; 128bit neon store pairs are single issued for two cycles.
(define_insn_reservation "thunderx_128neon_storepair" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_store1_2reg_q"))
  "(thunderx_pipe0 + thunderx_pipe1)*2")

;; LD1R/LD1 (with a single struct) takes 6 cycles and is issued in pipe0.
(define_insn_reservation "thunderx_neon_ld1" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_load1_all_lanes"))
  "thunderx_pipe0")

;; SIMD/NEON (q forms take an extra cycle)
;; SIMD for ThunderX is 64bit wide.
;; Every non-q reservation below issues on pipe 1 and uses the SIMD unit
;; for one cycle; the q forms hold the SIMD unit for a second cycle.

;; ThunderX simd move instruction types - 2/3 cycles
;; ThunderX dup, ins is the same
;; ThunderX SIMD fabs/fneg instruction types
(define_insn_reservation "thunderx_neon_move" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \
			neon_fp_compare_d, neon_move, neon_dup, \
			neon_ins, neon_from_gp, neon_to_gp, \
			neon_abs, neon_neg, \
			neon_fp_neg_s, neon_fp_abs_s"))
  "thunderx_pipe1 + thunderx_simd")

(define_insn_reservation "thunderx_neon_move_q" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \
			neon_fp_compare_d_q, neon_move_q, neon_dup_q, \
			neon_ins_q, neon_from_gp_q, neon_to_gp_q, \
			neon_abs_q, neon_neg_q, \
			neon_fp_neg_s_q, neon_fp_neg_d_q, \
			neon_fp_abs_s_q, neon_fp_abs_d_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")

;; ThunderX simd simple/add instruction types - 4/5 cycles

(define_insn_reservation "thunderx_neon_add" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_reduc_add, neon_reduc_minmax, neon_fp_reduc_add_s, \
			neon_fp_reduc_add_d, neon_fp_to_int_s, neon_fp_to_int_d, \
			neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \
			neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \
			neon_fp_minmax_s, neon_fp_minmax_d, neon_cls, \
			neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d, \
			neon_arith_acc, neon_rev, neon_fp_abd_s, neon_fp_abd_d, \
			neon_fp_reduc_minmax_s"))
  "thunderx_pipe1 + thunderx_simd")

;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form which is incorrect

(define_insn_reservation "thunderx_neon_add_q" 5
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_reduc_add_q, neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \
			neon_fp_reduc_add_d_q, neon_fp_to_int_s_q, neon_fp_to_int_d_q, \
			neon_add_halve_q, neon_sub_halve_q, neon_qadd_q, neon_compare_q, \
			neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \
			neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_cls_q, \
			neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \
			neon_add_long, neon_sub_long, neon_fp_abd_s_q, neon_fp_abd_d_q, \
			neon_arith_acc_q, neon_rev_q, \
			neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")

;; Multiplies (float and integer), shifts, permutes (except for TBL) and
;; float conversions are 6/7 cycles.
(define_insn_reservation "thunderx_neon_mult" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_d, neon_fp_mla_s, neon_fp_mla_d, \
			neon_mla_b, neon_mla_h, neon_mla_s, \
			neon_mla_h_scalar, neon_mla_s_scalar, \
			neon_ext, neon_shift_imm, neon_permute, \
			neon_int_to_fp_s, neon_int_to_fp_d, neon_shift_reg, \
			neon_sat_shift_reg, neon_shift_acc, \
			neon_mul_b, neon_mul_h, neon_mul_s, \
			neon_mul_h_scalar, neon_mul_s_scalar, \
			neon_fp_mul_s_scalar, \
			neon_fp_mla_s_scalar"))
  "thunderx_pipe1 + thunderx_simd")

(define_insn_reservation "thunderx_neon_mult_q" 7
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_d_q, neon_fp_mla_s_q, neon_fp_mla_d_q, \
			neon_mla_b_q, neon_mla_h_q, neon_mla_s_q, \
			neon_mla_h_scalar_q, neon_mla_s_scalar_q, \
			neon_ext_q, neon_shift_imm_q, neon_permute_q, \
			neon_int_to_fp_s_q, neon_int_to_fp_d_q, neon_shift_reg_q, \
			neon_sat_shift_reg_q, neon_shift_acc_q, \
			neon_shift_imm_long, \
			neon_mul_b_q, neon_mul_h_q, neon_mul_s_q, \
			neon_mul_h_scalar_q, neon_mul_s_scalar_q, \
			neon_fp_mul_s_scalar_q, neon_fp_mul_d_scalar_q, \
			neon_mul_b_long, neon_mul_h_long, neon_mul_s_long, \
			neon_shift_imm_narrow_q, neon_fp_cvt_widen_s, neon_fp_cvt_narrow_d_q, \
			neon_fp_mla_s_scalar_q, neon_fp_mla_d_scalar_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")


;; AES[ED] is 5 cycles.
(define_insn_reservation "thunderx_crypto_aese" 5
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crypto_aese"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")

;; AES{,I}MC is 3 cycles.
(define_insn_reservation "thunderx_crypto_aesmc" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "crypto_aesmc"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")


;; Thunder 128bit SIMD reads the upper half in cycle 2 and writes the upper
;; half in the last cycle, so a q-form result feeding another q-form insn is
;; available one cycle earlier than the producer's default latency.
(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")
(define_bypass 6 "thunderx_neon_mult_q" "thunderx_neon_move_q, thunderx_neon_add_q, thunderx_neon_mult_q")

;; 64bit TBL is emulated and takes 160 cycles
;; Both pipes are reserved for the full 160 cycles.
(define_insn_reservation "thunderx_tbl" 160
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_tbl1"))
  "(thunderx_pipe1+thunderx_pipe0)*160")

;; 128bit TBL is emulated and takes 320 cycles; both pipes are reserved for
;; the whole time.
(define_insn_reservation "thunderx_tblq" 320
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_tbl1_q"))
  "(thunderx_pipe1+thunderx_pipe0)*320")

;; Assume both pipes are needed for unknown and multiple-instruction
;; patterns.

(define_insn_reservation "thunderx_unknown" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "untyped,multiple"))
  "thunderx_pipe0 + thunderx_pipe1")

