;; Cavium ThunderX 3 CN11xx pipeline description
;; Copyright (C) 2020 Free Software Foundation, Inc.
;;
;; Contributed by Marvell

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.

;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.

; Automata used by this description: a main one for the i0..i3 units,
; one for the f0..f3 units, one for the load/store units, and a separate
; one for the multiply pseudo-units.
(define_automaton "thunderx3t110, thunderx3t110_advsimd, thunderx3t110_ldst")
(define_automaton "thunderx3t110_mult")

; Units i0..i3; reserved below by the branch, ALU, divide, multiply,
; CRC and indexed load reservations.
(define_cpu_unit "thunderx3t110_i0" "thunderx3t110")
(define_cpu_unit "thunderx3t110_i1" "thunderx3t110")
(define_cpu_unit "thunderx3t110_i2" "thunderx3t110")
(define_cpu_unit "thunderx3t110_i3" "thunderx3t110")

; Units ls0/ls1 are reserved by loads and stores; the store reservations
; below additionally reserve sd in the cycle after ls0|ls1.
(define_cpu_unit "thunderx3t110_ls0" "thunderx3t110_ldst")
(define_cpu_unit "thunderx3t110_ls1" "thunderx3t110_ldst")
(define_cpu_unit "thunderx3t110_sd" "thunderx3t110_ldst")

; Pseudo-units for multiply pipeline.
; unchanged from TX2, occupies I1 for four (1 + 3 additional) slots

(define_cpu_unit "thunderx3t110_i1m1" "thunderx3t110_mult")
(define_cpu_unit "thunderx3t110_i1m2" "thunderx3t110_mult")
(define_cpu_unit "thunderx3t110_i1m3" "thunderx3t110_mult")

; Pseudo-units for load delay (assuming dcache hit).
; Three delay pseudo-units per load/store pipe; they are chained after
; ls0/ls1 by the *delay reservations further down.
(define_cpu_unit "thunderx3t110_ls0d1" "thunderx3t110_ldst")
(define_cpu_unit "thunderx3t110_ls0d2" "thunderx3t110_ldst")
(define_cpu_unit "thunderx3t110_ls0d3" "thunderx3t110_ldst")

(define_cpu_unit "thunderx3t110_ls1d1" "thunderx3t110_ldst")
(define_cpu_unit "thunderx3t110_ls1d2" "thunderx3t110_ldst")
(define_cpu_unit "thunderx3t110_ls1d3" "thunderx3t110_ldst")

; Define FP units f0/f1/f2/f3.
(define_cpu_unit "thunderx3t110_f0" "thunderx3t110_advsimd")
(define_cpu_unit "thunderx3t110_f1" "thunderx3t110_advsimd")
(define_cpu_unit "thunderx3t110_f2" "thunderx3t110_advsimd")
(define_cpu_unit "thunderx3t110_f3" "thunderx3t110_advsimd")

; Named alternations ("any one of") over the integer, load/store and FP
; units; the numeric suffix lists the unit indices that may be chosen.
(define_reservation "thunderx3t110_i23" "thunderx3t110_i2|thunderx3t110_i3")
(define_reservation "thunderx3t110_i01"
  "thunderx3t110_i0|thunderx3t110_i1")
(define_reservation "thunderx3t110_i012"
  "thunderx3t110_i0|thunderx3t110_i1|thunderx3t110_i2")
(define_reservation "thunderx3t110_i0123"
  "thunderx3t110_i0|thunderx3t110_i1|thunderx3t110_i2|thunderx3t110_i3")
(define_reservation "thunderx3t110_ls01" "thunderx3t110_ls0|thunderx3t110_ls1")
(define_reservation "thunderx3t110_f01" "thunderx3t110_f0|thunderx3t110_f1")
(define_reservation "thunderx3t110_f23" "thunderx3t110_f2|thunderx3t110_f3")
(define_reservation "thunderx3t110_f0123"
  "thunderx3t110_f0|thunderx3t110_f1|thunderx3t110_f2|thunderx3t110_f3")

; A load with delay in the ls0/ls1 pipes.
; this is always a delay of four
; (one cycle on the pipe itself followed by its three delay pseudo-units).
(define_reservation "thunderx3t110_l0delay"
  "thunderx3t110_ls0,thunderx3t110_ls0d1,thunderx3t110_ls0d2,\
   thunderx3t110_ls0d3")
(define_reservation "thunderx3t110_l1delay"
  "thunderx3t110_ls1,thunderx3t110_ls1d1,thunderx3t110_ls1d2,\
   thunderx3t110_ls1d3")
; Either pipe, with its delay chain.  Used below by the ASIMD lane and
; structure load reservations.
(define_reservation "thunderx3t110_l01delay"
  "thunderx3t110_l0delay|thunderx3t110_l1delay")
;; Branch and call instructions.
; Calls, branches and traps: latency 1, issued on i2 or i3.
(define_insn_reservation "thunderx3t110_branch" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "call,branch,trap"))
  "thunderx3t110_i23")

;; Misc instructions.

; Speculation barrier
(define_insn_reservation "thunderx3t110_nothing" 0
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "block"))
  "nothing")

; NOTE(review): latency is given as 0 although i2 is reserved for a
; cycle -- confirm this is intended.
(define_insn_reservation "thunderx3t110_mrs" 0
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "mrs"))
  "thunderx3t110_i2")

; Type "multiple" reserves the listed units all in the same cycle
; ('+' joins simultaneous reservations).
; NOTE(review): i2, f2 and f3 are not part of the list -- confirm
; whether they were left out on purpose.
(define_insn_reservation "thunderx3t110_multiple" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "multiple"))
  "thunderx3t110_i0+thunderx3t110_i1+thunderx3t110_i3+thunderx3t110_ls0+\
   thunderx3t110_ls1+thunderx3t110_sd+thunderx3t110_i1m1+thunderx3t110_i1m2+\
   thunderx3t110_i1m3+thunderx3t110_f0+thunderx3t110_f1")

;; Integer arithmetic/logic instructions.

; Plain register moves are handled by renaming,
; and don't create any uops.
(define_insn_reservation "thunderx3t110_regmove" 0
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "mov_reg"))
  "nothing")

; Simple ALU, logic, shift, bitfield and extend operations: latency 1 on
; any of i0..i3.
(define_insn_reservation "thunderx3t110_alu_basic" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "alu_imm,alu_sreg,alus_imm,alus_sreg,\
                        adc_reg,adc_imm,adcs_reg,adcs_imm,\
                        logic_reg,logic_imm,logics_reg,logics_imm,\
                        csel,adr,mov_imm,shift_reg,shift_imm,bfm,\
                        bfx,rbit,rev,extend,rotate_imm"))
  "thunderx3t110_i0123")

; distinguish between latency 1|2 and throughput 1/4|2/4?
; is it actually 1,1/2,{i0,i1} vs 2,1/4,{i0,i1,i2,i3}
; NOTE(review): thunderx3t110_alu_shift and thunderx3t110_alu_shift1
; have exactly the same condition.  The GCC internals manual warns that
; the reservation used is not defined when several conditions are true
; for one insn -- confirm which of the two is meant to apply.
(define_insn_reservation "thunderx3t110_alu_shift" 2
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "alu_shift_imm,alu_ext,\
                        alus_shift_imm,alus_ext,\
                        logic_shift_imm,logics_shift_imm"))
  "thunderx3t110_i0123")

(define_insn_reservation "thunderx3t110_alu_shift1" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "alu_shift_imm,alu_ext,\
                        alus_shift_imm,alus_ext,\
                        logic_shift_imm,logics_shift_imm"))
  "thunderx3t110_i01")

; we are going for the optimistic answer (13)
; for now, the worst case is 23
; The reservation keeps i1 busy for three consecutive cycles.
(define_insn_reservation "thunderx3t110_div" 13
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "sdiv,udiv"))
  "thunderx3t110_i1*3")

; Multiply-accumulate: one of i0..i3, then the three multiply
; pseudo-units in successive cycles, then one of i0..i2.
(define_insn_reservation "thunderx3t110_madd" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "mla,smlal,umlal"))
  "thunderx3t110_i0123,thunderx3t110_i1m1,thunderx3t110_i1m2,thunderx3t110_i1m3,\
   thunderx3t110_i012")

; NOTE: smull, umull are used for "high part" multiplies too.
; mul is an alias for MADD
; smulh, umulh (4,1) should be distinguished from the
; others (5,1), but there is no such type, so we go for the
; conservative approach of (5,1) for now
; smulh, umulh only run on I1
(define_insn_reservation "thunderx3t110_mul" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "mul,smull,umull"))
  "thunderx3t110_i0123,thunderx3t110_i1m1,thunderx3t110_i1m2,thunderx3t110_i1m3")

; Count-leading-zeros: latency 3, i1 only.
(define_insn_reservation "thunderx3t110_countbits" 3
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "clz"))
  "thunderx3t110_i1")

;; Integer loads and stores.
; load_4 matches prefetch, a multitude of move/str/dup variants,
; sign extend
(define_insn_reservation "thunderx3t110_load_basic" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "load_4"))
  "thunderx3t110_ls01")

; model use of I0/I1/I2 for index versions only, model 4|8 2nd on load
(define_insn_reservation "thunderx3t110_loadpair" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "load_8,load_16"))
  "thunderx3t110_i012,thunderx3t110_ls01")

; Stores reserve ls0|ls1 and then sd in the following cycle.
(define_insn_reservation "thunderx3t110_store_basic" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "store_4"))
  "thunderx3t110_ls01,thunderx3t110_sd")

; model use of I0/I1/I2/I3 for index versions, model differing
; throughputs
(define_insn_reservation "thunderx3t110_storepair_basic" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "store_8,store_16"))
  "thunderx3t110_ls01,thunderx3t110_sd")

;; FP data processing instructions.

(define_insn_reservation "thunderx3t110_fp_simple" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "ffariths,ffarithd,f_minmaxs,f_minmaxd"))
  "thunderx3t110_f0123")

; distinguish latency 3/4 throughput 1/2|1/4
; NOTE(review): thunderx3t110_fp_addsub3 and thunderx3t110_fp_addsub4
; share one condition; the GCC internals manual says the reservation
; used is not defined in that case -- confirm which should apply.
(define_insn_reservation "thunderx3t110_fp_addsub3" 3
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fadds,faddd"))
  "thunderx3t110_f23")
(define_insn_reservation "thunderx3t110_fp_addsub4" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fadds,faddd"))
  "thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_fp_cmp" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd"))
  "thunderx3t110_f0123")

; need to split out latency 23 throughput 23/4: F64 from
; latency 16 throughput 16/4: FDIV F32
; Single precision holds the chosen FP unit for 3 cycles, double
; precision for 5.
(define_insn_reservation "thunderx3t110_fp_divsqrt_s" 16
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fdivs,fsqrts"))
  "thunderx3t110_f0*3|thunderx3t110_f1*3|\
   thunderx3t110_f2*3|thunderx3t110_f3*3")

(define_insn_reservation "thunderx3t110_fp_divsqrt_d" 23
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fdivd,fsqrtd"))
  "thunderx3t110_f0*5|thunderx3t110_f1*5|\
   thunderx3t110_f2*5|thunderx3t110_f3*5")

; Scalar FP multiply and multiply-accumulate: f0 or f1 only.
(define_insn_reservation "thunderx3t110_fp_mul_mac" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fmuls,fmuld,fmacs,fmacd"))
  "thunderx3t110_f01")

(define_insn_reservation "thunderx3t110_frint" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "f_rints,f_rintd"))
  "thunderx3t110_f0123")

; mimic latency 3|4 throughput 1/2|1/4
; NOTE(review): thunderx3t110_fcsel3 and thunderx3t110_fcsel4 share one
; condition -- confirm which reservation is expected to be selected.
(define_insn_reservation "thunderx3t110_fcsel3" 3
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fcsel"))
  "thunderx3t110_f23")

(define_insn_reservation "thunderx3t110_fcsel4" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fcsel"))
  "thunderx3t110_f0123")

;; FP miscellaneous instructions.

(define_insn_reservation "thunderx3t110_fp_cvt" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "f_cvtf2i,f_cvt,f_cvti2f"))
  "thunderx3t110_f0123")

; even though f_mrc has to belong to fp_mov_to_gen
; we retain this for the sake of legacy as codegen
; doesn't use it anyway
; NOTE(review): thunderx3t110_fp_mov3 and thunderx3t110_fp_mov share one
; condition -- confirm which reservation is expected to be selected.
(define_insn_reservation "thunderx3t110_fp_mov3" 3
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fconsts,fconstd,fmov,f_mrc"))
  "thunderx3t110_f23")

(define_insn_reservation "thunderx3t110_fp_mov" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "fconsts,fconstd,fmov,f_mrc"))
  "thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_fp_mov_to_gen" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "f_mcr"))
  "thunderx3t110_f0123")

;; FP loads and stores.
; model use of I0/I1/I2 for post/pre index modes

(define_insn_reservation "thunderx3t110_fp_load_basic" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "f_loads,f_loadd"))
  "thunderx3t110_ls01")

; model throughput 1
(define_insn_reservation "thunderx3t110_fp_store_basic" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "f_stores,f_stored"))
  "thunderx3t110_ls01,thunderx3t110_sd")

;; ASIMD integer instructions.

; All of the integer vector arithmetic, multiply, multiply-accumulate
; and shift types listed here share one model: latency 5 on any of
; f0..f3.
(define_insn_reservation "thunderx3t110_asimd_int" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_abd,neon_abd_q,\
                        neon_arith_acc,neon_arith_acc_q,\
                        neon_abs,neon_abs_q,\
                        neon_add,neon_add_q,\
                        neon_sub,neon_sub_q,\
                        neon_neg,neon_neg_q,\
                        neon_add_long,neon_add_widen,\
                        neon_add_halve,neon_add_halve_q,\
                        neon_sub_long,neon_sub_widen,\
                        neon_sub_halve,neon_sub_halve_q,\
                        neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\
                        neon_qabs,neon_qabs_q,\
                        neon_qadd,neon_qadd_q,\
                        neon_qneg,neon_qneg_q,\
                        neon_qsub,neon_qsub_q,\
                        neon_minmax,neon_minmax_q,\
                        neon_reduc_minmax,neon_reduc_minmax_q,\
                        neon_mul_b,neon_mul_h,neon_mul_s,\
                        neon_mul_b_q,neon_mul_h_q,neon_mul_s_q,\
                        neon_sat_mul_b,neon_sat_mul_h,neon_sat_mul_s,\
                        neon_sat_mul_b_q,neon_sat_mul_h_q,neon_sat_mul_s_q,\
                        neon_mla_b,neon_mla_h,neon_mla_s,\
                        neon_mla_b_q,neon_mla_h_q,neon_mla_s_q,\
                        neon_mul_b_long,neon_mul_h_long,\
                        neon_mul_s_long,neon_mul_d_long,\
                        neon_sat_mul_b_long,neon_sat_mul_h_long,\
                        neon_sat_mul_s_long,\
                        neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\
                        neon_sat_mla_b_long,neon_sat_mla_h_long,\
                        neon_sat_mla_s_long,\
                        neon_shift_acc,neon_shift_acc_q,\
                        neon_shift_imm,neon_shift_imm_q,\
                        neon_shift_reg,neon_shift_reg_q,\
                        neon_shift_imm_long,neon_shift_imm_narrow_q,\
                        neon_sat_shift_imm,neon_sat_shift_imm_q,\
                        neon_sat_shift_reg,neon_sat_shift_reg_q,\
                        neon_sat_shift_imm_narrow_q"))
  "thunderx3t110_f0123")

; neon_reduc_add is used for both addp and [su]adalp
(define_insn_reservation "thunderx3t110_asimd_reduc_add" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_reduc_add,neon_reduc_add_q"))
  "thunderx3t110_f01")

(define_insn_reservation "thunderx3t110_asimd_cmp" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_compare,neon_compare_q,neon_compare_zero,\
                        neon_tst,neon_tst_q"))
  "thunderx3t110_f0123")

; neon_logic used in ldr, str, mov, umov, fmov, mov; orn; bic; and,
; simd mov immediate; orr, simd mov immediate; eor; not (mvn)
; latency 4 throughput 1/2 LS0/LS1: ldr
; latency 1 throughput 1 LS0/LS1,SDI,I0/I1/I2: str
; latency 3|4 throughput 1/2|1/4 F2/F3 F0/F1/F2/F3: fmov immed, orn,
; bic, and, orr, eor, not (mvn)
; latency 4 throughput 1/4 F0/F1/F2/F3: fmov register, fmov gen to vec
; latency 5 throughput 1/4 F0/F1/F2/F3: fmov vec to gen, umov, fmov
; NOTE(review): thunderx3t110_asimd_logic4 and thunderx3t110_asimd_logic5
; share one condition; the GCC internals manual says the reservation
; used is not defined in that case -- confirm which should apply.
(define_insn_reservation "thunderx3t110_asimd_logic4" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_logic,neon_logic_q"))
  "thunderx3t110_f23")

(define_insn_reservation "thunderx3t110_asimd_logic5" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_logic,neon_logic_q"))
  "thunderx3t110_f0123")

;; ASIMD floating-point instructions.
; Distinguish between latency 5 throughput 1/4: fabs, fmax, fmin, fneg
; latency 4 throughput 1/4: fcmp
; (both groups are currently modelled together with latency 5).
(define_insn_reservation "thunderx3t110_asimd_fp_simple" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_d,\
                        neon_fp_abs_s_q,neon_fp_abs_d_q,\
                        neon_fp_compare_s,neon_fp_compare_d,\
                        neon_fp_compare_s_q,neon_fp_compare_d_q,\
                        neon_fp_minmax_s,neon_fp_minmax_d,\
                        neon_fp_minmax_s_q,neon_fp_minmax_d_q,\
                        neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d,\
                        neon_fp_reduc_minmax_s_q,neon_fp_reduc_minmax_d_q,\
                        neon_fp_neg_s,neon_fp_neg_d,\
                        neon_fp_neg_s_q,neon_fp_neg_d_q"))
  "thunderx3t110_f0123")

; distinguish between latency 3 throughput 1/2,
; latency 4 throughput 1/4
; neon_fp_reduc_add_<stype><q> is used for both faddp and
; vector reduction add. On TX3, faddp is 3|4 1/2|1/4 and reduction is 5 1/4
; NOTE(review): thunderx3t110_asimd_fp_arith3 and
; thunderx3t110_asimd_fp_arith4 share one condition; the GCC internals
; manual says the reservation used is not defined in that case --
; confirm which should apply.
(define_insn_reservation "thunderx3t110_asimd_fp_arith3" 3
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\
                        neon_fp_abd_s_q,neon_fp_abd_d_q,\
                        neon_fp_addsub_s,neon_fp_addsub_d,\
                        neon_fp_addsub_s_q,neon_fp_addsub_d_q,\
                        neon_fp_reduc_add_s,neon_fp_reduc_add_d,\
                        neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q"))
  "thunderx3t110_f23")

(define_insn_reservation "thunderx3t110_asimd_fp_arith4" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\
                        neon_fp_abd_s_q,neon_fp_abd_d_q,\
                        neon_fp_addsub_s,neon_fp_addsub_d,\
                        neon_fp_addsub_s_q,neon_fp_addsub_d_q,\
                        neon_fp_reduc_add_s,neon_fp_reduc_add_d,\
                        neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q"))
  "thunderx3t110_f0123")

; Vector FP multiply and multiply-accumulate: latency 5, any of f0..f3.
(define_insn_reservation "thunderx3t110_asimd_fp_arith5" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_mul_s,neon_fp_mul_d,\
                        neon_fp_mul_s_q,neon_fp_mul_d_q,\
                        neon_fp_mul_s_scalar_q,neon_fp_mul_d_scalar_q,\
                        neon_fp_mla_s,neon_fp_mla_d,\
                        neon_fp_mla_s_q,neon_fp_mla_d_q"))
  "thunderx3t110_f0123")

; neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q: fcvtl,fcvtl2,fcvtn,fcvtn2
; neon_fp_to_int_s,neon_fp_to_int_d: fcvt{<frint_suffix><su>,z<su>}
; where frint_suffix: zpmixan, su: su (plus other sign/unsign/extract...
; neon_fp_to_int_s_q,neon_fp_to_int_d_q: fcvtz<su> other
; The int_to_fp* is complicated
; neon_int_to_fp_s,neon_int_to_fp_d: <su_optab>cvtf
; neon_int_to_fp_s_q,neon_int_to_fp_d_q
; Round matches single define_insn, frint<frint_suffix>
; neon_fp_round_s,neon_fp_round_d,neon_fp_round_s_q,
; neon_fp_round_d_q: frint<frint_suffix>
; FCVT*,VCVTAU,[SU]CVTF: latency 5 throughput 1/4
; FRINT*: latency 5 throughput 1/4
(define_insn_reservation "thunderx3t110_asimd_fp_conv" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q,\
                        neon_fp_to_int_s,neon_fp_to_int_d,\
                        neon_fp_to_int_s_q,neon_fp_to_int_d_q,\
                        neon_int_to_fp_s,neon_int_to_fp_d,\
                        neon_int_to_fp_s_q,neon_int_to_fp_d_q,\
                        neon_fp_round_s,neon_fp_round_d,\
                        neon_fp_round_s_q,neon_fp_round_d_q"))
  "thunderx3t110_f0123")

; model that pipeline is occupied the whole time D/F32, Q/F32: 16/4
; Q/F64: 23/4
; NOTE(review): the two reservations below hold one of f0..f3 for a
; single cycle only (unlike the scalar fp_divsqrt ones, which repeat the
; unit) -- confirm whether the "occupied the whole time" intent above is
; actually modelled.
(define_insn_reservation "thunderx3t110_asimd_fp_div_s" 16
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q"))
  "thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_asimd_fp_div_d" 23
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_div_d,neon_fp_div_d_q"))
  "thunderx3t110_f0123")

;; ASIMD miscellaneous instructions.
; divided out:
; rbit,bsl,bsl_q,cls,cls_q,cnt,cnt_q,move,move_q: 3|4 1/2 | 1/4
; from_gp,from_gp_q : 4 | 1/4
; dup,dup_q,ext,ext_q,ins,ins_q,all recpe forms, rev,rev_q: 5 1/4
; permute,permute_q needs to depend on aarch64_expand_vec_perm_const does
; on TX3
; NOTE(review): the type lists of thunderx3t110_asimd_misc3 and
; thunderx3t110_asimd_misc4 overlap (everything except neon_from_gp*),
; and neon_move/neon_move_q additionally appear in
; thunderx3t110_asimd_misc.  The GCC internals manual says the
; reservation used is not defined when several conditions are true for
; one insn -- confirm which should apply.
(define_insn_reservation "thunderx3t110_asimd_misc3" 3
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_rbit,\
                        neon_bsl,neon_bsl_q,\
                        neon_cls,neon_cls_q,\
                        neon_cnt,neon_cnt_q,\
                        neon_move,neon_move_q"))
  "thunderx3t110_f23")

(define_insn_reservation "thunderx3t110_asimd_misc4" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_rbit,\
                        neon_bsl,neon_bsl_q,\
                        neon_cls,neon_cls_q,\
                        neon_cnt,neon_cnt_q,\
                        neon_from_gp,neon_from_gp_q,\
                        neon_move,neon_move_q"))
  "thunderx3t110_f0123")

; NOTE(review): the type string below starts with a line break before
; its first element; presumably the generator skips that whitespace --
; confirm before reformatting.
(define_insn_reservation "thunderx3t110_asimd_misc" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "
                        neon_dup,neon_dup_q,\
                        neon_ext,neon_ext_q,\
                        neon_ins,neon_ins_q,\
                        neon_move,neon_move_q,\
                        neon_fp_recpe_s,neon_fp_recpe_d,\
                        neon_fp_recpe_s_q,neon_fp_recpe_d_q,\
                        neon_fp_recpx_s,neon_fp_recpx_d,\
                        neon_fp_recpx_s_q,neon_fp_recpx_d_q,\
                        neon_rev,neon_rev_q,\
                        neon_permute,neon_permute_q"))
  "thunderx3t110_f0123")

; Reciprocal step, vector square root and reciprocal-square-root
; estimate/step types: latency 5 on any of f0..f3.
(define_insn_reservation "thunderx3t110_asimd_recip_step" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_fp_recps_s,neon_fp_recps_s_q,\
                        neon_fp_recps_d,neon_fp_recps_d_q,\
                        neon_fp_sqrt_s,neon_fp_sqrt_s_q,\
                        neon_fp_sqrt_d,neon_fp_sqrt_d_q,\
                        neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\
                        neon_fp_rsqrte_d, neon_fp_rsqrte_d_q,\
                        neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\
                        neon_fp_rsqrts_d, neon_fp_rsqrts_d_q"))
  "thunderx3t110_f0123")

; Table lookups: the latency grows by 5 per table register (5 here,
; then 10/15/20 for tbl2/tbl3/tbl4).
(define_insn_reservation "thunderx3t110_asimd_lut1" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_tbl1,neon_tbl1_q"))
  "thunderx3t110_f0123")
; Two-, three- and four-register table lookups: latencies 10, 15 and 20,
; each reserving one of f0..f3 for a single cycle.
(define_insn_reservation "thunderx3t110_asimd_lut2" 10
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_tbl2,neon_tbl2_q"))
  "thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_asimd_lut3" 15
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_tbl3,neon_tbl3_q"))
  "thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_asimd_lut4" 20
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_tbl4,neon_tbl4_q"))
  "thunderx3t110_f0123")

; Vector element to general-purpose register moves.
(define_insn_reservation "thunderx3t110_asimd_elt_to_gr" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
  "thunderx3t110_f0123")

;; ASIMD load instructions.

; NOTE: These reservations attempt to model latency and throughput
; correctly, but the cycle timing of unit allocation is not
; necessarily accurate (because insns are split into uops, and those
; may be issued out-of-order).

; the LDP/LDNP imm-offset S/D/Q supplies the first arg with latency 4
; and the 2nd at 5 (Q form) or 8 (S/D form). Can this be modeled? These
; forms, as documented, do not use the I0/I1/I2 units (no I3), but the
; other LDP ones do.
; Vector load pair: one of i0..i2, then ls0|ls1 in the next cycle.
(define_insn_reservation "thunderx3t110_asimd_load1_ldp" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_ldp,neon_ldp_q"))
  "thunderx3t110_i012,thunderx3t110_ls01")

; Need to distinguish latency 6 throughput 2: 4 reg D/Q
; latency 5 throughput 3/2: 3 reg D/Q
; latency 4 throughput 1: 2 reg D/Q
; latency 4 throughput 1/2: 1 reg D/Q
(define_insn_reservation "thunderx3t110_asimd_load1" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\
                        neon_load1_2reg,neon_load1_2reg_q,\
                        neon_load1_3reg,neon_load1_3reg_q,\
                        neon_load1_4reg,neon_load1_4reg_q"))
  "thunderx3t110_ls01")

; The lane and structure loads below go through a load pipe with its
; delay chain and then reserve one of f0..f3.
(define_insn_reservation "thunderx3t110_asimd_load1_onelane" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q"))
  "thunderx3t110_l01delay,thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_asimd_load1_all" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_load1_all_lanes,neon_load1_all_lanes_q"))
  "thunderx3t110_l01delay,thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_asimd_load2" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_load2_2reg,neon_load2_2reg_q,\
                        neon_load2_one_lane,neon_load2_one_lane_q,\
                        neon_load2_all_lanes,neon_load2_all_lanes_q"))
  "thunderx3t110_l01delay,thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_asimd_load3" 7
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
                        neon_load3_one_lane,neon_load3_one_lane_q,\
                        neon_load3_all_lanes,neon_load3_all_lanes_q"))
  "thunderx3t110_l01delay,thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_asimd_load4" 8
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q,\
                        neon_load4_one_lane,neon_load4_one_lane_q,\
                        neon_load4_all_lanes,neon_load4_all_lanes_q"))
  "thunderx3t110_l01delay,thunderx3t110_f0123")

;; ASIMD store instructions.

; Same note applies as for ASIMD load instructions.

; Vector Store pair Need to distinguish:
; 5 throughput: imm-offset S/D; imm-postindex S/D; imm-preindex S/D
; 2 throughput: imm-offset Q; imm-postindex Q; imm-preindex Q
; all index modes use I0/I1/I2
(define_insn_reservation "thunderx3t110_asimd_store_stp" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_stp,neon_stp_q"))
  "thunderx3t110_ls01,thunderx3t110_sd")

; There are multiple forms of ST1
; The following two groups, as documented, do not use the FP pipelines.
; multiple, 1 reg, D-form ST1
; tx2_ltp: x 1/2 LS0/LS1
; tx3_ltp: x 1/2 LS0/LS1
; multiple, 1 reg, Q-form ST1
; tx2_ltp: x 1/2 LS0/LS1
; tx3_ltp: x 1/2 LS0/LS1
;
; one lane, B/H/S ST1
; tx2_ltp: x 1/2 LS0/LS1,F0/F1
; tx3_ltp: x 1/2 LS0/LS1,F0/F1/F2/F3
; one lane, D ST1
; tx2_ltp: x 1/2 LS0/LS1,F0/F1
; tx3_ltp: x 1/2 LS0/LS1,F0/F1/F2/F3
;; Model for st1 insn needs refinement for different register forms
; multiple, 2 reg, D-form ST1 x 1 LS0/LS1
; multiple, 2 reg, Q-form ST1 x 1 LS0/LS1
; multiple, 3 reg, D-form ST1 x 3/2 LS0/LS1
; multiple, 3 reg, Q-form ST1 x 3/2 LS0/LS1
; multiple,4 reg, D-form ST1 x 2 LS0/LS1
; multiple,4 reg, Q-form ST1 x 2 LS0/LS1
; The Q-form 3- and 4-register types are listed alongside the D-form
; ones so that every multiple-register ST1 form described above (and
; every counterpart of the thunderx3t110_asimd_load1 type list) is
; covered by this reservation.
(define_insn_reservation "thunderx3t110_asimd_store1" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q,\
                        neon_store1_2reg,neon_store1_2reg_q,\
                        neon_store1_3reg,neon_store1_3reg_q,\
                        neon_store1_4reg,neon_store1_4reg_q"))
  "thunderx3t110_ls01")

; One-lane and structure stores reserve ls0|ls1 and then one of f0..f3.
(define_insn_reservation "thunderx3t110_asimd_store1_onelane" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_store1_one_lane,neon_store1_one_lane_q"))
  "thunderx3t110_ls01,thunderx3t110_f0123")

; distinguish between throughput 1: D/Q-form B/H/S, Q-form D and
; throughput 1/2: one lane B/H/S/D
(define_insn_reservation "thunderx3t110_asimd_store2" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_store2_2reg,neon_store2_2reg_q,\
                        neon_store2_one_lane,neon_store2_one_lane_q"))
  "thunderx3t110_ls01,thunderx3t110_f0123")

; distinguish between throughput 3: D/Q-form B/H/S, Q-form D and
; throughput 1: one lane B/H/S/D
(define_insn_reservation "thunderx3t110_asimd_store3" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_store3_3reg,neon_store3_3reg_q,\
                        neon_store3_one_lane,neon_store3_one_lane_q"))
  "thunderx3t110_ls01,thunderx3t110_f0123")

; distinguish between throughput 4: D/Q-form B/H/S, Q-form D and
; throughput 1: one lane B/H/S/D? (not in doc)
(define_insn_reservation "thunderx3t110_asimd_store4" 1
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "neon_store4_4reg,neon_store4_4reg_q,\
                        neon_store4_one_lane,neon_store4_one_lane_q"))
  "thunderx3t110_ls01,thunderx3t110_f0123")

;; Crypto extensions.

(define_insn_reservation "thunderx3t110_aes" 4
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "crypto_aese,crypto_aesmc"))
  "thunderx3t110_f0123")

(define_insn_reservation "thunderx3t110_sha" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,crypto_sha1_slow,\
                        crypto_sha256_fast,crypto_sha256_slow"))
  "thunderx3t110_f0123")

;; CRC extension.

; Like clz, CRC is modelled with latency 3 on i1 only.
(define_insn_reservation "thunderx3t110_crc" 3
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "crc"))
  "thunderx3t110_i1")

;; PMULL extension.

(define_insn_reservation "thunderx3t110_pmull" 5
  (and (eq_attr "tune" "thunderx3t110")
       (eq_attr "type" "crypto_pmull"))
  "thunderx3t110_f0123")