;; Samsung Exynos M1 pipeline description
;; Copyright (C) 2014-2021 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.

;; Coarse classification of the Advanced SIMD "type" values.  The Neon
;; reservations in this file test this attribute instead of "type".
;; A "type" that matches none of the tests below yields "unknown".
(define_attr "exynos_m1_neon_type"
  "neon_arith_simple, neon_arith_basic, neon_arith_complex,
   neon_multiply, neon_mla, neon_mla_q, neon_mla_long, neon_sat_mla_long,
   neon_shift_acc, neon_shift_imm_basic, neon_shift_imm_complex,
   neon_shift_reg_basic, neon_shift_reg_basic_q,
   neon_shift_reg_complex, neon_shift_reg_complex_q,
   neon_fp_unary, neon_fp_add, neon_fp_abd, neon_fp_compare,
   neon_fp_reduc_minmax, neon_fp_reduc_add, neon_fp_round, neon_fp_cvt,
   neon_fp_minmax, neon_fp_mul, neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q,
   neon_fp_estimate, neon_fp_estimatex, neon_fp_step,
   neon_bitops, neon_bitops_q, neon_bitins,
   neon_to_gp, neon_from_gp, neon_move, neon_tbl,
   neon_load1_1, neon_load1_2, neon_load1_3, neon_load1_4,
   neon_load1_one, neon_load1_all,
   neon_load2_2, neon_load2_one, neon_load2_all,
   neon_load3_3, neon_load3_one, neon_load3_all,
   neon_load4_4, neon_load4_one, neon_load4_all,
   neon_store,
   neon_store1_1, neon_store1_2, neon_store1_3, neon_store1_4, neon_store1_one,
   neon_store2_2, neon_store2_one,
   neon_store3_3, neon_store3_one,
   neon_store4_4, neon_store4_one,
   unknown"
  (cond
    [
     ;; Classes used by the integer arithmetic reservations.
     (eq_attr "type"
       "neon_abd, neon_abd_q, neon_abd_long,\
        neon_abs, neon_abs_q,\
        neon_minmax, neon_minmax_q")
     (const_string "neon_arith_simple")

     (eq_attr "type"
       "neon_add, neon_add_q, neon_add_long,\
        neon_neg, neon_neg_q,\
        neon_sub, neon_sub_q, neon_sub_long, neon_sub_widen,\
        neon_logic, neon_logic_q, neon_tst, neon_tst_q,\
        neon_compare_zero, neon_compare_zero_q")
     (const_string "neon_arith_basic")

     (eq_attr "type"
       "neon_add_widen, neon_arith_acc, neon_arith_acc_q,\
        neon_reduc_add, neon_reduc_add_q,\
        neon_reduc_add_acc, neon_reduc_add_acc_q,\
        neon_reduc_add_long, neon_add_halve_narrow_q,\
        neon_add_halve, neon_add_halve_q,\
        neon_sub_halve, neon_sub_halve_q, neon_qabs,\
        neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
        neon_qneg_q, neon_qsub, neon_qsub_q,\
        neon_sub_halve_narrow_q,\
        neon_compare, neon_compare_q,\
        neon_reduc_minmax, neon_reduc_minmax_q")
     (const_string "neon_arith_complex")

     ;; Classes used by the integer multiply reservation.
     (eq_attr "type"
       "neon_mul_b, neon_mul_b_q, neon_mul_h, neon_mul_h_q,\
        neon_mul_s, neon_mul_s_q,\
        neon_mul_h_scalar, neon_mul_h_scalar_q,\
        neon_mul_s_scalar, neon_mul_s_scalar_q,\
        neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
        neon_sat_mul_b, neon_sat_mul_b_q,\
        neon_sat_mul_h, neon_sat_mul_h_q,\
        neon_sat_mul_s, neon_sat_mul_s_q,\
        neon_sat_mul_h_scalar, neon_sat_mul_h_scalar_q,\
        neon_sat_mul_s_scalar, neon_sat_mul_s_scalar_q,\
        neon_sat_mul_b_long, neon_sat_mul_h_long,\
        neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
        neon_sat_mul_s_scalar_long, crypto_pmull")
     (const_string "neon_multiply")

     (eq_attr "type"
       "neon_mla_b, neon_mla_h, neon_mla_s,\
        neon_mla_h_scalar, neon_mla_s_scalar,\
        neon_mla_b_long, neon_mla_h_long,\
        neon_mla_s_long,\
        neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
        neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
        neon_mla_h_scalar_q, neon_mla_s_scalar_q")
     (const_string "neon_mla")

     (eq_attr "type"
       "neon_sat_mla_b_long, neon_sat_mla_h_long,\
        neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
        neon_sat_mla_s_scalar_long")
     (const_string "neon_sat_mla_long")

     ;; Classes used by the integer shift reservations.
     (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
     (const_string "neon_shift_acc")

     (eq_attr "type"
       "neon_shift_imm, neon_shift_imm_q,\
        neon_shift_imm_narrow_q, neon_shift_imm_long")
     (const_string "neon_shift_imm_basic")

     (eq_attr "type"
       "neon_sat_shift_imm, neon_sat_shift_imm_q,\
        neon_sat_shift_imm_narrow_q")
     (const_string "neon_shift_imm_complex")

     (eq_attr "type" "neon_shift_reg, neon_shift_reg_q")
     (const_string "neon_shift_reg_basic")

     (eq_attr "type" "neon_sat_shift_reg, neon_sat_shift_reg_q")
     (const_string "neon_shift_reg_complex")

     ;; Classes used by the Neon floating point reservations.
     (eq_attr "type"
       "neon_fp_neg_s, neon_fp_neg_s_q,\
        neon_fp_abs_s, neon_fp_abs_s_q,\
        neon_fp_neg_d, neon_fp_neg_d_q,\
        neon_fp_abs_d, neon_fp_abs_d_q")
     (const_string "neon_fp_unary")

     (eq_attr "type"
       "neon_fp_addsub_s, neon_fp_addsub_s_q,\
        neon_fp_addsub_d, neon_fp_addsub_d_q")
     (const_string "neon_fp_add")

     (eq_attr "type"
       "neon_fp_abd_s, neon_fp_abd_s_q,\
        neon_fp_abd_d, neon_fp_abd_d_q")
     (const_string "neon_fp_abd")

     ;; Note that the minmax types are classified as compares here.
     (eq_attr "type"
       "neon_fp_compare_s, neon_fp_compare_s_q,\
        neon_fp_compare_d, neon_fp_compare_d_q,\
        neon_fp_minmax_s, neon_fp_minmax_s_q,\
        neon_fp_minmax_d, neon_fp_minmax_d_q")
     (const_string "neon_fp_compare")

     (eq_attr "type"
       "neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_s_q,\
        neon_fp_reduc_minmax_d, neon_fp_reduc_minmax_d_q")
     (const_string "neon_fp_reduc_minmax")

     (eq_attr "type"
       "neon_fp_reduc_add_s, neon_fp_reduc_add_s_q,\
        neon_fp_reduc_add_d, neon_fp_reduc_add_d_q")
     (const_string "neon_fp_reduc_add")

     (eq_attr "type"
       "neon_fp_round_s, neon_fp_round_s_q,\
        neon_fp_round_d, neon_fp_round_d_q")
     (const_string "neon_fp_round")

     (eq_attr "type"
       "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h,
        neon_fp_to_int_s, neon_fp_to_int_s_q,\
        neon_fp_to_int_d_q, neon_fp_to_int_d,\
        neon_int_to_fp_s, neon_int_to_fp_s_q,\
        neon_int_to_fp_d, neon_int_to_fp_d_q")
     (const_string "neon_fp_cvt")

     (eq_attr "type"
       "neon_fp_mul_s, neon_fp_mul_s_q,\
        neon_fp_mul_s_scalar, neon_fp_mul_s_scalar_q,\
        neon_fp_mul_d, neon_fp_mul_d_q,\
        neon_fp_mul_d_scalar_q")
     (const_string "neon_fp_mul")

     (eq_attr "type"
       "neon_fp_mla_s, neon_fp_mla_s_q,\
        neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
        neon_fp_mla_d, neon_fp_mla_d_q,\
        neon_fp_mla_d_scalar_q")
     (const_string "neon_fp_mla")

     (eq_attr "type"
       "neon_fp_recpe_s, neon_fp_recpe_s_q,\
        neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\
        neon_fp_recpe_d, neon_fp_recpe_d_q,\
        neon_fp_rsqrte_d, neon_fp_rsqrte_d_q")
     (const_string "neon_fp_estimate")

     (eq_attr "type"
       "neon_fp_recpx_s, neon_fp_recpx_s_q,\
        neon_fp_recpx_d, neon_fp_recpx_d_q")
     (const_string "neon_fp_estimatex")

     (eq_attr "type"
       "neon_fp_recps_s, neon_fp_recps_s_q,\
        neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\
        neon_fp_recps_d, neon_fp_recps_d_q,\
        neon_fp_rsqrts_d, neon_fp_rsqrts_d_q")
     (const_string "neon_fp_step")

     ;; Classes used by the miscellaneous Neon reservations.
     (eq_attr "type"
       "neon_rbit, neon_rbit_q,\
        neon_cls, neon_cls_q, neon_cnt, neon_cnt_q,\
        neon_dup, neon_dup_q,\
        neon_rev, neon_rev_q,\
        neon_move, neon_move_q,
        neon_ext, neon_permute, neon_zip")
     (const_string "neon_bitops")

     (eq_attr "type" "neon_ext_q, neon_permute_q, neon_zip_q")
     (const_string "neon_bitops_q")

     (eq_attr "type" "neon_bsl, neon_bsl_q")
     (const_string "neon_bitins")

     (eq_attr "type" "neon_tbl1, neon_tbl2, neon_tbl3, neon_tbl4")
     (const_string "neon_tbl")

     (eq_attr "type" "neon_from_gp, neon_from_gp_q, f_mcr, f_mcrr")
     (const_string "neon_from_gp")

     (eq_attr "type" "neon_to_gp, neon_to_gp_q, f_mrc, f_mrrc")
     (const_string "neon_to_gp")

     ;; Classes used by the Neon load reservations.
     (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q")
     (const_string "neon_load1_1")

     (eq_attr "type" "neon_load1_2reg, neon_load1_2reg_q")
     (const_string "neon_load1_2")

     (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q")
     (const_string "neon_load1_3")

     (eq_attr "type" "neon_load1_4reg, neon_load1_4reg_q")
     (const_string "neon_load1_4")

     (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q")
     (const_string "neon_load1_one")

     (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q")
     (const_string "neon_load1_all")

     (eq_attr "type"
       "neon_load2_2reg, neon_load2_2reg_q,\
        neon_load2_4reg, neon_load2_4reg_q")
     (const_string "neon_load2_2")

     (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q")
     (const_string "neon_load2_one")

     (eq_attr "type" "neon_load2_all_lanes, neon_load2_all_lanes_q")
     (const_string "neon_load2_all")

     (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q")
     (const_string "neon_load3_3")

     (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q")
     (const_string "neon_load3_one")

     (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q")
     (const_string "neon_load3_all")

     (eq_attr "type" "neon_load4_4reg, neon_load4_4reg_q")
     (const_string "neon_load4_4")

     (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
     (const_string "neon_load4_one")

     (eq_attr "type" "neon_load4_all_lanes, neon_load4_all_lanes_q")
     (const_string "neon_load4_all")

     ;; Classes used by the Neon store reservations.
     (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q")
     (const_string "neon_store1_1")

     (eq_attr "type" "neon_store1_2reg, neon_store1_2reg_q")
     (const_string "neon_store1_2")

     (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q")
     (const_string "neon_store1_3")

     (eq_attr "type" "neon_store1_4reg, neon_store1_4reg_q")
     (const_string "neon_store1_4")

     (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q")
     (const_string "neon_store1_one")

     (eq_attr "type"
       "neon_store2_2reg, neon_store2_2reg_q,\
        neon_store2_4reg, neon_store2_4reg_q")
     (const_string "neon_store2_2")

     (eq_attr "type" "neon_store2_one_lane, neon_store2_one_lane_q")
     (const_string "neon_store2_one")

     (eq_attr "type" "neon_store3_3reg, neon_store3_3reg_q")
     (const_string "neon_store3_3")

     (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q")
     (const_string "neon_store3_one")

     (eq_attr "type" "neon_store4_4reg, neon_store4_4reg_q")
     (const_string "neon_store4_4")

     (eq_attr "type" "neon_store4_one_lane, neon_store4_one_lane_q")
     (const_string "neon_store4_one")
    ]
    ;; Fallback for every "type" not matched above.
    (const_string "unknown")))

;; The model treats the Exynos M1 core as a triple issue pipeline.  Its
;; functional units are spread over three automata: general purpose,
;; load-store, and Neon/FP.

(define_automaton "exynos_m1_gp")
(define_automaton "exynos_m1_ls")
(define_automaton "exynos_m1_fp")

;; 1. Pipelines A and B: simple integer operations only.
;; 2. Pipeline C: simple or complex integer operations.

(define_cpu_unit "em1_xa, em1_xb, em1_xc" "exynos_m1_gp")

(define_reservation "em1_alu" "(em1_xa | em1_xb | em1_xc)")
(define_reservation "em1_c" "em1_xc")

;; 3. Pipelines F0 and F1: two asymmetric pipelines for Neon and FP
;;    operations.

(define_cpu_unit "em1_f0, em1_f1" "exynos_m1_fp")

;; Reservations that use F0 only.
(define_reservation "em1_fmac" "em1_f0")
(define_reservation "em1_fcvt" "em1_f0")
;; Reservation that may use either F0 or F1.
(define_reservation "em1_nalu" "(em1_f0 | em1_f1)")
(define_reservation "em1_nalu0" "em1_f0")
(define_reservation "em1_nalu1" "em1_f1")
(define_reservation "em1_nmisc" "em1_f0")
(define_reservation "em1_ncrypt" "em1_f0")
;; Reservations that use F1 only.
(define_reservation "em1_fadd" "em1_f1")
(define_reservation "em1_fvar" "em1_f1")
(define_reservation "em1_fst" "em1_f1")

;; 4. Pipeline BX: branch operations.

(define_cpu_unit "em1_bx" "exynos_m1_gp")

(define_reservation "em1_br" "em1_bx")

;; 5. Unit L: one AGU for loads.
;;    Units S and SD: one AGU for stores and one pipeline for stores.

(define_cpu_unit "em1_lx" "exynos_m1_ls")
(define_cpu_unit "em1_sx, em1_sd" "exynos_m1_ls")

(define_reservation "em1_ld" "em1_lx")
;; A store occupies both store units at once.
(define_reservation "em1_st" "(em1_sx + em1_sd)")

;; Frequently used combinations.
(define_reservation "em1_sfst" "(em1_fst + em1_st)")
(define_reservation "em1_lfst" "(em1_fst + em1_ld)")

;; Branches
;;
;; A branch produces no result, hence the latency of 0.
;; TODO: Unconditional branches use no units;
;;       conditional branches add the BX unit;
;;       indirect branches add the C unit.
(define_insn_reservation "exynos_m1_branch" 0
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "branch"))
  "em1_br")

(define_insn_reservation "exynos_m1_call" 1
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "call"))
  "em1_alu")

;; Basic ALU
;;
;; Non-predicated simple ALU insns without a shift.
(define_insn_reservation "exynos_m1_alu" 1
  (and
    (eq_attr "tune" "exynosm1")
    (and
      (not (eq_attr "predicated" "yes"))
      (eq_attr "type"
        "alu_imm, alus_imm, logic_imm, logics_imm,\
         alu_sreg, alus_sreg, logic_reg, logics_reg,\
         adc_imm, adcs_imm, adc_reg, adcs_reg,\
         adr, bfm, bfx, clz, rbit, rev, csel, alu_dsp_reg,\
         shift_imm, shift_reg, rotate_imm, extend,\
         mov_imm, mov_reg,\
         mvn_imm, mvn_reg,\
         mrs, multiple")))
  "em1_alu")

;; Predicated simple ALU insns without a shift.
(define_insn_reservation "exynos_m1_alu_p" 1
  (and
    (eq_attr "tune" "exynosm1")
    (and
      (eq_attr "predicated" "yes")
      (eq_attr "type"
        "alu_imm, alus_imm, logic_imm, logics_imm,\
         alu_sreg, alus_sreg, logic_reg, logics_reg,\
         adc_imm, adcs_imm, adc_reg, adcs_reg,\
         adr, bfm, bfx, clz, rbit, rev, alu_dsp_reg,\
         shift_imm, shift_reg, rotate_imm, extend,\
         mov_imm, mov_reg,\
         mvn_imm, mvn_reg,\
         mrs, multiple")))
  "em1_c")

;; ALU insns with a shift by immediate.
;; TODO: if the shift value is between 0 and 3, the latency is just 1 cycle;
;;       otherwise it takes 2 cycles and the unit is blocked;
;;       for now, assume the latter's latency and the former's units.
(define_insn_reservation "exynos_m1_alu_shift" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type"
      "alu_ext, alus_ext,\
       alu_shift_imm_lsl_1to4,alu_shift_imm_other, alus_shift_imm,\
       logic_shift_imm, logics_shift_imm,\
       mov_shift, mvn_shift"))
  "(em1_alu)")

;; Non-predicated ALU insns with a shift by register.
(define_insn_reservation "exynos_m1_alu_shift_reg" 2
  (and
    (eq_attr "tune" "exynosm1")
    (and
      (not (eq_attr "predicated" "yes"))
      (eq_attr "type"
        "alu_shift_reg, alus_shift_reg,\
         logic_shift_reg, logics_shift_reg,\
         mov_shift_reg, mvn_shift_reg")))
  "(em1_alu * 2)")

;; Predicated ALU insns with a shift by register.
(define_insn_reservation "exynos_m1_alu_shift_reg_p" 2
  (and
    (eq_attr "tune" "exynosm1")
    (and
      (eq_attr "predicated" "yes")
      (eq_attr "type"
        "alu_shift_reg, alus_shift_reg,\
         logic_shift_reg, logics_shift_reg,\
         mov_shift_reg, mvn_shift_reg")))
  "(em1_alu, em1_c)")

;; Integer multiply
(define_insn_reservation "exynos_m1_mla" 3
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "mul32" "yes"))
  "em1_c")

(define_insn_reservation "exynos_m1_mlal" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "widen_mul64" "yes"))
  "em1_alu, em1_c")

;; Integer divide
;; TODO: assume the median latency; blocks other divisions
(define_insn_reservation "exynos_m1_div" 13
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "udiv, sdiv"))
  "em1_c")

;; Load-store execution Unit
;;
;; Loads of at most 2 words.
(define_insn_reservation "exynos_m1_load" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "load_byte, load_4, load_8"))
  "em1_ld")

;; Loads of 3 or 4 words.
(define_insn_reservation "exynos_m1_loadm" 6
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "load_12, load_16"))
  "(em1_ld * 3)")

;; Stores of at most 2 words.
(define_insn_reservation "exynos_m1_store" 1
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "store_4, store_8"))
  "em1_st")

;; Stores of 3 or 4 words.
(define_insn_reservation "exynos_m1_storem" 3
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "store_12, store_16"))
  "(em1_st * 3)")

;; Advanced SIMD Unit
;;
;; Integer Arithmetic Instructions.
;; These reservations select insns through "exynos_m1_neon_type".

(define_insn_reservation "exynos_m1_arith_simple" 1
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_arith_simple"))
  "em1_nmisc")

(define_insn_reservation "exynos_m1_neon_arith_basic" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_arith_basic"))
  "em1_nalu")

(define_insn_reservation "exynos_m1_neon_arith_complex" 3
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_arith_complex"))
  "em1_nmisc")

;; Integer Multiply Instructions.
;; Multiplies and both multiply-accumulate classes share one reservation.

(define_insn_reservation "exynos_m1_neon_multiply" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type"
      "neon_multiply, neon_mla, neon_sat_mla_long"))
  "em1_nmisc")

;; Integer Shift Instructions.

(define_insn_reservation "exynos_m1_neon_shift_acc" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_shift_acc"))
  "em1_nalu1")

;; Immediate and register shifts of the "basic" classes.
(define_insn_reservation "exynos_m1_neon_shift_basic" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type"
      "neon_shift_imm_basic, neon_shift_reg_basic"))
  "em1_nalu")

;; Immediate and register shifts of the "complex" classes.
(define_insn_reservation "exynos_m1_neon_shift_complex" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type"
      "neon_shift_imm_complex, neon_shift_reg_complex"))
  "em1_nalu1")

;; Floating Point Instructions.

(define_insn_reservation "exynos_m1_neon_fp_unary" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_fp_unary"))
  "em1_nalu")

(define_insn_reservation "exynos_m1_neon_fp_add" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_fp_add"))
  "em1_fadd")

(define_insn_reservation "exynos_m1_neon_fp_abd" 3
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_fp_abd"))
  "em1_nmisc")

(define_insn_reservation "exynos_m1_neon_fp_compare" 1
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_fp_compare"))
  "em1_nmisc")

;; TODO: the latency and throughput of reduce insns actually vary between
;;       3-5 and 1/4-1, but the median values were picked.
(define_insn_reservation "exynos_m1_neon_fp_reduc" 5
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_minmax"))
  "(em1_nmisc * 4)")

(define_insn_reservation "exynos_m1_neon_fp_reduc_add" 10
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_add"))
  "((em1_nalu * 2), em1_fadd)")

;; Rounding and conversions both go through the FCVT reservation.
(define_insn_reservation "exynos_m1_neon_fp_round" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_fp_round"))
  "em1_fcvt")

(define_insn_reservation "exynos_m1_neon_fp_cvt" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_fp_cvt"))
  "em1_fcvt")

;; Multiplies and multiply-accumulates go through the FMAC reservation.
(define_insn_reservation "exynos_m1_neon_fp_mul" 5
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_fp_mul"))
  "em1_fmac")

(define_insn_reservation "exynos_m1_neon_fp_mla" 6
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_fp_mla"))
  "em1_fmac")

(define_insn_reservation "exynos_m1_neon_fp_estimate" 5
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_fp_estimate"))
  "em1_fcvt")

(define_insn_reservation "exynos_m1_neon_fp_estimatex" 1
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_fp_estimatex"))
  "em1_nmisc")

(define_insn_reservation "exynos_m1_neon_fp_step" 6
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_fp_step"))
  "em1_fmac")

;; Miscellaneous Instructions.

(define_insn_reservation "exynos_m1_neon_bitops" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_bitops"))
  "em1_nalu")

;; The _q class reserves NALU for two consecutive cycles.
(define_insn_reservation "exynos_m1_neon_bitops_q" 3
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_bitops_q"))
  "(em1_nalu, em1_nalu)")

(define_insn_reservation "exynos_m1_neon_bitins" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_bitins"))
  "em1_nalu1")

;; TODO: table lookups are more complicated than modeled here.
(define_insn_reservation "exynos_m1_neon_tbl" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_tbl"))
  "em1_nalu1")

(define_insn_reservation "exynos_m1_neon_from_gp" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_from_gp"))
  "em1_st")

(define_insn_reservation "exynos_m1_neon_to_gp" 9
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_to_gp"))
  "em1_lfst")

;; Load Instructions.

;; These two reservations test "type" directly rather than
;; "exynos_m1_neon_type".
(define_insn_reservation "exynos_m1_neon_load" 5
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "f_loads, f_loadd, neon_ldp"))
  "em1_ld")

(define_insn_reservation "exynos_m1_neon_load_q" 6
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "neon_ldp_q"))
  "(em1_ld, em1_ld)")

(define_insn_reservation "exynos_m1_neon_load1_1" 6
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_load1_1, neon_load1_all"))
  "em1_ld")

(define_insn_reservation "exynos_m1_neon_load1_2" 6
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_load1_2"))
  "(em1_ld * 2)")

(define_insn_reservation "exynos_m1_neon_load1_3" 7
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_load1_3"))
  "(em1_ld * 3)")

(define_insn_reservation "exynos_m1_neon_load1_4" 8
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_load1_4"))
  "(em1_ld * 4)")

;; The single-lane load reservations also reserve NALU after the load unit.
(define_insn_reservation "exynos_m1_neon_load1_one" 7
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_load1_one"))
  "((em1_ld * 2), em1_nalu)")

(define_insn_reservation "exynos_m1_neon_load2_2" 10
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_load2_2"))
  "(em1_ld * 5)")

(define_insn_reservation "exynos_m1_neon_load2_one" 7
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_load2_one"))
  "((em1_ld * 2), (em1_nalu * 2))")

(define_insn_reservation "exynos_m1_neon_load2_all" 6
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_load2_all"))
  "(em1_ld * 2)")

(define_insn_reservation "exynos_m1_neon_load3_3" 12
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_load3_3"))
  "(em1_ld * 6)")

(define_insn_reservation "exynos_m1_neon_load3_one" 9
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_load3_one"))
  "((em1_ld * 4), (em1_nalu * 3))")

(define_insn_reservation "exynos_m1_neon_load3_all" 7
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_load3_all"))
  "(em1_ld * 3)")

(define_insn_reservation "exynos_m1_neon_load4_4" 14
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_load4_4"))
  "(em1_ld * 7)")

(define_insn_reservation "exynos_m1_neon_load4_one" 9
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_load4_one"))
  "((em1_ld * 4), (em1_nalu * 4))")

(define_insn_reservation "exynos_m1_neon_load4_all" 8
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_load4_all"))
  "(em1_ld * 4)")

;; Store Instructions.

;; These two reservations test "type" directly rather than
;; "exynos_m1_neon_type".
(define_insn_reservation "exynos_m1_neon_store" 1
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "f_stores, f_stored, neon_stp"))
  "em1_sfst")

(define_insn_reservation "exynos_m1_neon_store_q" 3
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "neon_stp_q"))
  "(em1_sfst * 2)")

(define_insn_reservation "exynos_m1_neon_store1_1" 1
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_store1_1"))
  "em1_sfst")

(define_insn_reservation "exynos_m1_neon_store1_2" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_store1_2"))
  "(em1_sfst * 2)")

(define_insn_reservation "exynos_m1_neon_store1_3" 3
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_store1_3"))
  "(em1_sfst * 3)")

(define_insn_reservation "exynos_m1_neon_store1_4" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_store1_4"))
  "(em1_sfst * 4)")

(define_insn_reservation "exynos_m1_neon_store1_one" 7
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_store1_one"))
  "em1_sfst")

;; For 2, 3 and 4 element structures, the whole-register and single-lane
;; classes share one reservation each.
(define_insn_reservation "exynos_m1_neon_store2" 7
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_store2_2, neon_store2_one"))
  "em1_sfst, em1_fst")

(define_insn_reservation "exynos_m1_neon_store3" 16
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_store3_3, neon_store3_one"))
  "((em1_sfst * 3), (em1_fst * 2), em1_nalu)")

(define_insn_reservation "exynos_m1_neon_store4" 17
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "exynos_m1_neon_type" "neon_store4_4, neon_store4_one"))
  "((em1_sfst * 4), (em1_fst * 2), em1_nalu)")

;; Floating-Point Operations.

(define_insn_reservation "exynos_m1_fp_const" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "fconsts, fconstd"))
  "em1_nalu")

(define_insn_reservation "exynos_m1_fp_add" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "fadds, faddd"))
  "em1_fadd")

(define_insn_reservation "exynos_m1_fp_mul" 5
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "fmuls, fmuld"))
  "em1_fmac")

(define_insn_reservation "exynos_m1_fp_mac" 6
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "fmacs, ffmas, fmacd, ffmad"))
  "em1_fmac")

(define_insn_reservation "exynos_m1_fp_cvt" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "f_cvt, f_rints, f_rintd"))
  "em1_fcvt")

;; f_cvtf2i reserves FCVT and then the FST + load combination.
(define_insn_reservation "exynos_m1_fp_cvt_i" 13
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "f_cvtf2i"))
  "(em1_fcvt, em1_lfst)")

;; f_cvti2f reserves the store units and then FCVT.
(define_insn_reservation "exynos_m1_i_cvt_fp" 9
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "f_cvti2f"))
  "(em1_st, em1_fcvt)")

(define_insn_reservation "exynos_m1_fp_cmp" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "fcmps, fcmpd"))
  "em1_nmisc")

(define_insn_reservation "exynos_m1_fp_ccmp" 7
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "fccmps, fccmpd"))
  "(em1_st, em1_nmisc)")

(define_insn_reservation "exynos_m1_fp_sel" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "fcsel"))
  "(em1_st + em1_nalu0)")

(define_insn_reservation "exynos_m1_fp_arith" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "ffariths, ffarithd"))
  "em1_nalu")

(define_insn_reservation "exynos_m1_fp_cpy" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "fmov"))
  "em1_nalu")

;; Divide and square root share a reservation per precision; the scalar
;; and the Neon types are both covered.
(define_insn_reservation "exynos_m1_fp_divs" 15
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type"
      "fdivs, neon_fp_div_s, neon_fp_div_s_q,\
       fsqrts, neon_fp_sqrt_s, neon_fp_sqrt_s_q"))
  "(em1_fvar * 9)")

(define_insn_reservation "exynos_m1_fp_divd" 22
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type"
      "fdivd, neon_fp_div_d, neon_fp_div_d_q,\
       fsqrtd, neon_fp_sqrt_d, neon_fp_sqrt_d_q"))
  "(em1_fvar * 9)")

(define_insn_reservation "exynos_m1_fp_minmax" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "f_minmaxs, f_minmaxd"))
  "(em1_nmisc * 2)")

;; Crypto Operations.

(define_insn_reservation "exynos_m1_crypto_simple" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type"
      "crypto_aese, crypto_aesmc,\
       crypto_sha1_xor, crypto_sha1_fast, crypto_sha256_fast"))
  "em1_ncrypt")

(define_insn_reservation "exynos_m1_crypto_complex" 6
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow"))
  "em1_ncrypt")

;; The Neon long multiply types are reserved on the NCRYPT reservation.
(define_insn_reservation "exynos_m1_crypto_poly" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "neon_mul_b_long, neon_mul_h_long, neon_mul_s_long"))
  "em1_ncrypt")

(define_insn_reservation "exynos_m1_crypto_polyl" 4
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "neon_mul_d_long"))
  "em1_ncrypt")

(define_insn_reservation "exynos_m1_crc" 2
  (and
    (eq_attr "tune" "exynosm1")
    (eq_attr "type" "crc"))
  "em1_c")

;; Simple execution unit bypasses

;; The register updated by pre-decrement and post-increment addressing
;; modes is available quickly.
;; TODO: figure out how to tell the addressing mode register from the
;;       loaded one.
(define_bypass 1
  "exynos_m1_store*, exynos_m1_neon_store*"
  "exynos_m1_store*, exynos_m1_neon_store*,
   exynos_m1_load*, exynos_m1_neon_load*")

;; An MLA can feed another MLA quickly.
(define_bypass 1
  "exynos_m1_mla*"
  "exynos_m1_mla*")

;; Insns in FMAC or FADD can feed other such insns quickly.
(define_bypass 4
  "exynos_m1_fp_mul"
  "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")

(define_bypass 5
  "exynos_m1_fp_mac"
  "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")

(define_bypass 4
  "exynos_m1_neon_fp_mul"
  "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
   exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")

(define_bypass 5
  "exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step"
  "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
   exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")

(define_bypass 3
  "exynos_m1_fp_add"
  "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")

(define_bypass 3
  "exynos_m1_neon_fp_add"
  "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
   exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")

;; Insns in NALU can feed other such insns quickly.
(define_bypass 1
  "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy"
  "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\
   exynos_m1_fp_sel")

(define_bypass 3
  "exynos_m1_fp_sel"
  "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\
   exynos_m1_fp_sel")

(define_bypass 1
  "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
   exynos_m1_neon_bitops, exynos_m1_neon_bitins,\
   exynos_m1_neon_tbl"
  "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
   exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\
   exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\
   exynos_m1_neon_tbl")

(define_bypass 3
  "exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex"
  "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
   exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\
   exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\
   exynos_m1_neon_tbl")

(define_bypass 1
  "exynos_m1_neon_fp_unary"
  "exynos_m1_neon_fp_unary")

;; Insns in NCRYPT can feed other such insns quickly.
(define_bypass 1
  "exynos_m1_crypto_simple, exynos_m1_crypto_poly"
  "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
   exynos_m1_crypto_poly*")

(define_bypass 3
  "exynos_m1_crypto_polyl"
  "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
   exynos_m1_crypto_poly*")

(define_bypass 5
  "exynos_m1_crypto_complex"
  "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
   exynos_m1_crypto_poly*")

;; A predicted branch takes no time, while a mispredicted one takes forever
;; anyway, so every producer feeds calls and branches with latency 1.
(define_bypass 1
  "exynos_m1_*"
  "exynos_m1_call, exynos_m1_branch")