;; Samsung Exynos M1 pipeline description
;; Copyright (C) 2014-2021 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Neon instruction classes for the Exynos M1.
;;
;; "exynos_m1_neon_type" folds the values of the "type" attribute listed in
;; the COND below into coarser classes, which the Advanced SIMD
;; reservations later in this file test.  An insn whose "type" is not
;; listed in any arm gets the value "unknown".
;;
;; NOTE(review): neon_mla_q, neon_mla_long, neon_shift_reg_basic_q,
;; neon_shift_reg_complex_q, neon_fp_minmax, neon_fp_mul_q, neon_fp_mla_q,
;; neon_move and neon_store are declared as possible values but no arm of
;; the COND produces them.
(define_attr "exynos_m1_neon_type"
  "neon_arith_simple, neon_arith_basic, neon_arith_complex,
   neon_multiply, neon_mla, neon_mla_q, neon_mla_long, neon_sat_mla_long,
   neon_shift_acc, neon_shift_imm_basic, neon_shift_imm_complex,
   neon_shift_reg_basic, neon_shift_reg_basic_q,
   neon_shift_reg_complex, neon_shift_reg_complex_q,
   neon_fp_unary, neon_fp_add, neon_fp_abd, neon_fp_compare,
   neon_fp_reduc_minmax, neon_fp_reduc_add, neon_fp_round, neon_fp_cvt,
   neon_fp_minmax, neon_fp_mul, neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q,
   neon_fp_estimate, neon_fp_estimatex, neon_fp_step,
   neon_bitops, neon_bitops_q, neon_bitins,
   neon_to_gp, neon_from_gp, neon_move, neon_tbl,
   neon_load1_1, neon_load1_2, neon_load1_3, neon_load1_4,
   neon_load1_one, neon_load1_all,
   neon_load2_2, neon_load2_one, neon_load2_all,
   neon_load3_3, neon_load3_one, neon_load3_all,
   neon_load4_4, neon_load4_one, neon_load4_all,
   neon_store,
   neon_store1_1, neon_store1_2, neon_store1_3, neon_store1_4, neon_store1_one,
   neon_store2_2, neon_store2_one,
   neon_store3_3, neon_store3_one,
   neon_store4_4, neon_store4_one,
   unknown"
  (cond [
          ;; Integer arithmetic.
          (eq_attr "type" "neon_abd, neon_abd_q, neon_abd_long,\
                           neon_abs, neon_abs_q,\
                           neon_minmax, neon_minmax_q")
            (const_string "neon_arith_simple")

          (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\
                           neon_neg, neon_neg_q,\
                           neon_sub, neon_sub_q, neon_sub_long, neon_sub_widen,\
                           neon_logic, neon_logic_q, neon_tst, neon_tst_q,\
                           neon_compare_zero, neon_compare_zero_q")
            (const_string "neon_arith_basic")

          (eq_attr "type" "neon_add_widen, neon_arith_acc, neon_arith_acc_q,\
                           neon_reduc_add, neon_reduc_add_q,\
                           neon_reduc_add_acc, neon_reduc_add_acc_q,\
                           neon_reduc_add_long, neon_add_halve_narrow_q,\
                           neon_add_halve, neon_add_halve_q,\
                           neon_sub_halve, neon_sub_halve_q, neon_qabs,\
                           neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
                           neon_qneg_q, neon_qsub, neon_qsub_q,\
                           neon_sub_halve_narrow_q,\
                           neon_compare, neon_compare_q,\
                           neon_reduc_minmax, neon_reduc_minmax_q")
            (const_string "neon_arith_complex")

          ;; Integer multiply and multiply-accumulate.
          (eq_attr "type" "neon_mul_b, neon_mul_b_q, neon_mul_h, neon_mul_h_q,\
                           neon_mul_s, neon_mul_s_q,\
                           neon_mul_h_scalar, neon_mul_h_scalar_q,\
                           neon_mul_s_scalar, neon_mul_s_scalar_q,\
                           neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
                           neon_sat_mul_b, neon_sat_mul_b_q,\
                           neon_sat_mul_h, neon_sat_mul_h_q,\
                           neon_sat_mul_s, neon_sat_mul_s_q,\
                           neon_sat_mul_h_scalar, neon_sat_mul_h_scalar_q,\
                           neon_sat_mul_s_scalar, neon_sat_mul_s_scalar_q,\
                           neon_sat_mul_b_long, neon_sat_mul_h_long,\
                           neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
                           neon_sat_mul_s_scalar_long, crypto_pmull")
            (const_string "neon_multiply")

          (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\
                           neon_mla_h_scalar, neon_mla_s_scalar,\
                           neon_mla_b_long, neon_mla_h_long,\
                           neon_mla_s_long,\
                           neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
                           neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
                           neon_mla_h_scalar_q, neon_mla_s_scalar_q")
            (const_string "neon_mla")

          (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\
                           neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
                           neon_sat_mla_s_scalar_long")
            (const_string "neon_sat_mla_long")

          ;; Integer shifts.
          (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
            (const_string "neon_shift_acc")

          (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
                           neon_shift_imm_narrow_q, neon_shift_imm_long")
            (const_string "neon_shift_imm_basic")

          (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
                           neon_sat_shift_imm_narrow_q")
            (const_string "neon_shift_imm_complex")

          (eq_attr "type" "neon_shift_reg, neon_shift_reg_q")
            (const_string "neon_shift_reg_basic")

          (eq_attr "type" "neon_sat_shift_reg, neon_sat_shift_reg_q")
            (const_string "neon_shift_reg_complex")

          ;; Floating point.
          (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
                           neon_fp_abs_s, neon_fp_abs_s_q,\
                           neon_fp_neg_d, neon_fp_neg_d_q,\
                           neon_fp_abs_d, neon_fp_abs_d_q")
            (const_string "neon_fp_unary")

          (eq_attr "type" "neon_fp_addsub_s, neon_fp_addsub_s_q,\
                           neon_fp_addsub_d, neon_fp_addsub_d_q")
            (const_string "neon_fp_add")

          (eq_attr "type" "neon_fp_abd_s, neon_fp_abd_s_q,\
                           neon_fp_abd_d, neon_fp_abd_d_q")
            (const_string "neon_fp_abd")

          ;; Min/max share the compare class.
          (eq_attr "type" "neon_fp_compare_s, neon_fp_compare_s_q,\
                           neon_fp_compare_d, neon_fp_compare_d_q,\
                           neon_fp_minmax_s, neon_fp_minmax_s_q,\
                           neon_fp_minmax_d, neon_fp_minmax_d_q")
            (const_string "neon_fp_compare")

          (eq_attr "type" "neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_s_q,\
                           neon_fp_reduc_minmax_d, neon_fp_reduc_minmax_d_q")
            (const_string "neon_fp_reduc_minmax")

          (eq_attr "type" "neon_fp_reduc_add_s, neon_fp_reduc_add_s_q,\
                           neon_fp_reduc_add_d, neon_fp_reduc_add_d_q")
            (const_string "neon_fp_reduc_add")

          (eq_attr "type" "neon_fp_round_s, neon_fp_round_s_q,\
                           neon_fp_round_d, neon_fp_round_d_q")
            (const_string "neon_fp_round")

          (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h,\
                           neon_fp_to_int_s, neon_fp_to_int_s_q,\
                           neon_fp_to_int_d_q, neon_fp_to_int_d,\
                           neon_int_to_fp_s, neon_int_to_fp_s_q,\
                           neon_int_to_fp_d, neon_int_to_fp_d_q")
            (const_string "neon_fp_cvt")

          (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_q,\
                           neon_fp_mul_s_scalar, neon_fp_mul_s_scalar_q,\
                           neon_fp_mul_d, neon_fp_mul_d_q,\
                           neon_fp_mul_d_scalar_q")
            (const_string "neon_fp_mul")

          (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_q,\
                           neon_fp_mla_s_scalar, neon_fp_mla_s_scalar_q,\
                           neon_fp_mla_d, neon_fp_mla_d_q,\
                           neon_fp_mla_d_scalar_q")
            (const_string "neon_fp_mla")

          (eq_attr "type" "neon_fp_recpe_s, neon_fp_recpe_s_q,\
                           neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\
                           neon_fp_recpe_d, neon_fp_recpe_d_q,\
                           neon_fp_rsqrte_d, neon_fp_rsqrte_d_q")
            (const_string "neon_fp_estimate")

          (eq_attr "type" "neon_fp_recpx_s, neon_fp_recpx_s_q,\
                           neon_fp_recpx_d, neon_fp_recpx_d_q")
            (const_string "neon_fp_estimatex")

          (eq_attr "type" "neon_fp_recps_s, neon_fp_recps_s_q,\
                           neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\
                           neon_fp_recps_d, neon_fp_recps_d_q,\
                           neon_fp_rsqrts_d, neon_fp_rsqrts_d_q")
            (const_string "neon_fp_step")

          ;; Bit manipulation, permutes and moves.
          (eq_attr "type" "neon_rbit, neon_rbit_q,\
                           neon_cls, neon_cls_q, neon_cnt, neon_cnt_q,\
                           neon_dup, neon_dup_q,\
                           neon_rev, neon_rev_q,\
                           neon_move, neon_move_q,\
                           neon_ext, neon_permute, neon_zip")
            (const_string "neon_bitops")

          (eq_attr "type" "neon_ext_q, neon_permute_q, neon_zip_q")
            (const_string "neon_bitops_q")

          (eq_attr "type" "neon_bsl, neon_bsl_q")
            (const_string "neon_bitins")

          (eq_attr "type" "neon_tbl1, neon_tbl2, neon_tbl3, neon_tbl4")
            (const_string "neon_tbl")

          ;; Transfers between the general-purpose and Neon register files.
          (eq_attr "type" "neon_from_gp, neon_from_gp_q, f_mcr, f_mcrr")
            (const_string "neon_from_gp")

          (eq_attr "type" "neon_to_gp, neon_to_gp_q, f_mrc, f_mrrc")
            (const_string "neon_to_gp")

          ;; Structure loads.
          (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q")
            (const_string "neon_load1_1")

          (eq_attr "type" "neon_load1_2reg, neon_load1_2reg_q")
            (const_string "neon_load1_2")

          (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q")
            (const_string "neon_load1_3")

          (eq_attr "type" "neon_load1_4reg, neon_load1_4reg_q")
            (const_string "neon_load1_4")

          (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q")
            (const_string "neon_load1_one")

          (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q")
            (const_string "neon_load1_all")

          (eq_attr "type" "neon_load2_2reg, neon_load2_2reg_q,\
                           neon_load2_4reg, neon_load2_4reg_q")
            (const_string "neon_load2_2")

          (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q")
            (const_string "neon_load2_one")

          (eq_attr "type" "neon_load2_all_lanes, neon_load2_all_lanes_q")
            (const_string "neon_load2_all")

          (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q")
            (const_string "neon_load3_3")

          (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q")
            (const_string "neon_load3_one")

          (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q")
            (const_string "neon_load3_all")

          (eq_attr "type" "neon_load4_4reg, neon_load4_4reg_q")
            (const_string "neon_load4_4")

          (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
            (const_string "neon_load4_one")

          (eq_attr "type" "neon_load4_all_lanes, neon_load4_all_lanes_q")
            (const_string "neon_load4_all")

          ;; Structure stores.
          (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q")
            (const_string "neon_store1_1")

          (eq_attr "type" "neon_store1_2reg, neon_store1_2reg_q")
            (const_string "neon_store1_2")

          (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q")
            (const_string "neon_store1_3")

          (eq_attr "type" "neon_store1_4reg, neon_store1_4reg_q")
            (const_string "neon_store1_4")

          (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q")
            (const_string "neon_store1_one")

          (eq_attr "type" "neon_store2_2reg, neon_store2_2reg_q,\
                           neon_store2_4reg, neon_store2_4reg_q")
            (const_string "neon_store2_2")

          (eq_attr "type" "neon_store2_one_lane, neon_store2_one_lane_q")
            (const_string "neon_store2_one")

          (eq_attr "type" "neon_store3_3reg, neon_store3_3reg_q")
            (const_string "neon_store3_3")

          (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q")
            (const_string "neon_store3_one")

          (eq_attr "type" "neon_store4_4reg, neon_store4_4reg_q")
            (const_string "neon_store4_4")

          (eq_attr "type" "neon_store4_one_lane, neon_store4_one_lane_q")
            (const_string "neon_store4_one")]

          ;; Default when no arm above matches.
          (const_string "unknown")))

;; The Exynos M1 core is modeled as a triple issue pipeline that has
;; the following functional units.
;;
;; The units are spread over three automata: "exynos_m1_gp" (integer and
;; branch units), "exynos_m1_ls" (load/store units) and "exynos_m1_fp"
;; (Neon and FP units).

(define_automaton "exynos_m1_gp")
(define_automaton "exynos_m1_ls")
(define_automaton "exynos_m1_fp")

;; 1.  Two pipelines for simple integer operations: A, B
;; 2.  One pipeline for simple or complex integer operations: C

(define_cpu_unit "em1_xa, em1_xb, em1_xc" "exynos_m1_gp")

;; "em1_alu" is satisfied by any one of the three integer pipelines;
;; "em1_c" names pipeline C specifically.
(define_reservation "em1_alu" "(em1_xa | em1_xb | em1_xc)")
(define_reservation "em1_c" "em1_xc")

;; 3.  Two asymmetric pipelines for Neon and FP operations: F0, F1

(define_cpu_unit "em1_f0, em1_f1" "exynos_m1_fp")

;; Named aliases for the operation classes each pipeline accepts.
;; Only "em1_nalu" may be satisfied by either pipeline.

;; Classes bound to F0.
(define_reservation "em1_fmac" "em1_f0")
(define_reservation "em1_fcvt" "em1_f0")
(define_reservation "em1_nalu" "(em1_f0 | em1_f1)")
(define_reservation "em1_nalu0" "em1_f0")
(define_reservation "em1_nalu1" "em1_f1")
(define_reservation "em1_nmisc" "em1_f0")
(define_reservation "em1_ncrypt" "em1_f0")
;; Classes bound to F1.
(define_reservation "em1_fadd" "em1_f1")
(define_reservation "em1_fvar" "em1_f1")
(define_reservation "em1_fst" "em1_f1")

;; 4.  One pipeline for branch operations: BX
;;
;; The unit belongs to the "exynos_m1_gp" automaton; "em1_br" is its
;; reservation alias.

(define_cpu_unit "em1_bx" "exynos_m1_gp")

(define_reservation "em1_br" "em1_bx")

;; 5.  One AGU for loads: L
;;     One AGU for stores and one pipeline for stores: S, SD

(define_cpu_unit "em1_lx" "exynos_m1_ls")
(define_cpu_unit "em1_sx, em1_sd" "exynos_m1_ls")

;; A store reserves both the store AGU and the store pipeline in the
;; same cycle.
(define_reservation "em1_ld" "em1_lx")
(define_reservation "em1_st" "(em1_sx + em1_sd)")

;; Common occurrences
;;
;; "em1_sfst" and "em1_lfst" reserve the F1 store class ("em1_fst")
;; together with the store units or the load AGU respectively.
(define_reservation "em1_sfst" "(em1_fst + em1_st)")
(define_reservation "em1_lfst" "(em1_fst + em1_ld)")

;; Branches
;;
;; No latency as there is no result
;; TODO: Unconditional branches use no units;
;; conditional branches add the BX unit;
;; indirect branches add the C unit.
(define_insn_reservation "exynos_m1_branch" 0
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "branch"))
  "em1_br")

;; Calls are given a latency of 1 and may use any integer pipeline.
(define_insn_reservation "exynos_m1_call" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "call"))
  "em1_alu")

;; Basic ALU
;;
;; Simple ALU without shift, non-predicated
;; Latency 1 on any of the three integer pipelines.
(define_insn_reservation "exynos_m1_alu" 1
  (and (eq_attr "tune" "exynosm1")
       (and (not (eq_attr "predicated" "yes"))
            (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\
                             alu_sreg, alus_sreg, logic_reg, logics_reg,\
                             adc_imm, adcs_imm, adc_reg, adcs_reg,\
                             adr, bfm, bfx, clz, rbit, rev, csel, alu_dsp_reg,\
                             shift_imm, shift_reg, rotate_imm, extend,\
                             mov_imm, mov_reg,\
                             mvn_imm, mvn_reg,\
                             mrs, multiple")))
  "em1_alu")

;; Simple ALU without shift, predicated
;; Same latency, but restricted to pipeline C.  The type list matches the
;; non-predicated one except that "csel" is absent.
(define_insn_reservation "exynos_m1_alu_p" 1
  (and (eq_attr "tune" "exynosm1")
       (and (eq_attr "predicated" "yes")
            (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\
                             alu_sreg, alus_sreg, logic_reg, logics_reg,\
                             adc_imm, adcs_imm, adc_reg, adcs_reg,\
                             adr, bfm, bfx, clz, rbit, rev, alu_dsp_reg,\
                             shift_imm, shift_reg, rotate_imm, extend,\
                             mov_imm, mov_reg,\
                             mvn_imm, mvn_reg,\
                             mrs, multiple")))
  "em1_c")

;; ALU ops with immediate shift
;; TODO: if the shift value is between 0 and 3, the latency is just 1 cycle;
;;       otherwise it takes 2 cycles and the unit is blocked;
;;       for now, assume the latter's latency and the former's units.
(define_insn_reservation "exynos_m1_alu_shift" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "alu_ext, alus_ext,\
                        alu_shift_imm_lsl_1to4, alu_shift_imm_other,\
                        alus_shift_imm,\
                        logic_shift_imm, logics_shift_imm,\
                        mov_shift, mvn_shift"))
  "(em1_alu)")

;; ALU ops with register controlled shift, non-predicated
;; The integer pipeline reservation is repeated for two cycles.
(define_insn_reservation "exynos_m1_alu_shift_reg" 2
  (and (eq_attr "tune" "exynosm1")
       (and (not (eq_attr "predicated" "yes"))
            (eq_attr "type" "alu_shift_reg, alus_shift_reg,\
                             logic_shift_reg, logics_shift_reg,\
                             mov_shift_reg, mvn_shift_reg")))
  "(em1_alu * 2)")

;; ALU ops with register controlled shift, predicated
;; One cycle on any integer pipeline, followed by one cycle on pipeline C.
(define_insn_reservation "exynos_m1_alu_shift_reg_p" 2
  (and (eq_attr "tune" "exynosm1")
       (and (eq_attr "predicated" "yes")
            (eq_attr "type" "alu_shift_reg, alus_shift_reg,\
                             logic_shift_reg, logics_shift_reg,\
                             mov_shift_reg, mvn_shift_reg")))
  "(em1_alu, em1_c)")

;; Integer multiply
;; Selected through the "mul32" and "widen_mul64" attributes rather than
;; through "type".
(define_insn_reservation "exynos_m1_mla" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "mul32" "yes"))
  "em1_c")

;; One cycle on any integer pipeline, followed by one cycle on pipeline C.
(define_insn_reservation "exynos_m1_mlal" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "widen_mul64" "yes"))
  "em1_alu, em1_c")

;; Integer divide
;; TODO: assume the median latency; blocks other divisions
(define_insn_reservation "exynos_m1_div" 13
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "udiv, sdiv"))
  "em1_c")

;; Load-store execution Unit
;;
;; Loads of up to 2 words.
(define_insn_reservation "exynos_m1_load" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "load_byte, load_4, load_8"))
  "em1_ld")

;; Loads of 3 or 4 words.
;; The load AGU is reserved for three consecutive cycles.
(define_insn_reservation "exynos_m1_loadm" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "load_12, load_16"))
  "(em1_ld * 3)")

;; Stores of up to 2 words.
(define_insn_reservation "exynos_m1_store" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "store_4, store_8"))
  "em1_st")

;; Stores of 3 or 4 words.
;; The store units are reserved for three consecutive cycles.
(define_insn_reservation "exynos_m1_storem" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "store_12, store_16"))
  "(em1_st * 3)")

;; Advanced SIMD Unit
;;
;; The reservations in this section select insns through the
;; "exynos_m1_neon_type" attribute.
;;
;; Integer Arithmetic Instructions.

;; NOTE(review): unlike its siblings this reservation's name has no
;; "neon_" infix; it is kept as is because bypasses match on names.
(define_insn_reservation "exynos_m1_arith_simple" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_arith_simple"))
  "em1_nmisc")

(define_insn_reservation "exynos_m1_neon_arith_basic" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_arith_basic"))
  "em1_nalu")

(define_insn_reservation "exynos_m1_neon_arith_complex" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_arith_complex"))
  "em1_nmisc")

;; Integer Multiply Instructions.

;; Multiplies and both multiply-accumulate classes share one reservation.
(define_insn_reservation "exynos_m1_neon_multiply" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type"
                "neon_multiply, neon_mla, neon_sat_mla_long"))
  "em1_nmisc")

;; Integer Shift Instructions.
;;
;; Basic shifts (latency 2) may use either Neon pipeline; accumulating
;; and complex shifts (latency 4) are restricted to "em1_nalu1".

(define_insn_reservation
  "exynos_m1_neon_shift_acc" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_shift_acc"))
  "em1_nalu1")

(define_insn_reservation
  "exynos_m1_neon_shift_basic" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type"
                "neon_shift_imm_basic, neon_shift_reg_basic"))
  "em1_nalu")

(define_insn_reservation
  "exynos_m1_neon_shift_complex" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type"
                "neon_shift_imm_complex, neon_shift_reg_complex"))
  "em1_nalu1")

;; Floating Point Instructions.
;;
;; Vector FP unary, add, absolute-difference and compare classes.

(define_insn_reservation
  "exynos_m1_neon_fp_unary" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_unary"))
  "em1_nalu")

(define_insn_reservation
  "exynos_m1_neon_fp_add" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_add"))
  "em1_fadd")

(define_insn_reservation
  "exynos_m1_neon_fp_abd" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_abd"))
  "em1_nmisc")

;; The "neon_fp_compare" class also covers the vector FP min/max types.
(define_insn_reservation
  "exynos_m1_neon_fp_compare" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_compare"))
  "em1_nmisc")

;; TODO: the latency and throughput of reduce insns actually varies between
;; 3-5 and 1/4-1, but picked the median values.
(define_insn_reservation
  "exynos_m1_neon_fp_reduc" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_minmax"))
  "(em1_nmisc * 4)")

;; Two cycles on either Neon pipeline, followed by one cycle of "em1_fadd".
(define_insn_reservation
  "exynos_m1_neon_fp_reduc_add" 10
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_reduc_add"))
  "((em1_nalu * 2), em1_fadd)")

;; Rounding and conversion share the "em1_fcvt" class and a latency of 4.
(define_insn_reservation
  "exynos_m1_neon_fp_round" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_round"))
  "em1_fcvt")

(define_insn_reservation
  "exynos_m1_neon_fp_cvt" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_cvt"))
  "em1_fcvt")

;; Vector FP multiply, multiply-accumulate, estimate and step classes.
;; Multiply, multiply-accumulate and step all reserve "em1_fmac".
(define_insn_reservation
  "exynos_m1_neon_fp_mul" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_mul"))
  "em1_fmac")

(define_insn_reservation
  "exynos_m1_neon_fp_mla" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_mla"))
  "em1_fmac")

(define_insn_reservation
  "exynos_m1_neon_fp_estimate" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_estimate"))
  "em1_fcvt")

(define_insn_reservation
  "exynos_m1_neon_fp_estimatex" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_estimatex"))
  "em1_nmisc")

(define_insn_reservation
  "exynos_m1_neon_fp_step" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_fp_step"))
  "em1_fmac")

;; Miscellaneous Instructions.

(define_insn_reservation
  "exynos_m1_neon_bitops" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_bitops"))
  "em1_nalu")

;; The "neon_bitops_q" class reserves a Neon pipeline in each of two
;; consecutive cycles.
(define_insn_reservation
  "exynos_m1_neon_bitops_q" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_bitops_q"))
  "(em1_nalu, em1_nalu)")

(define_insn_reservation
  "exynos_m1_neon_bitins" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_bitins"))
  "em1_nalu1")

;; TODO: it is more complicated than this.
(define_insn_reservation
  "exynos_m1_neon_tbl" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_tbl"))
  "em1_nalu1")

;; Transfers from general-purpose registers reserve the store units.
(define_insn_reservation
  "exynos_m1_neon_from_gp" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_from_gp"))
  "em1_st")

;; Transfers to general-purpose registers reserve "em1_fst" and the
;; load AGU together.
(define_insn_reservation
  "exynos_m1_neon_to_gp" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_to_gp"))
  "em1_lfst")

;; Load Instructions.
;;
;; FP loads and load-pairs are selected by "type"; the structure loads
;; are selected by "exynos_m1_neon_type".

(define_insn_reservation
  "exynos_m1_neon_load" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_loads, f_loadd, neon_ldp"))
  "em1_ld")

(define_insn_reservation
  "exynos_m1_neon_load_q" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "neon_ldp_q"))
  "(em1_ld, em1_ld)")

;; Also covers the "neon_load1_all" class.
(define_insn_reservation
  "exynos_m1_neon_load1_1" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_1, neon_load1_all"))
  "em1_ld")

;; For the 2-, 3- and 4-register forms the load AGU is reserved for as
;; many cycles as there are registers.
(define_insn_reservation
  "exynos_m1_neon_load1_2" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_2"))
  "(em1_ld * 2)")

(define_insn_reservation
  "exynos_m1_neon_load1_3" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_3"))
  "(em1_ld * 3)")

(define_insn_reservation
  "exynos_m1_neon_load1_4" 8
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_4"))
  "(em1_ld * 4)")

;; Single-lane, all-lane and multi-structure Neon loads.
;; The single-lane ("_one") forms reserve the load AGU first and then a
;; Neon pipeline; the other forms reserve only the load AGU.
(define_insn_reservation
  "exynos_m1_neon_load1_one" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load1_one"))
  "((em1_ld * 2), em1_nalu)")

(define_insn_reservation
  "exynos_m1_neon_load2_2" 10
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load2_2"))
  "(em1_ld * 5)")

(define_insn_reservation
  "exynos_m1_neon_load2_one" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load2_one"))
  "((em1_ld * 2), (em1_nalu * 2))")

(define_insn_reservation
  "exynos_m1_neon_load2_all" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load2_all"))
  "(em1_ld * 2)")

(define_insn_reservation
  "exynos_m1_neon_load3_3" 12
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load3_3"))
  "(em1_ld * 6)")

(define_insn_reservation
  "exynos_m1_neon_load3_one" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load3_one"))
  "((em1_ld * 4), (em1_nalu * 3))")

(define_insn_reservation
  "exynos_m1_neon_load3_all" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load3_all"))
  "(em1_ld * 3)")

(define_insn_reservation
  "exynos_m1_neon_load4_4" 14
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load4_4"))
  "(em1_ld * 7)")

(define_insn_reservation
  "exynos_m1_neon_load4_one" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load4_one"))
  "((em1_ld * 4), (em1_nalu * 4))")

(define_insn_reservation
  "exynos_m1_neon_load4_all" 8
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_load4_all"))
  "(em1_ld * 4)")

;; Store Instructions.
;;
;; FP stores and store-pairs are selected by "type"; the structure stores
;; are selected by "exynos_m1_neon_type".  All of them reserve "em1_sfst",
;; i.e. "em1_fst" together with the store units.

(define_insn_reservation
  "exynos_m1_neon_store" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_stores, f_stored, neon_stp"))
  "em1_sfst")

(define_insn_reservation
  "exynos_m1_neon_store_q" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "neon_stp_q"))
  "(em1_sfst * 2)")

;; For the 1- to 4-register forms both the latency and the number of
;; reserved cycles equal the register count.
(define_insn_reservation
  "exynos_m1_neon_store1_1" 1
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_1"))
  "em1_sfst")

(define_insn_reservation
  "exynos_m1_neon_store1_2" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_2"))
  "(em1_sfst * 2)")

(define_insn_reservation
  "exynos_m1_neon_store1_3" 3
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_3"))
  "(em1_sfst * 3)")

(define_insn_reservation
  "exynos_m1_neon_store1_4" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_4"))
  "(em1_sfst * 4)")

(define_insn_reservation
  "exynos_m1_neon_store1_one" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store1_one"))
  "em1_sfst")

;; The 2-, 3- and 4-structure stores each cover both the multi-register
;; and the single-lane class.
(define_insn_reservation
  "exynos_m1_neon_store2" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store2_2, neon_store2_one"))
  "em1_sfst, em1_fst")

(define_insn_reservation
  "exynos_m1_neon_store3" 16
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store3_3, neon_store3_one"))
  "((em1_sfst * 3), (em1_fst * 2), em1_nalu)")

(define_insn_reservation
  "exynos_m1_neon_store4" 17
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "exynos_m1_neon_type" "neon_store4_4, neon_store4_one"))
  "((em1_sfst * 4), (em1_fst * 2), em1_nalu)")

;; Floating-Point Operations.
;;
;; These reservations select insns through the "type" attribute.

(define_insn_reservation "exynos_m1_fp_const" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fconsts, fconstd"))
  "em1_nalu")

(define_insn_reservation "exynos_m1_fp_add" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fadds, faddd"))
  "em1_fadd")

(define_insn_reservation "exynos_m1_fp_mul" 5
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fmuls, fmuld"))
  "em1_fmac")

(define_insn_reservation "exynos_m1_fp_mac" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fmacs, ffmas, fmacd, ffmad"))
  "em1_fmac")

;; Conversions and round-to-integral insns.
(define_insn_reservation "exynos_m1_fp_cvt" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_cvt, f_rints, f_rintd"))
  "em1_fcvt")

;; FP <-> integer conversions.
;; FP to integer: "em1_fcvt", then "em1_lfst".
;; Integer to FP: the store units, then "em1_fcvt".
(define_insn_reservation "exynos_m1_fp_cvt_i" 13
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_cvtf2i"))
  "(em1_fcvt, em1_lfst)")

(define_insn_reservation "exynos_m1_i_cvt_fp" 9
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_cvti2f"))
  "(em1_st, em1_fcvt)")

;; FP compares, conditional compares and conditional selects.
(define_insn_reservation "exynos_m1_fp_cmp" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fcmps, fcmpd"))
  "em1_nmisc")

(define_insn_reservation "exynos_m1_fp_ccmp" 7
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fccmps, fccmpd"))
  "(em1_st, em1_nmisc)")

;; The store units and "em1_nalu0" are reserved in the same cycle.
(define_insn_reservation "exynos_m1_fp_sel" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fcsel"))
  "(em1_st + em1_nalu0)")

;; Simple FP arithmetic and FP moves.
(define_insn_reservation "exynos_m1_fp_arith" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "ffariths, ffarithd"))
  "em1_nalu")

(define_insn_reservation "exynos_m1_fp_cpy" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fmov"))
  "em1_nalu")

;; FP divide and square root, scalar and vector alike.
;; Single and double precision differ in latency (15 and 22) but both
;; reserve "em1_fvar" for nine cycles.
(define_insn_reservation "exynos_m1_fp_divs" 15
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fdivs, neon_fp_div_s, neon_fp_div_s_q,\
                        fsqrts, neon_fp_sqrt_s, neon_fp_sqrt_s_q"))
  "(em1_fvar * 9)")

(define_insn_reservation "exynos_m1_fp_divd" 22
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "fdivd, neon_fp_div_d, neon_fp_div_d_q,\
                        fsqrtd, neon_fp_sqrt_d, neon_fp_sqrt_d_q"))
  "(em1_fvar * 9)")

;; Scalar FP min/max.
(define_insn_reservation "exynos_m1_fp_minmax" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "f_minmaxs, f_minmaxd"))
  "(em1_nmisc * 2)")

;; Crypto Operations.
;;
;; All but the CRC reservation use "em1_ncrypt".

(define_insn_reservation "exynos_m1_crypto_simple" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "crypto_aese, crypto_aesmc,\
                        crypto_sha1_xor, crypto_sha1_fast, crypto_sha256_fast"))
  "em1_ncrypt")

(define_insn_reservation "exynos_m1_crypto_complex" 6
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow"))
  "em1_ncrypt")

;; The long-multiply "type" values are scheduled here rather than through
;; "exynos_m1_neon_type".
(define_insn_reservation "exynos_m1_crypto_poly" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "neon_mul_b_long, neon_mul_h_long, neon_mul_s_long"))
  "em1_ncrypt")

(define_insn_reservation "exynos_m1_crypto_polyl" 4
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "neon_mul_d_long"))
  "em1_ncrypt")

;; CRC insns use integer pipeline C.
(define_insn_reservation "exynos_m1_crc" 2
  (and (eq_attr "tune" "exynosm1")
       (eq_attr "type" "crc"))
  "em1_c")

;; Simple execution unit bypasses
;;
;; Each bypass gives the latency from the producers named in its first
;; list to the consumers named in its second list; "*" in a name is a
;; wildcard.

;; Pre-decrement and post-increment addressing modes update the register quickly.
;; TODO: figure out how to tell the addressing mode register from the loaded one.
(define_bypass 1 "exynos_m1_store*, exynos_m1_neon_store*"
                 "exynos_m1_store*, exynos_m1_neon_store*,\
                  exynos_m1_load*, exynos_m1_neon_load*")

;; MLAs can feed other MLAs quickly.
(define_bypass 1 "exynos_m1_mla*" "exynos_m1_mla*")

;; Insns in FMAC or FADD can feed other such insns quickly.
;; Each bypass latency is one cycle less than the producer's default.
;; Scalar producers feed scalar consumers and Neon producers feed Neon
;; consumers.
(define_bypass 4 "exynos_m1_fp_mul"
                 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
(define_bypass 5 "exynos_m1_fp_mac"
                 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
(define_bypass 4 "exynos_m1_neon_fp_mul"
                 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
                  exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
(define_bypass 5 "exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step"
                 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
                  exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")
(define_bypass 3 "exynos_m1_fp_add"
                 "exynos_m1_fp_add, exynos_m1_fp_mul, exynos_m1_fp_mac")
(define_bypass 3 "exynos_m1_neon_fp_add"
                 "exynos_m1_neon_fp_add, exynos_m1_neon_fp_mul,\
                  exynos_m1_neon_fp_mla, exynos_m1_neon_fp_step")

;; Insns in NALU can feed other such insns quickly.
;; Each bypass latency is one cycle less than the producer's default.
(define_bypass 1 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy"
                 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\
                  exynos_m1_fp_sel")
(define_bypass 3 "exynos_m1_fp_sel"
                 "exynos_m1_fp_const, exynos_m1_fp_arith, exynos_m1_fp_cpy,\
                  exynos_m1_fp_sel")
(define_bypass 1 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
                  exynos_m1_neon_bitops, exynos_m1_neon_bitins,\
                  exynos_m1_neon_tbl"
                 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
                  exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\
                  exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\
                  exynos_m1_neon_tbl")
(define_bypass 3 "exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex"
                 "exynos_m1_neon_arith_basic, exynos_m1_neon_shift_basic,\
                  exynos_m1_neon_shift_acc, exynos_m1_neon_shift_complex,\
                  exynos_m1_neon_bitops*, exynos_m1_neon_bitins,\
                  exynos_m1_neon_tbl")
(define_bypass 1 "exynos_m1_neon_fp_unary" "exynos_m1_neon_fp_unary")

;; Insns in NCRYPT can feed other such insns quickly.
;; "exynos_m1_crypto_poly*" matches both the "poly" and "polyl"
;; reservations.
(define_bypass 1 "exynos_m1_crypto_simple, exynos_m1_crypto_poly"
                 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
                  exynos_m1_crypto_poly*")
(define_bypass 3 "exynos_m1_crypto_polyl"
                 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
                  exynos_m1_crypto_poly*")
(define_bypass 5 "exynos_m1_crypto_complex"
                 "exynos_m1_crypto_simple, exynos_m1_crypto_complex,\
                  exynos_m1_crypto_poly*")

;; Predicted branches take no time, but mispredicted ones take forever anyway.
;; Every reservation in this file feeds a call or branch with latency 1.
(define_bypass 1 "exynos_m1_*"
                 "exynos_m1_call, exynos_m1_branch")
