1;; ARM Cortex-A8 NEON scheduling description.
2;; Copyright (C) 2007-2018 Free Software Foundation, Inc.
3;; Contributed by CodeSourcery.
4
5;; This file is part of GCC.
6
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published
9;; by the Free Software Foundation; either version 3, or (at your
10;; option) any later version.
11
12;; GCC is distributed in the hope that it will be useful, but WITHOUT
13;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15;; License for more details.
16
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
;; Scheduling class of an insn in the Cortex-A8 NEON pipeline model.
;; The first string lists every permitted class.  The cond derives the
;; class from the generic "type" attribute: each eq_attr arm names a set
;; of types, and the const_string that follows it is the class those
;; types share.  A type matched by no arm gets "unknown".
;; NOTE(review): no arm of this cond yields neon_vld3_vld4_all_lanes,
;; neon_ldm_2, neon_stm_2 or none; confirm whether they are assigned
;; elsewhere or are leftovers.
(define_attr "cortex_a8_neon_type"
   "neon_int_1,neon_int_2,neon_int_3,neon_int_4,neon_int_5,neon_vqneg_vqabs,
   neon_bit_ops_q,
   neon_vaba,neon_vaba_qqq, neon_vmov,
   neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,neon_mul_qqq_8_16_32_ddd_32,
   neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,
   neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,neon_mla_qqq_8_16,
   neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,
   neon_mla_qqq_32_qqd_32_scalar,neon_mul_ddd_16_scalar_32_16_long_scalar,
   neon_mul_qqd_32_scalar,neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,
   neon_shift_1,neon_shift_2,neon_shift_3,
   neon_vqshl_vrshl_vqrshl_qqq,neon_vsra_vrsra,neon_fp_vadd_ddd_vabs_dd,
   neon_fp_vadd_qqq_vabs_qq,neon_fp_vsum,neon_fp_vmul_ddd,neon_fp_vmul_qqd,
   neon_fp_vmla_ddd,neon_fp_vmla_qqq,neon_fp_vmla_ddd_scalar,
   neon_fp_vmla_qqq_scalar,neon_fp_vrecps_vrsqrts_ddd,
   neon_fp_vrecps_vrsqrts_qqq,neon_bp_simple,neon_bp_2cycle,neon_bp_3cycle,
   neon_ldr,neon_str,neon_vld1_1_2_regs,neon_vld1_3_4_regs,
   neon_vld2_2_regs_vld1_vld2_all_lanes,neon_vld2_4_regs,neon_vld3_vld4,
   neon_vst1_1_2_regs_vst2_2_regs,neon_vst1_3_4_regs,
   neon_vst2_4_regs_vst3_vst4,neon_vld1_vld2_lane,
   neon_vld3_vld4_lane,neon_vst1_vst2_lane,neon_vst3_vst4_lane,
   neon_vld3_vld4_all_lanes,neon_mcr,neon_mcr_2_mcrr,neon_mrc,neon_mrrc,
   neon_ldm_2,neon_stm_2,none,unknown"
  (cond [
          ;; Integer arithmetic and logic classes (neon_int_1 .. neon_int_5,
          ;; neon_vqneg_vqabs, neon_vmov, neon_bit_ops_q, neon_vaba*).
          (eq_attr "type" "neon_logic, neon_logic_q,\
                           neon_bsl, neon_cls, neon_cnt,\
                           neon_add, neon_add_q")
                          (const_string "neon_int_1")
          (eq_attr "type" "neon_add_widen, neon_sub_widen,\
                           neon_sub, neon_sub_q")
                          (const_string "neon_int_2")
          (eq_attr "type" "neon_neg, neon_neg_q,\
                           neon_reduc_add, neon_reduc_add_q,\
                           neon_reduc_add_long,\
                           neon_add_long, neon_sub_long")
                          (const_string "neon_int_3")
          (eq_attr "type" "neon_abs, neon_abs_q,
                           neon_compare_zero, neon_compare_zero_q,\
                           neon_add_halve_narrow_q,\
                           neon_sub_halve_narrow_q,\
                           neon_add_halve, neon_add_halve_q,\
                           neon_qadd, neon_qadd_q,\
                           neon_tst, neon_tst_q")
                          (const_string "neon_int_4")
          (eq_attr "type" "neon_abd_long, neon_sub_halve, neon_sub_halve_q,\
                           neon_qsub, neon_qsub_q,\
                           neon_abd, neon_abd_q,\
                           neon_compare, neon_compare_q,\
                           neon_minmax, neon_minmax_q, neon_reduc_minmax,\
                           neon_reduc_minmax_q")
                          (const_string "neon_int_5")
          (eq_attr "type" "neon_qneg, neon_qneg_q, neon_qabs, neon_qabs_q")
                           (const_string "neon_vqneg_vqabs")
          (eq_attr "type" "neon_move, neon_move_q")
                           (const_string "neon_vmov")
          (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q")
                           (const_string "neon_bit_ops_q")
          (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc")
                          (const_string "neon_vaba")
          (eq_attr "type" "neon_arith_acc_q")
                          (const_string "neon_vaba_qqq")
          ;; Shift classes.
          (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
                           neon_shift_imm_long, neon_shift_imm_narrow_q,\
                           neon_shift_reg")
                           (const_string "neon_shift_1")
          (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,
                           neon_sat_shift_imm_narrow_q,\
                           neon_sat_shift_reg")
                           (const_string "neon_shift_2")
          (eq_attr "type" "neon_shift_reg_q")
                           (const_string "neon_shift_3")
          (eq_attr "type" "neon_sat_shift_reg_q")
                           (const_string "neon_vqshl_vrshl_vqrshl_qqq")
          (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
                           (const_string "neon_vsra_vrsra")
          ;; Integer multiply and multiply-accumulate classes.
          (eq_attr "type" "neon_mul_b, neon_mul_h,\
                           neon_mul_b_long, neon_mul_h_long,\
                           neon_sat_mul_b, neon_sat_mul_h,\
                           neon_sat_mul_b_long, neon_sat_mul_h_long")
                           (const_string
                            "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
          (eq_attr "type" "neon_mul_b_q, neon_mul_h_q,\
                           neon_sat_mul_b_q, neon_sat_mul_h_q")
                           (const_string "neon_mul_qqq_8_16_32_ddd_32")
          (eq_attr "type" "neon_mul_s, neon_mul_s_long,\
                           neon_sat_mul_s, neon_sat_mul_s_long,\
                           neon_mul_h_scalar_q, neon_sat_mul_h_scalar_q,\
                           neon_mul_s_scalar, neon_sat_mul_s_scalar,\
                           neon_mul_s_scalar_long,\
                           neon_sat_mul_s_scalar_long")
                           (const_string
             "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")
          (eq_attr "type" "neon_mla_b, neon_mla_h,\
                           neon_mla_b_long, neon_mla_h_long,\
                           neon_sat_mla_b_long, neon_sat_mla_h_long,\
                           neon_sat_mla_h_scalar_long")
                           (const_string
                             "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
          (eq_attr "type" "neon_mla_b_q, neon_mla_h_q")
                           (const_string "neon_mla_qqq_8_16")
          (eq_attr "type" "neon_mla_s, neon_mla_s_long,\
                           neon_sat_mla_s_long,\
                           neon_mla_h_scalar_q, neon_mla_s_scalar,\
                           neon_mla_s_scalar_long,\
                           neon_sat_mla_s_scalar_long")
                           (const_string
 "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")
          (eq_attr "type" "neon_mla_s_q, neon_mla_s_scalar_q")
                           (const_string "neon_mla_qqq_32_qqd_32_scalar")
          (eq_attr "type" "neon_mul_h_scalar, neon_sat_mul_h_scalar,\
                           neon_mul_h_scalar_long,\
                           neon_sat_mul_h_scalar_long")
                          (const_string
                            "neon_mul_ddd_16_scalar_32_16_long_scalar")
          (eq_attr "type" "neon_mul_s_q, neon_sat_mul_s_q,\
                           neon_mul_s_scalar_q")
                           (const_string "neon_mul_qqd_32_scalar")
          (eq_attr "type" "neon_mla_h_scalar, neon_mla_h_scalar_long")
                           (const_string
                             "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")
          ;; Floating-point classes.
          ;; NOTE(review): neon_fp_mul_s and neon_fp_mul_s_q are grouped with
          ;; the vadd/vabs classes here; only the *_scalar multiply types map
          ;; to neon_fp_vmul_*.  Confirm this is intended.
          (eq_attr "type" "neon_fp_abd_s, neon_fp_abs_s, neon_fp_neg_s,\
                           neon_fp_addsub_s, neon_fp_compare_s,\
                           neon_fp_minmax_s, neon_fp_mul_s,\
                           neon_fp_recpe_s, neon_fp_rsqrte_s,\
                           neon_fp_to_int_s, neon_int_to_fp_s")
                           (const_string "neon_fp_vadd_ddd_vabs_dd")
          (eq_attr "type" "neon_fp_abd_s_q, neon_fp_abs_s_q,\
                           neon_fp_neg_s_q,\
                           neon_fp_addsub_s_q, neon_fp_compare_s_q,\
                           neon_fp_minmax_s_q, neon_fp_mul_s_q,\
                           neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
                           neon_fp_to_int_s_q, neon_int_to_fp_s_q")
                           (const_string "neon_fp_vadd_qqq_vabs_qq")
          (eq_attr "type" "neon_fp_reduc_add_s, neon_fp_reduc_minmax_s,\
                           neon_fp_reduc_add_s_q, neon_fp_reduc_minmax_s_q")
                           (const_string "neon_fp_vsum")
          (eq_attr "type" "neon_fp_mul_s_scalar")
                           (const_string "neon_fp_vmul_ddd")
          (eq_attr "type" "neon_fp_mul_s_scalar_q")
                           (const_string "neon_fp_vmul_qqd")
          (eq_attr "type" "neon_fp_mla_s")
                           (const_string "neon_fp_vmla_ddd")
          (eq_attr "type" "neon_fp_mla_s_q")
                           (const_string "neon_fp_vmla_qqq")
          (eq_attr "type" "neon_fp_mla_s_scalar")
                           (const_string "neon_fp_vmla_ddd_scalar")
          (eq_attr "type" "neon_fp_mla_s_scalar_q")
                           (const_string "neon_fp_vmla_qqq_scalar")
          (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s")
                           (const_string "neon_fp_vrecps_vrsqrts_ddd")
          (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q")
                           (const_string "neon_fp_vrecps_vrsqrts_qqq")
          ;; neon_bp_* classes: narrowing moves, dup, permute, zip, ext, rev
          ;; and table-lookup types.
          (eq_attr "type" "neon_move_narrow_q, neon_dup,\
                           neon_dup_q, neon_permute, neon_zip,\
                           neon_ext, neon_rev, neon_rev_q")
                           (const_string "neon_bp_simple")
          (eq_attr "type" "neon_permute_q, neon_ext_q, neon_tbl1, neon_tbl2")
                           (const_string "neon_bp_2cycle")
          (eq_attr "type" "neon_zip_q, neon_tbl3, neon_tbl4")
                           (const_string "neon_bp_3cycle")
          ;; Load and store classes.
          (eq_attr "type" "neon_ldr")
                           (const_string "neon_ldr")
          (eq_attr "type" "neon_str")
                           (const_string "neon_str")
          (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q,\
                           neon_load1_2reg, neon_load1_2reg_q,\
                           neon_load2_2reg, neon_load2_2reg_q")
                           (const_string "neon_vld1_1_2_regs")
          (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
                           neon_load1_4reg, neon_load1_4reg_q")
                           (const_string "neon_vld1_3_4_regs")
          (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q,\
                           neon_load2_all_lanes, neon_load2_all_lanes_q")
                           (const_string
                              "neon_vld2_2_regs_vld1_vld2_all_lanes")
          ;; The vld3/vld4 all-lanes types share the class of the
          ;; four-register vld2 types.
          (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q,\
                           neon_load4_all_lanes, neon_load4_all_lanes_q,\
                           neon_load2_4reg, neon_load2_4reg_q")
                           (const_string "neon_vld2_4_regs")
          (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q,\
                           neon_load4_4reg, neon_load4_4reg_q")
                           (const_string "neon_vld3_vld4")
          ;; The f_loads/f_loadd/f_stores/f_stored types are classed together
          ;; with the one-lane vld1/vld2 types.
          (eq_attr "type" "f_loads, f_loadd, f_stores, f_stored,\
                           neon_load1_one_lane, neon_load1_one_lane_q,\
                           neon_load2_one_lane, neon_load2_one_lane_q")
                           (const_string "neon_vld1_vld2_lane")
          (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q,\
                           neon_load4_one_lane, neon_load4_one_lane_q")
                           (const_string "neon_vld3_vld4_lane")
          (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q,\
                           neon_store1_2reg, neon_store1_2reg_q,\
                           neon_store2_2reg, neon_store2_2reg_q")
                           (const_string "neon_vst1_1_2_regs_vst2_2_regs")
          (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
                           neon_store1_4reg, neon_store1_4reg_q")
                           (const_string "neon_vst1_3_4_regs")
          (eq_attr "type" "neon_store2_4reg, neon_store2_4reg_q,\
                           neon_store3_3reg, neon_store3_3reg_q,\
                           neon_store4_4reg, neon_store4_4reg_q")
                           (const_string "neon_vst2_4_regs_vst3_vst4")
          (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q,\
                           neon_store2_one_lane, neon_store2_one_lane_q")
                           (const_string "neon_vst1_vst2_lane")
          (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q,\
                           neon_store4_one_lane, neon_store4_one_lane_q")
                           (const_string "neon_vst3_vst4_lane")
          ;; Register-transfer classes (neon_mcr, neon_mcr_2_mcrr, neon_mrc,
          ;; neon_mrrc).
          (eq_attr "type" "neon_from_gp, f_mcr")
                           (const_string "neon_mcr")
          (eq_attr "type" "neon_from_gp_q, f_mcrr")
                           (const_string "neon_mcr_2_mcrr")
          (eq_attr "type" "neon_to_gp, f_mrc")
                           (const_string "neon_mrc")
          (eq_attr "type" "neon_to_gp_q, f_mrrc")
                           (const_string "neon_mrrc")]
          ;; Default for every type not listed above.
          (const_string "unknown")))
236
;; Automaton to which all of the NEON/VFP units declared below belong.
(define_automaton "cortex_a8_neon")

;; Issue slot shared by loads, stores, permutes, MCR and MRC: only one
;; such instruction can be issued per cycle.
(define_cpu_unit "cortex_a8_neon_issue_perm" "cortex_a8_neon")

;; Issue slot for data-processing instructions: only one can be issued
;; per cycle.
(define_cpu_unit "cortex_a8_neon_issue_dp" "cortex_a8_neon")

;; The VFPLite unit (non-pipelined).
(define_cpu_unit "cortex_a8_vfplite" "cortex_a8_neon")

;; We need a special mutual exclusion (to be used in addition to
;; cortex_a8_neon_issue_dp) for the case when an instruction such as
;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to
;; E2 of the floating-point add pipeline.  On the cycle previous to that
;; forward we must prevent issue of any instruction to the floating-point
;; add pipeline, but still allow issue of a data-processing instruction
;; to any of the other pipelines.
(define_cpu_unit "cortex_a8_neon_issue_fadd" "cortex_a8_neon")
257
258;; Patterns of reservation.
259;; We model the NEON issue units as running in parallel with the core ones.
260;; We assume that multi-cycle NEON instructions get decomposed into
261;; micro-ops as they are issued into the NEON pipeline, and not as they
262;; are issued into the ARM pipeline.  Dual issue may not occur except
263;; upon the first and last cycles of a multi-cycle instruction, but it
264;; is unclear whether two multi-cycle instructions can issue together (in
265;; this model they cannot).  It is also unclear whether a pair of
266;; a multi-cycle and single-cycle instructions, that could potentially
267;; issue together, only do so if (say) the single-cycle one precedes
268;; the other.
269
;; Data-processing issue patterns.  Each one starts by taking either
;; cortex_a8_alu0 or cortex_a8_alu1 (units declared outside this section)
;; together with cortex_a8_neon_issue_dp.  The _2 and _4 forms keep
;; cortex_a8_neon_issue_dp for two and four consecutive cycles; the _4
;; form additionally holds cortex_a8_neon_issue_perm on its second and
;; third cycles.
(define_reservation "cortex_a8_neon_dp"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp")
(define_reservation "cortex_a8_neon_dp_2"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
                     cortex_a8_neon_issue_dp")
(define_reservation "cortex_a8_neon_dp_4"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp")
280
;; Issue patterns for the floating-point add pipeline.  They take the same
;; units as cortex_a8_neon_dp and cortex_a8_neon_dp_2, and in addition
;; hold cortex_a8_neon_issue_fadd on every cycle (one cycle for
;; cortex_a8_neon_fadd, two for cortex_a8_neon_fadd_2).
(define_reservation "cortex_a8_neon_fadd"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
                     cortex_a8_neon_issue_fadd")
(define_reservation "cortex_a8_neon_fadd_2"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
                     cortex_a8_neon_issue_fadd,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_fadd")
288
;; Permute issue patterns.  Each one starts by taking either cortex_a8_alu0
;; or cortex_a8_alu1 together with cortex_a8_neon_issue_perm, and keeps
;; cortex_a8_neon_issue_perm for one, two or three cycles in total.  The
;; _3 form also holds cortex_a8_neon_issue_dp on its middle cycle.
(define_reservation "cortex_a8_neon_perm"
                    "(cortex_a8_alu0|cortex_a8_alu1)+\
                     cortex_a8_neon_issue_perm")
(define_reservation "cortex_a8_neon_perm_2"
                    "(cortex_a8_alu0|cortex_a8_alu1)+\
                     cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
(define_reservation "cortex_a8_neon_perm_3"
                    "(cortex_a8_alu0|cortex_a8_alu1)+\
                     cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
301
;; Load/store issue patterns.  Each one starts on cortex_a8_issue_ls (a
;; unit declared outside this section) together with
;; cortex_a8_neon_issue_perm.  cortex_a8_neon_ls holds
;; cortex_a8_neon_issue_perm for one cycle and cortex_a8_neon_ls_N for N
;; cycles.  From _3 upwards cortex_a8_neon_issue_dp is held as well on
;; every cycle except the first and the last.
(define_reservation "cortex_a8_neon_ls"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm")
(define_reservation "cortex_a8_neon_ls_2"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
(define_reservation "cortex_a8_neon_ls_3"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
(define_reservation "cortex_a8_neon_ls_4"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
(define_reservation "cortex_a8_neon_ls_5"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
322
;; Patterns for instructions that use cortex_a8_neon_issue_dp first and
;; cortex_a8_neon_issue_fadd some cycles later.
;; cortex_a8_neon_fmul_then_fadd: issue on cycle 1, no units on cycles
;; 2-4, cortex_a8_neon_issue_fadd on cycle 5.
;; cortex_a8_neon_fmul_then_fadd_2: cortex_a8_neon_issue_dp on cycles 1-2,
;; no units on cycles 3-4, cortex_a8_neon_issue_fadd on cycles 5-6.
(define_reservation "cortex_a8_neon_fmul_then_fadd"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
		     nothing*3,\
		     cortex_a8_neon_issue_fadd")
(define_reservation "cortex_a8_neon_fmul_then_fadd_2"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
		     cortex_a8_neon_issue_dp,\
		     nothing*2,\
		     cortex_a8_neon_issue_fadd,\
		     cortex_a8_neon_issue_fadd")
333
;; VFP instructions can only be single-issued into the NEON pipeline.
;; To model that, the issue cycle takes both NEON issue units
;; (cortex_a8_neon_issue_dp and cortex_a8_neon_issue_perm) as well as
;; one ALU unit and cortex_a8_vfplite.
(define_reservation "cortex_a8_vfp"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
                     cortex_a8_neon_issue_perm+cortex_a8_vfplite")
338
339;; VFP instructions.
340;; The VFPLite unit that executes these isn't pipelined; we give the
341;; worst-case latencies (and choose the double-precision ones where we
342;; do not distinguish on precision).  We assume RunFast mode is not
343;; enabled and therefore do not model the possible VFP instruction
344;; execution in the NEON floating point pipelines, nor additional
345;; latencies for the processing of subnormals.
346;;
347;; TODO: RunFast mode could potentially be enabled when -ffast-math
348;; is specified.
349
;; Types fadds and faddd, plus fconsts and fconstd.  Latency 10;
;; cortex_a8_vfplite is reserved for all 10 cycles (the first of them as
;; part of cortex_a8_vfp).
(define_insn_reservation "cortex_a8_vfp_add_sub" 10
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fconsts,fconstd,fadds,faddd"))
  "cortex_a8_vfp,cortex_a8_vfplite*9")

;; Type fmuls.  Latency 12; cortex_a8_vfplite is reserved for all 12
;; cycles.
(define_insn_reservation "cortex_a8_vfp_muls" 12
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fmuls"))
  "cortex_a8_vfp,cortex_a8_vfplite*11")
359
;; Don't model a reservation for more than 15 cycles as this explodes the
;; state space of the automaton for little gain.  It is unlikely that the
;; scheduler will find enough instructions to hide the full latency of the
;; instructions.
;; Each of the five reservations below therefore uses the same capped
;; pattern (cortex_a8_vfp followed by cortex_a8_vfplite*15) and differs
;; from the others only in its latency.

;; Type fmuld, latency 17.
(define_insn_reservation "cortex_a8_vfp_muld" 17
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fmuld"))
  "cortex_a8_vfp,cortex_a8_vfplite*15")

;; Types fmacs and ffmas, latency 21.
(define_insn_reservation "cortex_a8_vfp_macs" 21
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fmacs,ffmas"))
  "cortex_a8_vfp,cortex_a8_vfplite*15")

;; Types fmacd and ffmad, latency 26.
(define_insn_reservation "cortex_a8_vfp_macd" 26
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fmacd,ffmad"))
  "cortex_a8_vfp,cortex_a8_vfplite*15")

;; Types fdivs and fsqrts, latency 37.
(define_insn_reservation "cortex_a8_vfp_divs" 37
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fdivs, fsqrts"))
  "cortex_a8_vfp,cortex_a8_vfplite*15")

;; Types fdivd and fsqrtd, latency 65.
(define_insn_reservation "cortex_a8_vfp_divd" 65
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fdivd, fsqrtd"))
  "cortex_a8_vfp,cortex_a8_vfplite*15")
388
;; Types fmov, ffariths, ffarithd, fcmps and fcmpd.  Latency 4;
;; cortex_a8_vfplite is reserved for all 4 cycles.
;; Comparisons can actually take 7 cycles sometimes instead of four,
;; but given all the other instructions handled by this reservation
;; that take four cycles, we pick that latency.
;;
;; fconsts and fconstd are not listed here because
;; cortex_a8_vfp_add_sub already claims them; the conditions of two
;; define_insn_reservations must not both be true for the same insn.
;; NOTE(review): if constant loads are really meant to have the 4-cycle
;; latency, move them here from cortex_a8_vfp_add_sub instead.
(define_insn_reservation "cortex_a8_vfp_farith" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fmov,ffariths,ffarithd,fcmps,fcmpd"))
  "cortex_a8_vfp,cortex_a8_vfplite*3")
396
;; Types f_cvt, f_cvtf2i and f_cvti2f.  Latency 7; cortex_a8_vfplite is
;; reserved for all 7 cycles.
(define_insn_reservation "cortex_a8_vfp_cvt" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
  "cortex_a8_vfp,cortex_a8_vfplite*6")
401
;; NEON -> core transfers.
;; Both are issued through the load/store patterns: neon_mrc with
;; latency 20 using the one-cycle cortex_a8_neon_ls, neon_mrrc with
;; latency 21 using the two-cycle cortex_a8_neon_ls_2.

(define_insn_reservation "cortex_a8_neon_mrc" 20
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_mrc"))
  "cortex_a8_neon_ls")

(define_insn_reservation "cortex_a8_neon_mrrc" 21
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_mrrc"))
  "cortex_a8_neon_ls_2")
413
414;; Arithmetic Operations
415
;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N3.
;; Latency 3; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_int_1" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_int_1"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)m operands at N1,
;; their (D|Q)n operands at N2, and produce a result at N3.
;; Latency 3; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_int_2" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_int_2"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N3.
;; Latency 3; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_int_3" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_int_3"))
  "cortex_a8_neon_dp")
436
;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N4.
;; Latency 4; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_int_4" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_int_4"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)m operands at N1,
;; their (D|Q)n operands at N2, and produce a result at N4.
;; Latency 4; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_int_5" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_int_5"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N4.
;; Latency 4; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_vqneg_vqabs" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vqneg_vqabs"))
  "cortex_a8_neon_dp")
457
;; Instructions using this reservation produce a result at N3.
;; Latency 3; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_vmov" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vmov"))
  "cortex_a8_neon_dp")
463
;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6.
;; Latency 6; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_vaba" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vaba"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6 on cycle 2.
;; Latency 7 (N6 plus one extra issue cycle); two issue cycles through
;; cortex_a8_neon_dp_2.
(define_insn_reservation "cortex_a8_neon_vaba_qqq" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vaba_qqq"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N3 on cycle 2.
;; Latency 4 (N3 plus one extra issue cycle); two issue cycles through
;; cortex_a8_neon_dp_2.
(define_insn_reservation "cortex_a8_neon_bit_ops_q" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_bit_ops_q"))
  "cortex_a8_neon_dp_2")
486
487;; Integer Multiply/Accumulate Operations
488
;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N6.
;; Latency 6; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type"
         "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N6 on cycle 2.
;; Latency 7 (N6 plus one extra issue cycle); two issue cycles through
;; cortex_a8_neon_dp_2.
(define_insn_reservation "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_mul_qqq_8_16_32_ddd_32"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2.
;; Latency 7 (N6 plus one extra issue cycle); two issue cycles through
;; cortex_a8_neon_dp_2.
(define_insn_reservation "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type"
            "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"))
  "cortex_a8_neon_dp_2")
511
;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
;; produce a result at N6.
;; Latency 6; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type"
                  "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
;; produce a result at N6 on cycle 2.
;; Latency 7 (N6 plus one extra issue cycle); two issue cycles through
;; cortex_a8_neon_dp_2.
(define_insn_reservation "cortex_a8_neon_mla_qqq_8_16" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_mla_qqq_8_16"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6 on cycle 2.
;; Latency 7 (N6 plus one extra issue cycle); two issue cycles through
;; cortex_a8_neon_dp_2.
(define_insn_reservation "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type"
 "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6 on cycle 4.
;; Latency 9 (N6 plus three extra issue cycles); four issue cycles
;; through cortex_a8_neon_dp_4.
(define_insn_reservation "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" 9
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_mla_qqq_32_qqd_32_scalar"))
  "cortex_a8_neon_dp_4")
545
;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, and produce a result at N6.
;; Latency 6; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type"
                  "neon_mul_ddd_16_scalar_32_16_long_scalar"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4.
;; Latency 9 (N6 plus three extra issue cycles); four issue cycles
;; through cortex_a8_neon_dp_4.
(define_insn_reservation "cortex_a8_neon_mul_qqd_32_scalar" 9
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_mul_qqd_32_scalar"))
  "cortex_a8_neon_dp_4")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6.
;; Latency 6; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type"
                  "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar"))
  "cortex_a8_neon_dp")
569
570;; Shift Operations
571
;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N3.
;; Latency 3; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_shift_1" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_shift_1"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N4.
;; Latency 4; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_shift_2" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_shift_2"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N3 on cycle 2.
;; Latency 4 (N3 plus one extra issue cycle); two issue cycles through
;; cortex_a8_neon_dp_2.
(define_insn_reservation "cortex_a8_neon_shift_3" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_shift_3"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N4 on cycle 2.
;; Latency 5 (N4 plus one extra issue cycle); two issue cycles through
;; cortex_a8_neon_dp_2.
(define_insn_reservation "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" 5
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vqshl_vrshl_vqrshl_qqq"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)m operands at N1,
;; their (D|Q)d operands at N3, and produce a result at N6.
;; Latency 6; one issue cycle through cortex_a8_neon_dp.
(define_insn_reservation "cortex_a8_neon_vsra_vrsra" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vsra_vrsra"))
  "cortex_a8_neon_dp")
606
607;; Floating point Operations
608
609;; Instructions using this reservation read their source operands at N2, and
610;; produce a result at N5.
611(define_insn_reservation "cortex_a8_neon_fp_vadd_ddd_vabs_dd" 5
612  (and (eq_attr "tune" "cortexa8")
613       (eq_attr "cortex_a8_neon_type" "neon_fp_vadd_ddd_vabs_dd"))
614 "cortex_a8_neon_fadd")
615
;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N5 on cycle 2 (default latency 6).
(define_insn_reservation "cortex_a8_neon_fp_vadd_qqq_vabs_qq" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_fp_vadd_qqq_vabs_qq"))
  "cortex_a8_neon_fadd_2")
622
;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N5 (default latency 5).
(define_insn_reservation "cortex_a8_neon_fp_vsum" 5
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_fp_vsum"))
  "cortex_a8_neon_fadd")
629
;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, and produce a result at N5 (default
;; latency 5).
(define_insn_reservation "cortex_a8_neon_fp_vmul_ddd" 5
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_fp_vmul_ddd"))
  "cortex_a8_neon_dp")
636
;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2
;; (default latency 6).
(define_insn_reservation "cortex_a8_neon_fp_vmul_qqd" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_fp_vmul_qqd"))
  "cortex_a8_neon_dp_2")
643
;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
;; produce a result at N9 (default latency 9).
(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd" 9
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_ddd"))
  "cortex_a8_neon_fmul_then_fadd")
651
;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
;; produce a result at N9 on cycle 2 (default latency 10).
(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq" 10
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_qqq"))
  "cortex_a8_neon_fmul_then_fadd_2")
659
;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N9 (default latency 9).
(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd_scalar" 9
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_ddd_scalar"))
  "cortex_a8_neon_fmul_then_fadd")
667
;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N9 on cycle 2 (default latency 10).
(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq_scalar" 10
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_qqq_scalar"))
  "cortex_a8_neon_fmul_then_fadd_2")
675
;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N9 (default latency 9).
(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" 9
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_fp_vrecps_vrsqrts_ddd"))
  "cortex_a8_neon_fmul_then_fadd")
682
;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N9 on cycle 2 (default latency 10).
;; NOTE(review): unlike the other reservations in this file, this one tests
;; the generic "type" attribute directly instead of "cortex_a8_neon_type".
;; Presumably these two types are exactly the ones mapped to
;; neon_fp_vrecps_vrsqrts_qqq -- confirm against the define_attr before
;; switching the condition.
(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" 10
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q"))
  "cortex_a8_neon_fmul_then_fadd_2")
689
;; Permute operations.
691
;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N2 (default latency 2).
(define_insn_reservation "cortex_a8_neon_bp_simple" 2
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_bp_simple"))
  "cortex_a8_neon_perm")
698
;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N2 on cycle 2 (default latency 3).
(define_insn_reservation "cortex_a8_neon_bp_2cycle" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_bp_2cycle"))
  "cortex_a8_neon_perm_2")
705
;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N2 on cycle 3 (default latency 4).
(define_insn_reservation "cortex_a8_neon_bp_3cycle" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_bp_3cycle"))
  "cortex_a8_neon_perm_3")
712
;; Load Operations.
714
;; Instructions using this reservation produce a result at N1 (default
;; latency 1).
(define_insn_reservation "cortex_a8_neon_ldr" 1
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_ldr"))
  "cortex_a8_neon_ls")
720
;; Instructions using this reservation read their source operands at N1.
;; The default latency is 0.
(define_insn_reservation "cortex_a8_neon_str" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_str"))
  "cortex_a8_neon_ls")
726
;; Instructions using this reservation produce a result at N1 on cycle 2
;; (default latency 2).
(define_insn_reservation "cortex_a8_neon_vld1_1_2_regs" 2
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vld1_1_2_regs"))
  "cortex_a8_neon_ls_2")
732
;; Instructions using this reservation produce a result at N1 on cycle 3
;; (default latency 3).
(define_insn_reservation "cortex_a8_neon_vld1_3_4_regs" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vld1_3_4_regs"))
  "cortex_a8_neon_ls_3")
738
;; Instructions using this reservation produce a result at N2 on cycle 2
;; (default latency 3).
(define_insn_reservation "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes"))
  "cortex_a8_neon_ls_2")
744
;; Instructions using this reservation produce a result at N2 on cycle 3
;; (default latency 4).
(define_insn_reservation "cortex_a8_neon_vld2_4_regs" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vld2_4_regs"))
  "cortex_a8_neon_ls_3")
750
;; Instructions using this reservation produce a result at N2 on cycle 4
;; (default latency 5).
(define_insn_reservation "cortex_a8_neon_vld3_vld4" 5
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vld3_vld4"))
  "cortex_a8_neon_ls_4")
756
;; Store operations.
758
;; Instructions using this reservation read their source operands at N1.
;; The default latency is 0.
(define_insn_reservation "cortex_a8_neon_vst1_1_2_regs_vst2_2_regs" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vst1_1_2_regs_vst2_2_regs"))
  "cortex_a8_neon_ls_2")
764
;; Instructions using this reservation read their source operands at N1.
;; The default latency is 0.
(define_insn_reservation "cortex_a8_neon_vst1_3_4_regs" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vst1_3_4_regs"))
  "cortex_a8_neon_ls_3")
770
;; Instructions using this reservation read their source operands at N1.
;; The default latency is 0.
(define_insn_reservation "cortex_a8_neon_vst2_4_regs_vst3_vst4" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vst2_4_regs_vst3_vst4"))
  "cortex_a8_neon_ls_4")
776
;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N2 on cycle 3 (default latency 4).
(define_insn_reservation "cortex_a8_neon_vld1_vld2_lane" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vld1_vld2_lane"))
  "cortex_a8_neon_ls_3")
783
;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N2 on cycle 5 (default latency 6).
(define_insn_reservation "cortex_a8_neon_vld3_vld4_lane" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vld3_vld4_lane"))
  "cortex_a8_neon_ls_5")
790
;; Instructions using this reservation read their source operands at N1.
;; The default latency is 0.
(define_insn_reservation "cortex_a8_neon_vst1_vst2_lane" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vst1_vst2_lane"))
  "cortex_a8_neon_ls_2")
796
;; Instructions using this reservation read their source operands at N1.
;; The default latency is 0.
(define_insn_reservation "cortex_a8_neon_vst3_vst4_lane" 0
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vst3_vst4_lane"))
  "cortex_a8_neon_ls_3")
802
;; Register Transfer Operations
804
;; Instructions using this reservation produce a result at N2 (default
;; latency 2).
(define_insn_reservation "cortex_a8_neon_mcr" 2
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_mcr"))
  "cortex_a8_neon_perm")
810
;; Instructions using this reservation produce a result at N2 (default
;; latency 2).
(define_insn_reservation "cortex_a8_neon_mcr_2_mcrr" 2
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_mcr_2_mcrr"))
  "cortex_a8_neon_perm_2")
816
;; Exceptions to the default latencies.
818
;; Latency from cortex_a8_neon_mcr_2_mcrr to the consumers listed below is 1
;; instead of the reservation's default of 2.
(define_bypass 1 "cortex_a8_neon_mcr_2_mcrr"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
833
;; Latency from cortex_a8_neon_mcr to the consumers listed below is 1
;; instead of the reservation's default of 2.
(define_bypass 1 "cortex_a8_neon_mcr"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
848
;; Latency from cortex_a8_neon_vld3_vld4_lane to the consumers listed below
;; is 5 instead of the reservation's default of 6.
(define_bypass 5 "cortex_a8_neon_vld3_vld4_lane"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
863
;; Latency from cortex_a8_neon_vld1_vld2_lane to the consumers listed below
;; is 3 instead of the reservation's default of 4.
(define_bypass 3 "cortex_a8_neon_vld1_vld2_lane"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
878
;; Latency from cortex_a8_neon_vld3_vld4 to the consumers listed below is 4
;; instead of the reservation's default of 5.
(define_bypass 4 "cortex_a8_neon_vld3_vld4"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
893
;; Latency from cortex_a8_neon_vld2_4_regs to the consumers listed below is 3
;; instead of the reservation's default of 4.
(define_bypass 3 "cortex_a8_neon_vld2_4_regs"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
908
;; Latency from cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes to the
;; consumers listed below is 2 instead of the reservation's default of 3.
(define_bypass 2 "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
923
;; Latency from cortex_a8_neon_vld1_3_4_regs to the consumers listed below
;; is 2 instead of the reservation's default of 3.
(define_bypass 2 "cortex_a8_neon_vld1_3_4_regs"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
938
;; Latency from cortex_a8_neon_vld1_1_2_regs to the consumers listed below
;; is 1 instead of the reservation's default of 2.
(define_bypass 1 "cortex_a8_neon_vld1_1_2_regs"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
953
;; Latency from cortex_a8_neon_ldr to the consumers listed below is 0
;; instead of the reservation's default of 1.
(define_bypass 0 "cortex_a8_neon_ldr"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
968
;; Latency from cortex_a8_neon_bp_3cycle to the consumers listed below is 3
;; instead of the reservation's default of 4.
(define_bypass 3 "cortex_a8_neon_bp_3cycle"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
983
;; Latency from cortex_a8_neon_bp_2cycle to the consumers listed below is 2
;; instead of the reservation's default of 3.
(define_bypass 2 "cortex_a8_neon_bp_2cycle"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
998
;; Latency from cortex_a8_neon_bp_simple to the consumers listed below is 1
;; instead of the reservation's default of 2.
(define_bypass 1 "cortex_a8_neon_bp_simple"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1013
;; Latency from cortex_a8_neon_fp_vrecps_vrsqrts_qqq to the consumers listed
;; below is 9 instead of the reservation's default of 10.
(define_bypass 9 "cortex_a8_neon_fp_vrecps_vrsqrts_qqq"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1028
;; Latency from cortex_a8_neon_fp_vrecps_vrsqrts_ddd to the consumers listed
;; below is 8 instead of the reservation's default of 9.
(define_bypass 8 "cortex_a8_neon_fp_vrecps_vrsqrts_ddd"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1043
;; Latency from cortex_a8_neon_fp_vmla_qqq_scalar to the consumers listed
;; below is 9 instead of the reservation's default of 10.
(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq_scalar"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1058
;; Latency from cortex_a8_neon_fp_vmla_ddd_scalar to the consumers listed
;; below is 8 instead of the reservation's default of 9.
(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd_scalar"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1073
;; Latency from cortex_a8_neon_fp_vmla_qqq to the consumers listed below is 9
;; instead of the reservation's default of 10.
(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1088
;; Latency from cortex_a8_neon_fp_vmla_ddd to the consumers listed below is 8
;; instead of the reservation's default of 9.
(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1103
;; Latency from cortex_a8_neon_fp_vmul_qqd to the consumers listed below is 5
;; instead of the reservation's default of 6.
(define_bypass 5 "cortex_a8_neon_fp_vmul_qqd"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1118
;; Latency from cortex_a8_neon_fp_vmul_ddd to the consumers listed below is 4
;; instead of the reservation's default of 5.
(define_bypass 4 "cortex_a8_neon_fp_vmul_ddd"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1133
;; Latency from cortex_a8_neon_fp_vsum to the consumers listed below is 4
;; instead of the reservation's default of 5.
(define_bypass 4 "cortex_a8_neon_fp_vsum"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1148
;; Latency from cortex_a8_neon_fp_vadd_qqq_vabs_qq to the consumers listed
;; below is 5 instead of the reservation's default of 6.
(define_bypass 5 "cortex_a8_neon_fp_vadd_qqq_vabs_qq"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1163
;; Latency from cortex_a8_neon_fp_vadd_ddd_vabs_dd to the consumers listed
;; below is 4 instead of the reservation's default of 5.
(define_bypass 4 "cortex_a8_neon_fp_vadd_ddd_vabs_dd"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1178
;; Latency from cortex_a8_neon_vsra_vrsra to the consumers listed below is 5
;; instead of the reservation's default of 6.
(define_bypass 5 "cortex_a8_neon_vsra_vrsra"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1193
;; Latency from cortex_a8_neon_vqshl_vrshl_vqrshl_qqq to the consumers listed
;; below is 4 instead of the reservation's default of 5.
(define_bypass 4 "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1208
;; Latency from cortex_a8_neon_shift_3 to the consumers listed below is 3
;; instead of the reservation's default of 4.
(define_bypass 3 "cortex_a8_neon_shift_3"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1223
;; Latency from cortex_a8_neon_shift_2 to the consumers listed below is 3
;; instead of the reservation's default of 4.
(define_bypass 3 "cortex_a8_neon_shift_2"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1238
;; Latency from cortex_a8_neon_shift_1 to the consumers listed below is 2
;; instead of the reservation's default of 3.
(define_bypass 2 "cortex_a8_neon_shift_1"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1253
;; Latency from cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar to the
;; consumers listed below is 5, overriding that reservation's default.
(define_bypass 5 "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1268
;; Latency from cortex_a8_neon_mul_qqd_32_scalar to the consumers listed
;; below is 8, overriding that reservation's default.
(define_bypass 8 "cortex_a8_neon_mul_qqd_32_scalar"
               "cortex_a8_neon_int_1,\
               cortex_a8_neon_int_4,\
               cortex_a8_neon_bit_ops_q,\
               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
               cortex_a8_neon_mla_qqq_8_16,\
               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
               cortex_a8_neon_fp_vmla_ddd,\
               cortex_a8_neon_fp_vmla_qqq,\
               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1283
;; Bypass for producer cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar:
;; a dependent instruction in any consumer reservation listed below sees
;; the result after 5 cycles, replacing the producer's default reservation
;; latency (defined outside this chunk).
1284(define_bypass 5 "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar"
1285               "cortex_a8_neon_int_1,\
1286               cortex_a8_neon_int_4,\
1287               cortex_a8_neon_bit_ops_q,\
1288               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1289               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1290               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1291               cortex_a8_neon_mla_qqq_8_16,\
1292               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1293               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1294               cortex_a8_neon_fp_vmla_ddd,\
1295               cortex_a8_neon_fp_vmla_qqq,\
1296               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1297               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1298
;; Bypass for producer cortex_a8_neon_mla_qqq_32_qqd_32_scalar: a dependent
;; instruction in any consumer reservation listed below sees the result
;; after 8 cycles, replacing the producer's default reservation latency
;; (defined outside this chunk).
1299(define_bypass 8 "cortex_a8_neon_mla_qqq_32_qqd_32_scalar"
1300               "cortex_a8_neon_int_1,\
1301               cortex_a8_neon_int_4,\
1302               cortex_a8_neon_bit_ops_q,\
1303               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1304               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1305               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1306               cortex_a8_neon_mla_qqq_8_16,\
1307               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1308               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1309               cortex_a8_neon_fp_vmla_ddd,\
1310               cortex_a8_neon_fp_vmla_qqq,\
1311               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1312               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1313
;; Bypass for producer
;; cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long:
;; a dependent instruction in any consumer reservation listed below sees
;; the result after 6 cycles, replacing the producer's default reservation
;; latency (defined outside this chunk).
1314(define_bypass 6 "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"
1315               "cortex_a8_neon_int_1,\
1316               cortex_a8_neon_int_4,\
1317               cortex_a8_neon_bit_ops_q,\
1318               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1319               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1320               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1321               cortex_a8_neon_mla_qqq_8_16,\
1322               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1323               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1324               cortex_a8_neon_fp_vmla_ddd,\
1325               cortex_a8_neon_fp_vmla_qqq,\
1326               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1327               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1328
;; Bypass for producer cortex_a8_neon_mla_qqq_8_16: a dependent instruction
;; in any consumer reservation listed below sees the result after 6 cycles,
;; replacing the producer's default reservation latency (defined outside
;; this chunk).  The producer is also one of its own consumers, so the
;; bypass covers back-to-back instructions of this reservation.
1329(define_bypass 6 "cortex_a8_neon_mla_qqq_8_16"
1330               "cortex_a8_neon_int_1,\
1331               cortex_a8_neon_int_4,\
1332               cortex_a8_neon_bit_ops_q,\
1333               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1334               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1335               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1336               cortex_a8_neon_mla_qqq_8_16,\
1337               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1338               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1339               cortex_a8_neon_fp_vmla_ddd,\
1340               cortex_a8_neon_fp_vmla_qqq,\
1341               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1342               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1343
;; Bypass for producer cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long:
;; a dependent instruction in any consumer reservation listed below sees
;; the result after 5 cycles, replacing the producer's default reservation
;; latency (defined outside this chunk).  The producer is also one of its
;; own consumers.
1344(define_bypass 5 "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long"
1345               "cortex_a8_neon_int_1,\
1346               cortex_a8_neon_int_4,\
1347               cortex_a8_neon_bit_ops_q,\
1348               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1349               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1350               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1351               cortex_a8_neon_mla_qqq_8_16,\
1352               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1353               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1354               cortex_a8_neon_fp_vmla_ddd,\
1355               cortex_a8_neon_fp_vmla_qqq,\
1356               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1357               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1358
;; Bypass for producer
;; cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar:
;; a dependent instruction in any consumer reservation listed below sees
;; the result after 6 cycles, replacing the producer's default reservation
;; latency (defined outside this chunk).
1359(define_bypass 6 "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"
1360               "cortex_a8_neon_int_1,\
1361               cortex_a8_neon_int_4,\
1362               cortex_a8_neon_bit_ops_q,\
1363               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1364               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1365               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1366               cortex_a8_neon_mla_qqq_8_16,\
1367               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1368               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1369               cortex_a8_neon_fp_vmla_ddd,\
1370               cortex_a8_neon_fp_vmla_qqq,\
1371               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1372               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1373
;; Bypass for producer cortex_a8_neon_mul_qqq_8_16_32_ddd_32: a dependent
;; instruction in any consumer reservation listed below sees the result
;; after 6 cycles, replacing the producer's default reservation latency
;; (defined outside this chunk).  The producer is also one of its own
;; consumers.
1374(define_bypass 6 "cortex_a8_neon_mul_qqq_8_16_32_ddd_32"
1375               "cortex_a8_neon_int_1,\
1376               cortex_a8_neon_int_4,\
1377               cortex_a8_neon_bit_ops_q,\
1378               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1379               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1380               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1381               cortex_a8_neon_mla_qqq_8_16,\
1382               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1383               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1384               cortex_a8_neon_fp_vmla_ddd,\
1385               cortex_a8_neon_fp_vmla_qqq,\
1386               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1387               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1388
;; Bypass for producer cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long:
;; a dependent instruction in any consumer reservation listed below sees
;; the result after 5 cycles, replacing the producer's default reservation
;; latency (defined outside this chunk).  The producer is also one of its
;; own consumers.
1389(define_bypass 5 "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long"
1390               "cortex_a8_neon_int_1,\
1391               cortex_a8_neon_int_4,\
1392               cortex_a8_neon_bit_ops_q,\
1393               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1394               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1395               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1396               cortex_a8_neon_mla_qqq_8_16,\
1397               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1398               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1399               cortex_a8_neon_fp_vmla_ddd,\
1400               cortex_a8_neon_fp_vmla_qqq,\
1401               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1402               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1403
;; Bypass for producer cortex_a8_neon_vaba_qqq: a dependent instruction in
;; any consumer reservation listed below sees the result after 6 cycles,
;; replacing the producer's default reservation latency (defined outside
;; this chunk).
1404(define_bypass 6 "cortex_a8_neon_vaba_qqq"
1405               "cortex_a8_neon_int_1,\
1406               cortex_a8_neon_int_4,\
1407               cortex_a8_neon_bit_ops_q,\
1408               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1409               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1410               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1411               cortex_a8_neon_mla_qqq_8_16,\
1412               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1413               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1414               cortex_a8_neon_fp_vmla_ddd,\
1415               cortex_a8_neon_fp_vmla_qqq,\
1416               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1417               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1418
;; Bypass for producer cortex_a8_neon_vaba: a dependent instruction in any
;; consumer reservation listed below sees the result after 5 cycles,
;; replacing the producer's default reservation latency (defined outside
;; this chunk).
1419(define_bypass 5 "cortex_a8_neon_vaba"
1420               "cortex_a8_neon_int_1,\
1421               cortex_a8_neon_int_4,\
1422               cortex_a8_neon_bit_ops_q,\
1423               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1424               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1425               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1426               cortex_a8_neon_mla_qqq_8_16,\
1427               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1428               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1429               cortex_a8_neon_fp_vmla_ddd,\
1430               cortex_a8_neon_fp_vmla_qqq,\
1431               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1432               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1433
;; Bypass for producer cortex_a8_neon_bit_ops_q: a dependent instruction in
;; any consumer reservation listed below sees the result after 3 cycles,
;; replacing the producer's default reservation latency (defined outside
;; this chunk).  The producer is also one of its own consumers.
1434(define_bypass 3 "cortex_a8_neon_bit_ops_q"
1435               "cortex_a8_neon_int_1,\
1436               cortex_a8_neon_int_4,\
1437               cortex_a8_neon_bit_ops_q,\
1438               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1439               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1440               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1441               cortex_a8_neon_mla_qqq_8_16,\
1442               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1443               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1444               cortex_a8_neon_fp_vmla_ddd,\
1445               cortex_a8_neon_fp_vmla_qqq,\
1446               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1447               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1448
;; Bypass for producer cortex_a8_neon_vqneg_vqabs: a dependent instruction
;; in any consumer reservation listed below sees the result after 3 cycles,
;; replacing the producer's default reservation latency (defined outside
;; this chunk).
1449(define_bypass 3 "cortex_a8_neon_vqneg_vqabs"
1450               "cortex_a8_neon_int_1,\
1451               cortex_a8_neon_int_4,\
1452               cortex_a8_neon_bit_ops_q,\
1453               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1454               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1455               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1456               cortex_a8_neon_mla_qqq_8_16,\
1457               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1458               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1459               cortex_a8_neon_fp_vmla_ddd,\
1460               cortex_a8_neon_fp_vmla_qqq,\
1461               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1462               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1463
;; Bypass for producer cortex_a8_neon_int_5: a dependent instruction in any
;; consumer reservation listed below sees the result after 3 cycles,
;; replacing the producer's default reservation latency (defined outside
;; this chunk).
1464(define_bypass 3 "cortex_a8_neon_int_5"
1465               "cortex_a8_neon_int_1,\
1466               cortex_a8_neon_int_4,\
1467               cortex_a8_neon_bit_ops_q,\
1468               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1469               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1470               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1471               cortex_a8_neon_mla_qqq_8_16,\
1472               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1473               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1474               cortex_a8_neon_fp_vmla_ddd,\
1475               cortex_a8_neon_fp_vmla_qqq,\
1476               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1477               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1478
;; Bypass for producer cortex_a8_neon_int_4: a dependent instruction in any
;; consumer reservation listed below sees the result after 3 cycles,
;; replacing the producer's default reservation latency (defined outside
;; this chunk).  The producer is also one of its own consumers.
1479(define_bypass 3 "cortex_a8_neon_int_4"
1480               "cortex_a8_neon_int_1,\
1481               cortex_a8_neon_int_4,\
1482               cortex_a8_neon_bit_ops_q,\
1483               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1484               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1485               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1486               cortex_a8_neon_mla_qqq_8_16,\
1487               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1488               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1489               cortex_a8_neon_fp_vmla_ddd,\
1490               cortex_a8_neon_fp_vmla_qqq,\
1491               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1492               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1493
;; Bypass for producer cortex_a8_neon_int_3: a dependent instruction in any
;; consumer reservation listed below sees the result after 2 cycles,
;; replacing the producer's default reservation latency (defined outside
;; this chunk).
1494(define_bypass 2 "cortex_a8_neon_int_3"
1495               "cortex_a8_neon_int_1,\
1496               cortex_a8_neon_int_4,\
1497               cortex_a8_neon_bit_ops_q,\
1498               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1499               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1500               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1501               cortex_a8_neon_mla_qqq_8_16,\
1502               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1503               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1504               cortex_a8_neon_fp_vmla_ddd,\
1505               cortex_a8_neon_fp_vmla_qqq,\
1506               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1507               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1508
;; Bypass for producer cortex_a8_neon_int_2: a dependent instruction in any
;; consumer reservation listed below sees the result after 2 cycles,
;; replacing the producer's default reservation latency (defined outside
;; this chunk).
1509(define_bypass 2 "cortex_a8_neon_int_2"
1510               "cortex_a8_neon_int_1,\
1511               cortex_a8_neon_int_4,\
1512               cortex_a8_neon_bit_ops_q,\
1513               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1514               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1515               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1516               cortex_a8_neon_mla_qqq_8_16,\
1517               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1518               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1519               cortex_a8_neon_fp_vmla_ddd,\
1520               cortex_a8_neon_fp_vmla_qqq,\
1521               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1522               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1523
;; Bypass for producer cortex_a8_neon_int_1: a dependent instruction in any
;; consumer reservation listed below sees the result after 2 cycles,
;; replacing the producer's default reservation latency (defined outside
;; this chunk).  The producer is also one of its own consumers.
1524(define_bypass 2 "cortex_a8_neon_int_1"
1525               "cortex_a8_neon_int_1,\
1526               cortex_a8_neon_int_4,\
1527               cortex_a8_neon_bit_ops_q,\
1528               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1529               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1530               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1531               cortex_a8_neon_mla_qqq_8_16,\
1532               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1533               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1534               cortex_a8_neon_fp_vmla_ddd,\
1535               cortex_a8_neon_fp_vmla_qqq,\
1536               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1537               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1538
1539