;; Copyright (C) 2010-2018 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;;
;; AMD bdver1 Scheduling
;;
;; The bdver1 contains four pipelined FP units, two integer units and
;; two address generation units.
;;
;; The predecode logic is determining boundaries of instructions in the 64
;; byte cache line.  So the cache line straddling problem of K6 might be an
;; issue here as well, but it is not noted in the documentation.
;;
;; Three DirectPath instruction decoders and only one VectorPath decoder
;; are available.  They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
;;
;; The load/store queue unit is not attached to the schedulers but
;; communicates with all the execution units separately instead.
;; Decode class of an instruction on bdver1.  Every instruction is
;; "direct" unless its pattern overrides the attribute.
(define_attr "bdver1_decode" "direct,vector,double"
  (const_string "direct"))

;; One automaton per resource group: decoders, integer units, load/store
;; ports, FP pipes and address generation units.
(define_automaton "bdver1,bdver1_ieu,bdver1_load,bdver1_fp,bdver1_agu")

;; Three direct decoders plus the dedicated vector decoder, all in the
;; "bdver1" automaton.
(define_cpu_unit
  "bdver1-decode0,bdver1-decode1,bdver1-decode2,bdver1-decodev"
  "bdver1")

;; Model the fact that double decoded instruction may take 2 cycles
;; to decode when decoder2 and decoder0 in next cycle
;; is used (this is needed to allow throughput of 1.5 double decoded
;; instructions per cycle).
;;
;; In order to avoid dependence between reservation of decoder
;; and other units, we model decoder as two stage fully pipelined unit
;; and only double decoded instruction may occupy unit in the first cycle.
;; With this scheme however two double instructions can be issued cycle0.
;;
;; Avoid this by using presence set requiring decoder0 to be allocated
;; too.  Vector decoded instructions then can't be issued when modeled
;; as consuming decoder0+decoder1+decoder2.
;; We solve that by specialized vector decoder unit and exclusion set.
;; decode2 may only be reserved when decode0 is reserved as well; the
;; vector decoder excludes all three direct decoders.
(presence_set "bdver1-decode2" "bdver1-decode0")
(exclusion_set "bdver1-decodev" "bdver1-decode0,bdver1-decode1,bdver1-decode2")

;; Decoder reservations.  The leading "nothing" leaves the first decode
;; cycle free; only the first alternative of bdver1-double occupies it.
(define_reservation "bdver1-vector" "nothing,bdver1-decodev")
(define_reservation "bdver1-direct1" "nothing,bdver1-decode1")
(define_reservation "bdver1-direct"
  "nothing,(bdver1-decode0 | bdver1-decode1 | bdver1-decode2)")
;; Double instructions behave like two direct instructions.
(define_reservation "bdver1-double"
  "((bdver1-decode2,bdver1-decode0)
    | (nothing,(bdver1-decode0 + bdver1-decode1))
    | (nothing,(bdver1-decode1 + bdver1-decode2)))")


;; Two integer execution units; bdver1-ieu takes either one.
(define_cpu_unit "bdver1-ieu0" "bdver1_ieu")
(define_cpu_unit "bdver1-ieu1" "bdver1_ieu")
(define_reservation "bdver1-ieu" "(bdver1-ieu0 | bdver1-ieu1)")

;; Two address generation units; bdver1-agu takes either one.
(define_cpu_unit "bdver1-agu0" "bdver1_agu")
(define_cpu_unit "bdver1-agu1" "bdver1_agu")
(define_reservation "bdver1-agu" "(bdver1-agu0 | bdver1-agu1)")

;; Two load/store ports.  A load spends one cycle in an AGU, one cycle on
;; a port and one idle cycle.
(define_cpu_unit "bdver1-load0" "bdver1_load")
(define_cpu_unit "bdver1-load1" "bdver1_load")
(define_reservation "bdver1-load"
  "bdver1-agu,(bdver1-load0 | bdver1-load1),nothing")
;; 128bit SSE instructions issue two loads at once.
(define_reservation "bdver1-load2"
  "bdver1-agu,(bdver1-load0 + bdver1-load1),nothing")

;; Stores use the same two ports as loads.
(define_reservation "bdver1-store" "(bdver1-load0 | bdver1-load1)")
;; 128bit SSE instructions issue two stores at once.
;; Store reservation that claims both load/store ports in the same cycle.
(define_reservation "bdver1-store2" "(bdver1-load0 + bdver1-load1)")

;; vectorpath (microcoded) instructions are single issue instructions.
;; So, they occupy all the integer units.
(define_reservation "bdver1-ivector"
  "bdver1-ieu0+bdver1-ieu1+bdver1-agu0+bdver1-agu1+bdver1-load0+bdver1-load1")

;; The FP operations start to execute at stage 12 in the pipeline, while
;; integer operations start to execute at stage 9 for athlon and 11 for K8
;; Compensate the difference for athlon because it results in significantly
;; smaller automata.
;; NOTE: the above information was just copied from athlon.md, and was not
;; actually verified for bdver1.
(define_reservation "bdver1-fpsched" "nothing,nothing,nothing")
;; The floating point loads.
(define_reservation "bdver1-fpload" "(bdver1-fpsched + bdver1-load)")
(define_reservation "bdver1-fpload2" "(bdver1-fpsched + bdver1-load2)")

;; Four FP units.
(define_cpu_unit "bdver1-ffma0" "bdver1_fp")
(define_cpu_unit "bdver1-ffma1" "bdver1_fp")
(define_cpu_unit "bdver1-fmal0" "bdver1_fp")
(define_cpu_unit "bdver1-fmal1" "bdver1_fp")

;; Named views of the FP pipes: fcvt and fmma are pinned to ffma0, fxbar
;; to ffma1 and fsto to fmal1; ffma and fmal take either pipe of the pair.
(define_reservation "bdver1-ffma" "(bdver1-ffma0 | bdver1-ffma1)")
(define_reservation "bdver1-fcvt" "bdver1-ffma0")
(define_reservation "bdver1-fmma" "bdver1-ffma0")
(define_reservation "bdver1-fxbar" "bdver1-ffma1")
(define_reservation "bdver1-fmal" "(bdver1-fmal0 | bdver1-fmal1)")
(define_reservation "bdver1-fsto" "bdver1-fmal1")

;; Vector operations usually consume many of the pipes.
;; Reservation that claims all four FP pipes in the same cycle.
(define_reservation "bdver1-fvector"
  "(bdver1-ffma0 + bdver1-ffma1 + bdver1-fmal0 + bdver1-fmal1)")

;; Jump instructions are executed in the branch unit completely transparent to us.
(define_insn_reservation "bdver1_call" 0
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "call,callv"))
  "bdver1-double,bdver1-agu")
;; PUSH mem is double path.
;; NOTE(review): the reservation below uses bdver1-direct, not
;; bdver1-double -- confirm which one is intended.
(define_insn_reservation "bdver1_push" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "push"))
  "bdver1-direct,bdver1-agu,bdver1-store")
;; POP r16/mem are double path.
;; NOTE(review): modelled here as bdver1-direct -- confirm.
(define_insn_reservation "bdver1_pop" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "pop"))
  "bdver1-direct,bdver1-ivector")
;; LEAVE no latency info so far, assume same with amdfam10.
(define_insn_reservation "bdver1_leave" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "leave"))
  "bdver1-vector,bdver1-ivector")
;; LEA executes in AGU unit with 1 cycle latency on BDVER1.
(define_insn_reservation "bdver1_lea" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "lea"))
  "bdver1-direct,bdver1-agu")

;; MUL executes in special multiplier unit attached to IEU1.
;; Integer multiplies: all four variants decode on decoder 1 and execute
;; on ieu1.  The DImode variants come first so that they take precedence
;; over the mode-independent ones.
(define_insn_reservation "bdver1_imul_DI" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-direct1,bdver1-ieu1")
(define_insn_reservation "bdver1_imul" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "bdver1-direct1,bdver1-ieu1")
;; Memory-operand forms add a load stage before the multiply.
(define_insn_reservation "bdver1_imul_mem_DI" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load,both"))))
  "bdver1-direct1,bdver1-load,bdver1-ieu1")
(define_insn_reservation "bdver1_imul_mem" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "bdver1-direct1,bdver1-load,bdver1-ieu1")

;; IDIV cannot execute in parallel with other instructions.  Dealing with it
;; as with short latency vector instruction is good approximation avoiding
;; scheduler from trying too hard to can hide it's latency by overlap with
;; other instructions.
;; ??? Experiments show that the IDIV can overlap with roughly 6 cycles
;; of the other code.
;; Integer divide, register form: vector decode, then ieu0 held for six
;; cycles while the FP pipes are claimed after the FP scheduling delay.
(define_insn_reservation "bdver1_idiv" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "none,unknown")))
  "bdver1-vector,(bdver1-ieu0*6+(bdver1-fpsched,bdver1-fvector))")

;; Integer divide, memory form: as above with a load ahead of ieu0.
(define_insn_reservation "bdver1_idiv_mem" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "load,both")))
  "bdver1-vector,((bdver1-load,bdver1-ieu0*6)+(bdver1-fpsched,bdver1-fvector))")

;; The parallelism of string instructions is not documented.  Model it same way
;; as IDIV to create smaller automata.  This probably does not matter much.
;; Using the same heuristics for bdver1 as amdfam10 and K8 with IDIV.
(define_insn_reservation "bdver1_str" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both,store")))
  "bdver1-vector,bdver1-load,bdver1-ieu0*6")

;; Integer instructions.
;; Generic integer instructions, keyed on decode class ("direct" or
;; "vector") and on the "memory" attribute.  The vector variants reserve
;; an integer unit for two consecutive cycles.

;; Register-only forms.
(define_insn_reservation "bdver1_idirect" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-direct,bdver1-ieu")
(define_insn_reservation "bdver1_ivector" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-vector,bdver1-ieu,bdver1-ieu")

;; Load forms.  A plain move from memory needs no integer unit.
(define_insn_reservation "bdver1_idirect_loadmov" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-load")
(define_insn_reservation "bdver1_idirect_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-load,bdver1-ieu")
(define_insn_reservation "bdver1_ivector_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "bdver1-vector,bdver1-load,bdver1-ieu,bdver1-ieu")

;; Plain move to memory.
(define_insn_reservation "bdver1_idirect_movstore" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "store")))
  "bdver1-direct,bdver1-agu,bdver1-store")

;; Read-modify-write forms.
(define_insn_reservation "bdver1_idirect_both" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "bdver1-direct,bdver1-load,bdver1-ieu,bdver1-store,bdver1-store")
(define_insn_reservation "bdver1_ivector_both" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "bdver1-vector,bdver1-load,bdver1-ieu,bdver1-ieu,bdver1-store")

;; Store forms: the integer unit and the AGU are claimed in the same cycle.
(define_insn_reservation "bdver1_idirect_store" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "bdver1-direct,(bdver1-ieu+bdver1-agu),bdver1-store")
(define_insn_reservation "bdver1_ivector_store" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "bdver1-vector,(bdver1-ieu+bdver1-agu),bdver1-ieu,bdver1-store")

;; BDVER1 floating point units.
;; x87 loads.  The XFmode load is listed first so it takes precedence over
;; the generic fmov load; it uses the two-port load and holds all FP pipes
;; for nine cycles.
(define_insn_reservation "bdver1_fldxf" 13
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "bdver1-vector,bdver1-fpload2,bdver1-fvector*9")
(define_insn_reservation "bdver1_fld" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")

;; x87 stores, XFmode first for the same reason.
(define_insn_reservation "bdver1_fstxf" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "bdver1-vector,(bdver1-fpsched+bdver1-agu),(bdver1-store2+(bdver1-fvector*6))")
(define_insn_reservation "bdver1_fst" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
;; Integer stores from the x87 stack share the reservation of bdver1_fst.
(define_insn_reservation "bdver1_fist" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fistp,fisttp"))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")

;; Remaining fmov forms (those not matched by the load/store cases above).
(define_insn_reservation "bdver1_fmov_bdver1" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fmov"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; x87 add with a memory operand.
(define_insn_reservation "bdver1_fadd_load" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
;; x87 arithmetic.  Each "_load" variant precedes its generic counterpart
;; so that it takes precedence for memory operands.
(define_insn_reservation "bdver1_fadd" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fop"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fmul_load" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_fmul" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fmul"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fsgn" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fsgn"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fdiv_load" 46
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_fdiv" 42
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fdiv"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fpspc_load" 103
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fpspc")
            (eq_attr "memory" "load")))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")
;; Generic fpspc form.  This used to repeat the condition and the
;; bdver1-fpload reservation of bdver1_fpspc_load, which made it
;; unreachable and left register-form fpspc without a reservation.  It
;; now follows the same pattern as bdver1_fcmov.
(define_insn_reservation "bdver1_fpspc" 100
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fpspc"))
  "bdver1-vector,bdver1-fpsched,bdver1-fvector")
(define_insn_reservation "bdver1_fcmov_load" 17
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")
(define_insn_reservation "bdver1_fcmov" 15
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fcmov"))
  "bdver1-vector,bdver1-fpsched,bdver1-fvector")

;; x87 compares.  The double-decoded forms are listed before the generic
;; fcmp reservations.
(define_insn_reservation "bdver1_fcomi_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "bdver1_decode" "double")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
(define_insn_reservation "bdver1_fcomi" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "double")
            (eq_attr "type" "fcmp")))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")
(define_insn_reservation "bdver1_fcom_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_fcom" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fcmp"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fxch" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fxch"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; SSE loads.
;; Loads of type ssemov, ordered from the most specific condition to the
;; most general so that the narrower cases take precedence.

;; Unaligned vector loads (movu set).
(define_insn_reservation "bdver1_ssevector_avx128_unaligned_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "movu" "1")
                      (and (eq_attr "mode" "V4SF,V2DF")
                           (eq_attr "memory" "load"))))))
  "bdver1-direct,bdver1-fpload")
(define_insn_reservation "bdver1_ssevector_avx256_unaligned_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "movu" "1")
                 (and (eq_attr "mode" "V8SF,V4DF")
                      (eq_attr "memory" "load")))))
  "bdver1-double,bdver1-fpload")
(define_insn_reservation "bdver1_ssevector_sse128_unaligned_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "movu" "1")
                 (and (eq_attr "mode" "V4SF,V2DF")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")

;; Remaining vector loads.
(define_insn_reservation "bdver1_ssevector_avx128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_ssevector_avx256_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_ssevector_sse128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload")

;; Scalar loads.
(define_insn_reservation "bdver1_ssescalar_movq_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_ssescalar_vmovss_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload")
(define_insn_reservation "bdver1_ssescalar_sse128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload, bdver1-ffma")

;; Any other mmxmov/ssemov load.
(define_insn_reservation "bdver1_mmxsse_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload, bdver1-fmal")

;; SSE stores.
;; Stores of type ssemov, widest modes first.  The two vector variants hold
;; the store pipe and a store port for two cycles, the fallback for one.
(define_insn_reservation "bdver1_sse_store_avx256" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "store,both"))))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")
(define_insn_reservation "bdver1_sse_store" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "bdver1-direct,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")
(define_insn_reservation "bdver1_mmxsse_store_short" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "bdver1-direct,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")

;; Register moves.
;; Register-to-register moves (memory "none"), most specific mode first.
(define_insn_reservation "bdver1_ssevector_avx256" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,bdver1-fmal")
(define_insn_reservation "bdver1_movss_movsd" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_mmxssemov" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmal")
;; SSE logs.
;; The V8SF variants are double decoded and use an fmal pipe; all other
;; modes are direct and use the fxbar pipe.
(define_insn_reservation "bdver1_sselog_load_256" 7
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_sselog_256" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
            (eq_attr "mode" "V8SF")))
  "bdver1-double,bdver1-fpsched,bdver1-fmal")
(define_insn_reservation "bdver1_sselog_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fxbar")
(define_insn_reservation "bdver1_sselog" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))
  "bdver1-direct,bdver1-fpsched,bdver1-fxbar")

;; PCMP actually executes in FMAL.
;; NOTE(review): the two ssecmp reservations below use bdver1-ffma, not
;; bdver1-fmal -- confirm which one is intended.
(define_insn_reservation "bdver1_ssecmp_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_ssecmp" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "ssecmp"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_ssecomi_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
(define_insn_reservation "bdver1_ssecomi" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "ssecomi"))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")

;; Conversions behave very irregularly and the scheduling is critical here.
;; Take each instruction separately.

;; 256 bit conversion.
;; ssecvt instructions whose destination (operand 0) or source (operand 1)
;; is V4DF, V8SF or V8SI: vector decode, all FP pipes claimed.
(define_insn_reservation "bdver1_vcvtX2Y_avx256_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (ior (ior (match_operand:V4DF 0 "register_operand")
                           (ior (match_operand:V8SF 0 "register_operand")
                                (match_operand:V8SI 0 "register_operand")))
                      (ior (match_operand:V4DF 1 "nonimmediate_operand")
                           (ior (match_operand:V8SF 1 "nonimmediate_operand")
                                (match_operand:V8SI 1 "nonimmediate_operand")))))))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")
;; Same operand test for the register-only form.
(define_insn_reservation "bdver1_vcvtX2Y_avx256" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (ior (ior (match_operand:V4DF 0 "register_operand")
                           (ior (match_operand:V8SF 0 "register_operand")
                                (match_operand:V8SI 0 "register_operand")))
                      (ior (match_operand:V4DF 1 "nonimmediate_operand")
                           (ior (match_operand:V8SF 1 "nonimmediate_operand")
                                (match_operand:V8SI 1 "nonimmediate_operand")))))))
  "bdver1-vector,bdver1-fpsched,bdver1-fvector")
;; CVTSS2SD, CVTSD2SS.
;; Scalar float<->float conversions (type ssecvt, mode SF or DF) run on
;; the fcvt pipe.
(define_insn_reservation "bdver1_ssecvt_cvtss2sd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
(define_insn_reservation "bdver1_ssecvt_cvtss2sd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ.
;; The register form is double decoded and may finish without using the
;; fcvt pipe ("nothing | bdver1-fcvt").
(define_insn_reservation "bdver1_sseicvt_cvtsi2sd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
(define_insn_reservation "bdver1_sseicvt_cvtsi2sd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(nothing | bdver1-fcvt)")
;; CVTPD2PS.
;; Recognised by operand modes: V4SF destination, V2DF source.
(define_insn_reservation "bdver1_ssecvt_cvtpd2ps_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
(define_insn_reservation "bdver1_ssecvt_cvtpd2ps" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
;; CVTPI2PS, CVTDQ2PS.
;; Recognised by operand modes: V4SF destination, V2SI or V4SI source.
(define_insn_reservation "bdver1_ssecvt_cvtdq2ps_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
(define_insn_reservation "bdver1_ssecvt_cvtdq2ps" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
;; CVTDQ2PD.
;; "ssecvt" insns writing a V2DF register (operand 0) from a V4SI source
;; (operand 1).  Memory-source form: default latency 8, double decode.
(define_insn_reservation "bdver1_ssecvt_cvtdq2pd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")

;; No-memory form of the above: default latency 4.
(define_insn_reservation "bdver1_ssecvt_cvtdq2pd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")

;; CVTPS2PD, CVTPI2PD.
;; "ssecvt" insns writing a V2DF register from a V2SI or V4SF source.
;; Memory-source form: default latency 6, double decode.
(define_insn_reservation "bdver1_ssecvt_cvtps2pd_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")

;; No-memory form: default latency 2, issued through bdver1-fpsched.
;; The memory test must be "none" here.  It previously read "load", which
;; made this condition identical to bdver1_ssecvt_cvtps2pd_load above, so
;; this entry did not describe the register-source form it is meant for.
(define_insn_reservation "bdver1_ssecvt_cvtps2pd" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ, CVTTSS2SI, CVTTSS2SIQ.
;; "sseicvt" insns in SI or DI mode.  Memory-source form: default
;; latency 8, double decode; last stage is bdver1-fcvt or bdver1-fsto.
(define_insn_reservation "bdver1_ssecvt_cvtsX2si_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SI,DI")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fsto)")

;; No-memory form of the above: default latency 4.
(define_insn_reservation "bdver1_ssecvt_cvtsX2si" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SI,DI")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fsto)")

;; CVTPD2PI, CVTTPD2PI.
;; "ssecvt" insns reading a V2DF source (operand 1) into a V2SI register
;; (operand 0).  Memory-source form: default latency 8, double decode.
(define_insn_reservation "bdver1_ssecvt_cvtpd2pi_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V2SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")

;; No-memory form of the above: default latency 4.
(define_insn_reservation "bdver1_ssecvt_cvtpd2pi" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V2SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
;; CVTPD2DQ, CVTTPD2DQ.
;; "ssecvt" insns reading a V2DF source (operand 1) into a V4SI register
;; (operand 0).  Memory-source form: default latency 6, double decode.
(define_insn_reservation "bdver1_ssecvt_cvtpd2dq_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")

;; No-memory form of the above: default latency 2.
(define_insn_reservation "bdver1_ssecvt_cvtpd2dq" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")

;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ.
;; "ssecvt" insns reading a V4SF source (operand 1) into a V2SI or V4SI
;; register (operand 0).  Memory-source form: default latency 8, direct
;; decode.
(define_insn_reservation "bdver1_ssecvt_cvtps2pi_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 1 "nonimmediate_operand")
                      (ior (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SI 0 "register_operand"))))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")

;; No-memory form of the above: default latency 4.
(define_insn_reservation "bdver1_ssecvt_cvtps2pi" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 1 "nonimmediate_operand")
                      (ior (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SI 0 "register_operand"))))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")

;; SSE MUL, ADD, and MULADD.
;; 256-bit (V8SF/V4DF) forms come first and use double decode; the
;; mode-less entries that follow cover the remaining modes with direct
;; decode.  Keep this order: the later conditions are a superset of the
;; earlier ones.
(define_insn_reservation "bdver1_ssemuladd_load_256" 11
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (and (eq_attr "mode" "V8SF,V4DF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-ffma")

(define_insn_reservation "bdver1_ssemuladd_256" 7
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (and (eq_attr "mode" "V8SF,V4DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,bdver1-ffma")

;; Any mode, memory-source form: default latency 10.
(define_insn_reservation "bdver1_ssemuladd_load" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")

;; Any mode, no-memory form: default latency 6.
(define_insn_reservation "bdver1_ssemuladd" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; "sseimul" insns: default latency 8 with a memory source, 4 without;
;; both finish on bdver1-fmma.
(define_insn_reservation "bdver1_sseimul_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseimul")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fmma")

(define_insn_reservation "bdver1_sseimul" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseimul")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmma")

;; "sseiadd" insns: default latency 6 with a memory source, 2 without;
;; both finish on bdver1-fmal.
(define_insn_reservation "bdver1_sseiadd_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseiadd")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")

(define_insn_reservation "bdver1_sseiadd" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseiadd")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmal")

;; SSE DIV: no throughput information (assume same as amdfam10).
;; Every divide reserves bdver1-ffma0 or bdver1-ffma1 for 17 cycles.
;; 256-bit modes (V4DF, V8SF) use double decode.
(define_insn_reservation "bdver1_ssediv_double_load_256" 31
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V4DF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")

(define_insn_reservation "bdver1_ssediv_double_256" 27
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V4DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")

(define_insn_reservation "bdver1_ssediv_single_load_256" 28
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")

(define_insn_reservation "bdver1_ssediv_single_256" 24
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")

;; Scalar and 128-bit divides (DF/V2DF, SF/V4SF) use direct decode.
(define_insn_reservation "bdver1_ssediv_double_load" 31
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")

(define_insn_reservation "bdver1_ssediv_double" 27
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")

(define_insn_reservation "bdver1_ssediv_single_load" 28
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")

(define_insn_reservation "bdver1_ssediv_single" 24
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")

;; "sseins" insns in TI mode: default latency 3, finishing on bdver1-fxbar.
(define_insn_reservation "bdver1_sseins" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseins")
            (eq_attr "mode" "TI")))
  "bdver1-direct,bdver1-fpsched,bdver1-fxbar")
