;; Scheduling for Haswell and derived processors.
;; Copyright (C) 2004-2018 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The scheduling description in this file is based on core2.md.
;; The major difference from the CORE2 pipeline is that HASWELL has
;; two memory units (MU) for loads and one MU for stores.
;; Automata used by this description: one for the decoders, one for the
;; execution core, one per divider unit, and one each for the load and
;; store ports.
(define_automaton "haswell_decoder,haswell_core,haswell_idiv,haswell_fdiv,haswell_ssediv,haswell_load,haswell_store")

;; The CPU domain, used for HASWELL bypass latencies.
;; x87 types are "float"; SSE types are classified by mode (FP modes are
;; "float", SImode is "int", anything else is "simd"); MMX types are
;; "simd"; every remaining type defaults to "int".
(define_attr "hsw_domain" "int,float,simd"
  (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
           (const_string "float")
         (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseishft1,sseimul,
                          sse,ssemov,sseadd,sseadd1,ssemul,ssecmp,ssecomi,ssecvt,
                          ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
           (cond [(eq_attr "mode" "V4DF,V8SF,V2DF,V4SF,SF,DF")
                    (const_string "float")
                  (eq_attr "mode" "SI")
                    (const_string "int")]
                 (const_string "simd"))
         (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
           (const_string "simd")]
        (const_string "int")))

;; The four decoders.
(define_cpu_unit "hsw_decoder0" "haswell_decoder")
(define_cpu_unit "hsw_decoder1" "haswell_decoder")
(define_cpu_unit "hsw_decoder2" "haswell_decoder")
(define_cpu_unit "hsw_decoder3" "haswell_decoder")

;; We first wish to find an instruction for hsw_decoder0, so the other
;; decoders may only be reserved once hsw_decoder0 has been reserved.
(presence_set "hsw_decoder1" "hsw_decoder0")
(presence_set "hsw_decoder2" "hsw_decoder0")
(presence_set "hsw_decoder3" "hsw_decoder0")

;; Most instructions can be decoded on any of the four decoders.
(define_reservation "hsw_decodern" "(hsw_decoder0|hsw_decoder1|hsw_decoder2|hsw_decoder3)")

;; The out-of-order core has eight pipelines.  These are similar to the
;; Pentium Pro's five pipelines.  Ports 2 and 3 are responsible for memory
;; loads, port 7 for store address calculations, port 4 for memory stores,
;; and ports 0, 1, 5 and 6 for everything else.

(define_cpu_unit "hsw_p0,hsw_p1,hsw_p5,hsw_p6" "haswell_core")
(define_cpu_unit "hsw_p2,hsw_p3" "haswell_load")
(define_cpu_unit "hsw_p4,hsw_p7" "haswell_store")
(define_cpu_unit "hsw_idiv" "haswell_idiv")
(define_cpu_unit "hsw_fdiv" "haswell_fdiv")
(define_cpu_unit "hsw_ssediv" "haswell_ssediv")

;; Port groups.  Names made of bare digits ("p0156") mean "any one of";
;; names spelling out each port ("p0p1p5p6", "p4p7") mean "all of".
(define_reservation "hsw_p0156" "hsw_p0|hsw_p1|hsw_p5|hsw_p6")
(define_reservation "hsw_p0p1p5p6" "hsw_p0+hsw_p1+hsw_p5+hsw_p6")
(define_reservation "hsw_p23" "hsw_p2|hsw_p3")
(define_reservation "hsw_p4p7" "hsw_p4+hsw_p7")
(define_reservation "hsw_p237" "hsw_p2|hsw_p3|hsw_p7")
(define_reservation "hsw_p015" "hsw_p0|hsw_p1|hsw_p5")
(define_reservation "hsw_p01" "hsw_p0|hsw_p1")

;; Complex instructions and calls are only modelled as occupying
;; hsw_decoder0; no execution port is reserved for them.
(define_insn_reservation "hsw_complex_insn" 6
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "other,multi,str"))
  "hsw_decoder0")

(define_insn_reservation "hsw_call" 1
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "call,callv"))
  "hsw_decoder0")

;; imov with memory operands does not use the integer units.
;; imovx always decodes to one uop, and also doesn't use the integer
;; units if it has memory operands.
;; Register-to-register integer moves: any of ports 0, 1, 5, 6.
(define_insn_reservation "hsw_imov" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "imov,imovx")))
  "hsw_decodern,hsw_p0156")

;; Integer loads only occupy a load port.
(define_insn_reservation "hsw_imov_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "imov,imovx")))
  "hsw_decodern,hsw_p23")

;; Integer stores: store data on port 4 plus one address-generation port.
(define_insn_reservation "hsw_imov_store" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "imov")))
  "hsw_decodern,hsw_p4+(hsw_p2|hsw_p3|hsw_p7)")

;; Conditional moves occupy an ALU port for two consecutive cycles.
(define_insn_reservation "hsw_icmov" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "icmov")))
  "hsw_decodern,hsw_p0156,hsw_p0156")

(define_insn_reservation "hsw_icmov_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "icmov")))
  "hsw_decodern,hsw_p23+hsw_p0156,hsw_p0156")

;; Pushes.  The "store" form pushes a register or immediate, the "both"
;; form pushes a memory operand and needs an extra memory-port cycle.
(define_insn_reservation "hsw_push_reg" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "push")))
  "hsw_decodern,hsw_p4+hsw_p237")

(define_insn_reservation "hsw_push_mem" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "both")
            (eq_attr "type" "push")))
  "hsw_decodern,hsw_p4+hsw_p237,hsw_p237")

;; Consider lea latency as having 2 components.
;; lea issues on port 1 or port 5.
(define_insn_reservation "hsw_lea" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "lea")))
  "hsw_decodern,hsw_p1|hsw_p5")

;; Shifts and rotates issue on port 0 or port 6.
(define_insn_reservation "hsw_shift_rotate" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
  "hsw_decodern,hsw_p0|hsw_p6")

;; Any memory form of a shift/rotate additionally reserves an
;; address-generation port and the store-data port in the same cycle.
(define_insn_reservation "hsw_shift_rotate_mem" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
            (eq_attr "type" "ishift,ishift1,rotate,rotate1")))
  "hsw_decodern,(hsw_p0|hsw_p6)+hsw_p237+hsw_p4")

;; Direct branches execute on port 6.
(define_insn_reservation "hsw_branch" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ibr")))
  "hsw_decodern,hsw_p6")

;; Branches with a memory operand must use decoder 0 and also need a
;; load port.
(define_insn_reservation "hsw_indirect_branch" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
            (eq_attr "type" "ibr")))
  "hsw_decoder0,hsw_p23+hsw_p6")

;; leave: decoder 0, then a load plus an ALU port, then a second ALU cycle.
(define_insn_reservation "hsw_leave" 4
  (and (eq_attr "cpu" "generic,haswell")
       (eq_attr "type" "leave"))
  "hsw_decoder0,hsw_p23+hsw_p0156,hsw_p0156")

;; imul and imulx with two/three operands only execute on port 1.
;; Integer multiply: port 1, plus a load port for the memory forms.
(define_insn_reservation "hsw_imul" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "imul")))
  "hsw_decodern,hsw_p1")

(define_insn_reservation "hsw_imul_mem" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
            (eq_attr "type" "imul")))
  "hsw_decodern,hsw_p23+hsw_p1")

;; imulx occupies an execution port for two consecutive cycles.
(define_insn_reservation "hsw_imulx" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "imulx")))
  "hsw_decodern,hsw_p0156,hsw_p0156")

;; The second cycle used to read "(hsw_p0|hsw_p6|hsw_p6)"; the repeated
;; alternative was redundant and has been dropped, which does not change
;; the set of possible reservations.
;; NOTE(review): the register form above uses hsw_p0156 for its second
;; cycle; confirm whether hsw_p0156 was intended here as well.
(define_insn_reservation "hsw_imulx_mem" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
            (eq_attr "type" "imulx")))
  "hsw_decodern,hsw_p23+hsw_p0156,(hsw_p0|hsw_p6)")


;; div and idiv are very similar, so we model them the same.
;; Use the same latency for all QI,HI and SI modes.
;; The divide holds all four execution ports and the hsw_idiv unit for
;; nine cycles.
(define_insn_reservation "hsw_idiv" 23
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "idiv")))
  "hsw_decoder0,(hsw_p0p1p5p6+hsw_idiv)*9")

;; The load form spends one extra leading cycle on a load port together
;; with port 0 and the divider.
(define_insn_reservation "hsw_idiv_load" 23
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "idiv")))
  "hsw_decoder0,hsw_p23+hsw_p0+hsw_idiv,(hsw_p0p1p5p6+hsw_idiv)*9")

;; x87 floating point operations.
208*38fd1498Szrj 209*38fd1498Szrj(define_insn_reservation "hsw_fxch" 0 210*38fd1498Szrj (and (eq_attr "cpu" "generic,haswell") 211*38fd1498Szrj (eq_attr "type" "fxch")) 212*38fd1498Szrj "hsw_decodern") 213*38fd1498Szrj 214*38fd1498Szrj(define_insn_reservation "hsw_fop" 3 215*38fd1498Szrj (and (eq_attr "cpu" "generic,haswell") 216*38fd1498Szrj (and (eq_attr "memory" "none,unknown") 217*38fd1498Szrj (eq_attr "type" "fop"))) 218*38fd1498Szrj "hsw_decodern,hsw_p1") 219*38fd1498Szrj 220*38fd1498Szrj(define_insn_reservation "hsw_fop_load" 5 221*38fd1498Szrj (and (eq_attr "cpu" "generic,haswell") 222*38fd1498Szrj (and (eq_attr "memory" "load") 223*38fd1498Szrj (eq_attr "type" "fop"))) 224*38fd1498Szrj "hsw_decodern,hsw_p23+hsw_p1,hsw_p1") 225*38fd1498Szrj 226*38fd1498Szrj(define_insn_reservation "hsw_fop_store" 3 227*38fd1498Szrj (and (eq_attr "cpu" "generic,haswell") 228*38fd1498Szrj (and (eq_attr "memory" "store") 229*38fd1498Szrj (eq_attr "type" "fop"))) 230*38fd1498Szrj "hsw_decodern,hsw_p0,hsw_p0,hsw_p0+hsw_p4+hsw_p3") 231*38fd1498Szrj 232*38fd1498Szrj(define_insn_reservation "hsw_fop_both" 5 233*38fd1498Szrj (and (eq_attr "cpu" "generic,haswell") 234*38fd1498Szrj (and (eq_attr "memory" "both") 235*38fd1498Szrj (eq_attr "type" "fop"))) 236*38fd1498Szrj "hsw_decodern,hsw_p2+hsw_p0,hsw_p0+hsw_p4+hsw_p3") 237*38fd1498Szrj 238*38fd1498Szrj(define_insn_reservation "hsw_fsgn" 1 239*38fd1498Szrj (and (eq_attr "cpu" "generic,haswell") 240*38fd1498Szrj (eq_attr "type" "fsgn")) 241*38fd1498Szrj "hsw_decodern,hsw_p0") 242*38fd1498Szrj 243*38fd1498Szrj(define_insn_reservation "hsw_fistp" 7 244*38fd1498Szrj (and (eq_attr "cpu" "generic,haswell") 245*38fd1498Szrj (eq_attr "type" "fistp")) 246*38fd1498Szrj "hsw_decoder0,hsw_p1+hsw_p4+hsw_p23") 247*38fd1498Szrj 248*38fd1498Szrj(define_insn_reservation "hsw_fcmov" 2 249*38fd1498Szrj (and (eq_attr "cpu" "generic,haswell") 250*38fd1498Szrj (eq_attr "type" "fcmov")) 251*38fd1498Szrj "hsw_decoder0,hsw_p0+hsw_p5,hsw_p0") 252*38fd1498Szrj 
;; x87 compares execute on port 1.
(define_insn_reservation "hsw_fcmp" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "fcmp")))
  "hsw_decodern,hsw_p1")

(define_insn_reservation "hsw_fcmp_load" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "fcmp")))
  "hsw_decodern,hsw_p23+hsw_p1")

;; x87 moves.  XFmode loads and stores are modelled as two cycles on the
;; memory ports; the other modes take a single cycle.
(define_insn_reservation "hsw_fmov" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "fmov")))
  "hsw_decodern,hsw_p01")

(define_insn_reservation "hsw_fmov_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "!XF")
                 (eq_attr "type" "fmov"))))
  "hsw_decodern,hsw_p23")

(define_insn_reservation "hsw_fmov_XF_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fmov"))))
  "hsw_decodern,(hsw_p23+hsw_p0)*2")

(define_insn_reservation "hsw_fmov_store" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
            (and (eq_attr "mode" "!XF")
                 (eq_attr "type" "fmov"))))
  "hsw_decodern,hsw_p4p7")

(define_insn_reservation "hsw_fmov_XF_store" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fmov"))))
  "hsw_decodern,hsw_p4p7,hsw_p4p7")

;; x87 multiplies issue on port 0 or port 1.
(define_insn_reservation "hsw_fmul" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "fmul")))
  "hsw_decodern,hsw_p01")

(define_insn_reservation "hsw_fmul_load" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "fmul")))
  "hsw_decodern,hsw_p23+hsw_p01")

;; fdiv latencies depend on the mode of the operands.  XFmode gives
;; a latency of 38 cycles, DFmode gives 32, and SFmode gives latency 18.
;; Division by a power of 2 takes only 9 cycles, but we cannot model
;; that.  Throughput is equal to latency - 1, which we model by keeping
;; the hsw_fdiv unit (haswell_fdiv automaton) reserved.
(define_insn_reservation "hsw_fdiv_SF" 18
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p0+hsw_fdiv,hsw_fdiv*16")

(define_insn_reservation "hsw_fdiv_SF_load" 19
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p23+hsw_p0+hsw_fdiv,hsw_fdiv*16")

(define_insn_reservation "hsw_fdiv_DF" 32
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p0+hsw_fdiv,hsw_fdiv*30")

;; DFmode x87 divide with a memory operand: one cycle on a load port,
;; port 0 and the divider, then the divider alone for 30 more cycles.
(define_insn_reservation "hsw_fdiv_DF_load" 33
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "DF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p23+hsw_p0+hsw_fdiv,hsw_fdiv*30")

;; XFmode x87 divide: the divider stays busy for 36 further cycles.
(define_insn_reservation "hsw_fdiv_XF" 38
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p0+hsw_fdiv,hsw_fdiv*36")

;; The load may go to either load port (hsw_p23), matching the DFmode
;; load form above; this reservation previously pinned it to hsw_p2 only.
(define_insn_reservation "hsw_fdiv_XF_load" 39
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "XF")
                 (eq_attr "type" "fdiv,fpspc"))))
  "hsw_decodern,hsw_p23+hsw_p0+hsw_fdiv,hsw_fdiv*36")

;; MMX instructions.

;; MMX / SSE integer adds issue on port 1 or port 5.
(define_insn_reservation "hsw_mmx_add" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "mmxadd,sseiadd")))
  "hsw_decodern,hsw_p1|hsw_p5")

(define_insn_reservation "hsw_mmx_add_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "mmxadd,sseiadd")))
  "hsw_decodern,hsw_p23+(hsw_p1|hsw_p5)")

;; MMX shifts execute on port 0.
(define_insn_reservation "hsw_mmx_shft" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "mmxshft")))
  "hsw_decodern,hsw_p0")

(define_insn_reservation "hsw_mmx_shft_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "mmxshft")))
  "hsw_decodern,hsw_p23+hsw_p0")

;; SSE integer shifts whose length_immediate attribute is non-zero.
(define_insn_reservation "hsw_mmx_sse_shft" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "!0"))))
  "hsw_decodern,hsw_p01")

(define_insn_reservation "hsw_mmx_sse_shft_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "!0"))))
  "hsw_decodern,hsw_p01+hsw_p23")

;; SSE integer shifts whose length_immediate attribute is zero; these are
;; given one more cycle of latency than the forms above.
(define_insn_reservation "hsw_mmx_sse_shft1" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "0"))))
  "hsw_decodern,hsw_p01")

(define_insn_reservation "hsw_mmx_sse_shft1_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "type" "sseishft")
                 (eq_attr "length_immediate" "0"))))
  "hsw_decodern,hsw_p01+hsw_p23")

;; MMX / SSE integer multiplies issue on port 0 or port 1.
(define_insn_reservation "hsw_mmx_mul" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "mmxmul,sseimul")))
  "hsw_decodern,hsw_p01")

;; The load form must test memory "load".  It previously tested "none",
;; which made its condition identical to hsw_mmx_mul: two reservations
;; overlapped for register operands and none covered the load case.
(define_insn_reservation "hsw_mmx_mul_load" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "mmxmul,sseimul")))
  "hsw_decodern,hsw_p23+hsw_p01")

;; DImode MMX conversions execute on port 1.
(define_insn_reservation "hsw_sse_mmxcvt" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "mode" "DI")
            (eq_attr "type" "mmxcvt")))
  "hsw_decodern,hsw_p1")

;; (define_insn_reservation "hsw_sse_mmxshft" 2
;;   (and (eq_attr "cpu" "generic,haswell")
;;        (and (eq_attr "mode" "TI")
;;             (eq_attr "type" "mmxshft")))
;;   "hsw_decodern,hsw_p01")

;; The sfence instruction.
;; Matched as type "sse" with memory "unknown"; needs decoder 0, then a
;; load port together with the store-data port.
(define_insn_reservation "hsw_sse_sfence" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "unknown")
            (eq_attr "type" "sse")))
  "hsw_decoder0,hsw_p23+hsw_p4")

;; Generic "sse" type instructions, split by mode.  All issue on port 0
;; or port 1; only the latency differs.
(define_insn_reservation "hsw_sse_SFDF" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "mode" "SF,DF")
            (eq_attr "type" "sse")))
  "hsw_decodern,hsw_p01")

(define_insn_reservation "hsw_sse_V4SF" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "mode" "V4SF")
            (eq_attr "type" "sse")))
  "hsw_decodern,hsw_p01")

(define_insn_reservation "hsw_sse_V8SF" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "mode" "V8SF,V4DF")
            (eq_attr "type" "sse")))
  "hsw_decodern,hsw_p01")

;; sseadd1 and SSE compares issue on port 0 or port 1.
(define_insn_reservation "hsw_sse_addcmp" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "sseadd1,ssecmp,ssecomi")))
  "hsw_decodern,hsw_p01")

(define_insn_reservation "hsw_sse_addcmp_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "sseadd1,ssecmp,ssecomi")))
  "hsw_decodern,hsw_p23+hsw_p01")

;; SSE logic operations issue on any of ports 0, 1 and 5.
(define_insn_reservation "hsw_sse_logic" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "sselog,sselog1")))
  "hsw_decodern,hsw_p015")

(define_insn_reservation "hsw_sse_logic_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "sselog,sselog1")))
  "hsw_decodern,hsw_p015+hsw_p23")

;; SSE adds issue on port 1 or port 5.
(define_insn_reservation "hsw_sse_add" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "sseadd")))
  "hsw_decodern,hsw_p1|hsw_p5")

(define_insn_reservation "hsw_sse_add_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "sseadd")))
  "hsw_decodern,(hsw_p1|hsw_p5)+hsw_p23")

;; SSE multiplies execute on port 0.
(define_insn_reservation "hsw_sse_mul" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ssemul")))
  "hsw_decodern,hsw_p0")

(define_insn_reservation "hsw_sse_mul_load" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "ssemul")))
  "hsw_decodern,hsw_p0+hsw_p23")
;; Use skylake pipeline.
;; Fused multiply-add issues on port 0 or port 1.
(define_insn_reservation "hsw_sse_muladd" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ssemuladd")))
  "hsw_decodern,hsw_p01")

(define_insn_reservation "hsw_sse_muladd_load" 5
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "ssemuladd")))
  "hsw_decodern,hsw_p01+hsw_p23")

;; SSE divides: one cycle on port 0, then the hsw_ssediv unit is held for
;; the stated number of cycles.
(define_insn_reservation "hsw_sse_div_SF" 18
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SF,V4SF,V8SF")
                 (eq_attr "type" "ssediv"))))
  "hsw_decodern,hsw_p0,hsw_ssediv*14")

;; The load forms must test memory "load".  They previously tested
;; "none", duplicating the register forms' conditions and leaving divides
;; with a memory operand without a reservation.
(define_insn_reservation "hsw_sse_div_SF_load" 18
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "SF,V4SF,V8SF")
                 (eq_attr "type" "ssediv"))))
  "hsw_decodern,(hsw_p23+hsw_p0),hsw_ssediv*14")

(define_insn_reservation "hsw_sse_div_DF" 28
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "DF,V2DF,V4DF")
                 (eq_attr "type" "ssediv"))))
  "hsw_decodern,hsw_p0,hsw_ssediv*20")

(define_insn_reservation "hsw_sse_div_DF_load" 28
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (and (eq_attr "mode" "DF,V2DF,V4DF")
                 (eq_attr "type" "ssediv"))))
  "hsw_decodern,(hsw_p23+hsw_p0),hsw_ssediv*20")

;; Integer <-> FP conversions execute on port 1.  The generic
;; reservations exclude SImode so that they do not overlap the SImode
;; reservations further down; when two reservation conditions are true
;; for the same insn, which one is used is not defined.
(define_insn_reservation "hsw_sse_icvt" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "!SI")
                 (eq_attr "type" "sseicvt"))))
  "hsw_decodern,hsw_p1")

(define_insn_reservation "hsw_sse_icvt_load" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
            (and (eq_attr "mode" "!SI")
                 (eq_attr "type" "sseicvt"))))
  "hsw_decodern,hsw_p23+hsw_p1")


;; SImode conversions use the same units with one cycle less latency.
(define_insn_reservation "hsw_sse_icvt_SI" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (and (eq_attr "mode" "SI")
                 (eq_attr "type" "sseicvt"))))
  "hsw_decodern,hsw_p1")

(define_insn_reservation "hsw_sse_icvt_SI_load" 3
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "!none")
            (and (eq_attr "mode" "SI")
                 (eq_attr "type" "sseicvt"))))
  "hsw_decodern,hsw_p23+hsw_p1")

;; SSE moves: register moves on ports 0/1/5, loads on a load port, stores
;; on the store ports.
(define_insn_reservation "hsw_sse_mov" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none")
            (eq_attr "type" "ssemov")))
  "hsw_decodern,hsw_p015")

(define_insn_reservation "hsw_sse_mov_load" 2
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "ssemov")))
  "hsw_decodern,hsw_p23")

(define_insn_reservation "hsw_sse_mov_store" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "ssemov")))
  "hsw_decodern,hsw_p4p7")

;; Remaining simple instructions, split by the kind of memory access.
(define_insn_reservation "hsw_insn" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "none,unknown")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,sseishft1,mmx,mmxcmp")))
  "hsw_decodern,hsw_p0156")

(define_insn_reservation "hsw_insn_load" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "load")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,pop,sseishft1,mmx,mmxcmp")))
  "hsw_decodern,hsw_p23+hsw_p0156")

(define_insn_reservation "hsw_insn_store" 1
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "store")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,sseishft1,mmx,mmxcmp")))
  "hsw_decodern,hsw_p0156+hsw_p4p7")

;; read-modify-store instructions produce 4 uops so they have to be
;; decoded on hsw_decoder0 as well.
;; NOTE(review): the reservation below names hsw_decodern, not
;; hsw_decoder0 -- confirm which of the comment and the code is intended.
(define_insn_reservation "hsw_insn_both" 4
  (and (eq_attr "cpu" "generic,haswell")
       (and (eq_attr "memory" "both")
            (eq_attr "type" "alu,alu1,negnot,incdec,icmp,test,setcc,pop,sseishft1,mmx,mmxcmp")))
  "hsw_decodern,hsw_p23+hsw_p0156+hsw_p4p7")