;; Copyright (C) 2010-2018 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;;
;; AMD bdver1 Scheduling
;;
;; The bdver1 contains four pipelined FP units, two integer units and
;; two address generation units.
;;
;; The predecode logic determines the boundaries of instructions in the
;; 64-byte cache line.  So the cache line straddling problem of K6 might
;; be an issue here as well, but it is not noted in the documentation.
;;
;; Three DirectPath instruction decoders and only one VectorPath decoder
;; are available.  They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
;;
;; The load/store queue unit is not attached to the schedulers but
;; communicates with all the execution units separately instead.
;; Decode class of an insn on bdver1.  Insns are "direct" unless the
;; attribute is overridden elsewhere.
(define_attr "bdver1_decode" "direct,vector,double"
  (const_string "direct"))

;; Automata that the units declared in this file are distributed over.
(define_automaton "bdver1,bdver1_ieu,bdver1_load,bdver1_fp,bdver1_agu")

;; The three ordinary decoder slots, followed by the dedicated
;; vector decoder.
(define_cpu_unit "bdver1-decode0,bdver1-decode1,bdver1-decode2" "bdver1")
(define_cpu_unit "bdver1-decodev" "bdver1")

;; A double decoded instruction may need two cycles to decode: it can
;; take decoder2 in one cycle and decoder0 in the following one.  This
;; has to be modeled to reach a throughput of 1.5 double decoded
;; instructions per cycle.
;;
;; To keep the decoder reservation independent of the other units, the
;; decoder is modeled as a two stage fully pipelined unit in which only
;; double decoded instructions may occupy a unit during the first cycle.
;; Taken alone, that scheme would let two double instructions issue in
;; cycle 0.
;;
;; A presence set that requires decoder0 to be allocated whenever
;; decoder2 is used prevents this.  Vector decoded instructions could
;; then no longer be issued if they were modeled as consuming
;; decoder0+decoder1+decoder2, so they get a specialized vector decoder
;; unit that is tied to the others by an exclusion set.
(presence_set "bdver1-decode2" "bdver1-decode0")
(exclusion_set "bdver1-decodev"
  "bdver1-decode0,bdver1-decode1,bdver1-decode2")

(define_reservation "bdver1-vector" "nothing,bdver1-decodev")
(define_reservation "bdver1-direct1" "nothing,bdver1-decode1")
(define_reservation "bdver1-direct" "nothing,
  (bdver1-decode0 | bdver1-decode1
  | bdver1-decode2)")
;; Double instructions behave like two direct instructions.
;; Alternatives for a double decoded insn: decoder2 now and decoder0 in
;; the next cycle, or a pair of adjacent decoders one cycle later.
(define_reservation "bdver1-double" "((bdver1-decode2,bdver1-decode0)
  | (nothing,(bdver1-decode0 + bdver1-decode1))
  | (nothing,(bdver1-decode1 + bdver1-decode2)))")


;; Two integer execution units; "bdver1-ieu" takes either one.
(define_cpu_unit "bdver1-ieu0,bdver1-ieu1" "bdver1_ieu")
(define_reservation "bdver1-ieu" "(bdver1-ieu0 | bdver1-ieu1)")

;; Two address generation units; "bdver1-agu" takes either one.
(define_cpu_unit "bdver1-agu0,bdver1-agu1" "bdver1_agu")
(define_reservation "bdver1-agu" "(bdver1-agu0 | bdver1-agu1)")

;; Two load units.  A load reserves an AGU first, then a load unit.
(define_cpu_unit "bdver1-load0,bdver1-load1" "bdver1_load")
(define_reservation "bdver1-load" "bdver1-agu,
  (bdver1-load0 | bdver1-load1),nothing")
;; 128bit SSE instructions issue two loads at once.
(define_reservation "bdver1-load2" "bdver1-agu,
  (bdver1-load0 + bdver1-load1),nothing")

;; Stores are modeled on the same bdver1-load0/bdver1-load1 units.
(define_reservation "bdver1-store" "(bdver1-load0 | bdver1-load1)")
;; 128bit SSE instructions issue two stores at once.
(define_reservation "bdver1-store2" "(bdver1-load0 + bdver1-load1)")

;; Vectorpath (microcoded) instructions are single issue instructions,
;; so they occupy all the integer units.
(define_reservation "bdver1-ivector" "bdver1-ieu0+bdver1-ieu1+
  bdver1-agu0+bdver1-agu1+
  bdver1-load0+bdver1-load1")

;; The FP operations start to execute at stage 12 in the pipeline, while
;; integer operations start to execute at stage 9 for athlon and 11 for
;; K8.  Compensate for the difference for athlon because it results in
;; significantly smaller automata.
;; NOTE: the above information was just copied from athlon.md, and was
;; not actually verified for bdver1.
(define_reservation "bdver1-fpsched" "nothing,nothing,nothing")
;; The floating point loads.
(define_reservation "bdver1-fpload" "(bdver1-fpsched + bdver1-load)")
(define_reservation "bdver1-fpload2" "(bdver1-fpsched + bdver1-load2)")

;; Four FP units.
(define_cpu_unit "bdver1-ffma0,bdver1-ffma1,bdver1-fmal0,bdver1-fmal1"
  "bdver1_fp")

;; Named views of the FP pipes: fcvt and fmma are bound to ffma0, fxbar
;; to ffma1 and fsto to fmal1; ffma and fmal accept either pipe of
;; their pair.
(define_reservation "bdver1-ffma" "(bdver1-ffma0 | bdver1-ffma1)")
(define_reservation "bdver1-fcvt" "bdver1-ffma0")
(define_reservation "bdver1-fmma" "bdver1-ffma0")
(define_reservation "bdver1-fxbar" "bdver1-ffma1")
(define_reservation "bdver1-fmal" "(bdver1-fmal0 | bdver1-fmal1)")
(define_reservation "bdver1-fsto" "bdver1-fmal1")

;; Vector operations usually consume many of the pipes.
(define_reservation "bdver1-fvector" "(bdver1-ffma0 + bdver1-ffma1
  + bdver1-fmal0 + bdver1-fmal1)")

;; Jump instructions are executed in the branch unit, completely
;; transparently to us.
(define_insn_reservation "bdver1_call" 0
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "call,callv"))
  "bdver1-double,bdver1-agu")
;; PUSH mem is double path.
(define_insn_reservation "bdver1_push" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "push"))
  "bdver1-direct,bdver1-agu,bdver1-store")
;; POP r16/mem are double path.
(define_insn_reservation "bdver1_pop" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "pop"))
  "bdver1-direct,bdver1-ivector")
;; LEAVE: no latency info so far, assume the same as amdfam10.
(define_insn_reservation "bdver1_leave" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "leave"))
  "bdver1-vector,bdver1-ivector")
;; LEA executes in the AGU unit with 1 cycle latency on BDVER1.
(define_insn_reservation "bdver1_lea" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "lea"))
  "bdver1-direct,bdver1-agu")

;; MUL executes in special multiplier unit attached to IEU1.
;; The DImode variants are listed ahead of the generic ones, whose
;; conditions have no mode test.
(define_insn_reservation "bdver1_imul_DI" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-direct1,bdver1-ieu1")
(define_insn_reservation "bdver1_imul" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "bdver1-direct1,bdver1-ieu1")
(define_insn_reservation "bdver1_imul_mem_DI" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load,both"))))
  "bdver1-direct1,bdver1-load,bdver1-ieu1")
(define_insn_reservation "bdver1_imul_mem" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "bdver1-direct1,bdver1-load,bdver1-ieu1")

;; IDIV cannot execute in parallel with other instructions.  Treating
;; it like a short latency vector instruction is a good approximation
;; that keeps the scheduler from trying too hard to hide its latency by
;; overlapping it with other instructions.
;; ??? Experiments show that the IDIV can overlap with roughly 6 cycles
;; of the other code.
(define_insn_reservation "bdver1_idiv" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "none,unknown")))
  "bdver1-vector,(bdver1-ieu0*6+(bdver1-fpsched,bdver1-fvector))")

(define_insn_reservation "bdver1_idiv_mem" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "load,both")))
  "bdver1-vector,((bdver1-load,bdver1-ieu0*6)+(bdver1-fpsched,bdver1-fvector))")

;; The parallelism of string instructions is not documented.  Model it
;; the same way as IDIV to create smaller automata.  This probably does
;; not matter much.  Using the same heuristics for bdver1 as amdfam10
;; and K8 with IDIV.
;; String instructions.
(define_insn_reservation "bdver1_str" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both,store")))
  "bdver1-vector,bdver1-load,bdver1-ieu0*6")

;; Integer instructions.
(define_insn_reservation "bdver1_idirect" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-direct,bdver1-ieu")
(define_insn_reservation "bdver1_ivector" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-vector,bdver1-ieu,bdver1-ieu")
(define_insn_reservation "bdver1_idirect_loadmov" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-load")
(define_insn_reservation "bdver1_idirect_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-load,bdver1-ieu")
(define_insn_reservation "bdver1_ivector_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "bdver1-vector,bdver1-load,bdver1-ieu,bdver1-ieu")
(define_insn_reservation "bdver1_idirect_movstore" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "store")))
  "bdver1-direct,bdver1-agu,bdver1-store")
(define_insn_reservation "bdver1_idirect_both" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "bdver1-direct,bdver1-load,
   bdver1-ieu,bdver1-store,
   bdver1-store")
(define_insn_reservation "bdver1_ivector_both" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "bdver1-vector,bdver1-load,
   bdver1-ieu,
   bdver1-ieu,
   bdver1-store")
(define_insn_reservation "bdver1_idirect_store" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "bdver1-direct,(bdver1-ieu+bdver1-agu),
   bdver1-store")
(define_insn_reservation "bdver1_ivector_store" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "bdver1-vector,(bdver1-ieu+bdver1-agu),bdver1-ieu,
   bdver1-store")

;; BDVER1 floating point units.
;; Within each pair below the more specific condition (extra mode or
;; memory test) is listed before the generic one.
(define_insn_reservation "bdver1_fldxf" 13
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "bdver1-vector,bdver1-fpload2,bdver1-fvector*9")
(define_insn_reservation "bdver1_fld" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_fstxf" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "bdver1-vector,(bdver1-fpsched+bdver1-agu),(bdver1-store2+(bdver1-fvector*6))")
(define_insn_reservation "bdver1_fst" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
(define_insn_reservation "bdver1_fist" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fistp,fisttp"))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
(define_insn_reservation "bdver1_fmov_bdver1" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fmov"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fadd_load" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_fadd" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fop"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fmul_load" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_fmul" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fmul"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fsgn" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fsgn"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fdiv_load" 46
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_fdiv" 42
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fdiv"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fpspc_load" 103
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fpspc")
            (eq_attr "memory" "load")))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")
;; Non-load form of fpspc.  This entry used to repeat the "load"
;; condition and the bdver1-fpload reservation of bdver1_fpspc_load
;; above, so it could never be selected.  It now follows the same shape
;; as the bdver1_fcmov_load/bdver1_fcmov pair: no memory test, and
;; bdver1-fpsched in place of the load.
(define_insn_reservation "bdver1_fpspc" 100
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fpspc"))
  "bdver1-vector,bdver1-fpsched,bdver1-fvector")
(define_insn_reservation "bdver1_fcmov_load" 17
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")
(define_insn_reservation "bdver1_fcmov" 15
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fcmov"))
  "bdver1-vector,bdver1-fpsched,bdver1-fvector")
(define_insn_reservation "bdver1_fcomi_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "bdver1_decode" "double")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
(define_insn_reservation "bdver1_fcomi" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "double")
            (eq_attr "type" "fcmp")))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")
(define_insn_reservation "bdver1_fcom_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_fcom" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fcmp"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fxch" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fxch"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; SSE loads.
;; Reservations with the most attribute tests come first; the catch-all
;; bdver1_mmxsse_load closes the load group.
(define_insn_reservation "bdver1_ssevector_avx128_unaligned_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "movu" "1")
                      (and (eq_attr "mode" "V4SF,V2DF")
                           (eq_attr "memory" "load"))))))
  "bdver1-direct,bdver1-fpload")
(define_insn_reservation "bdver1_ssevector_avx256_unaligned_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "movu" "1")
                 (and (eq_attr "mode" "V8SF,V4DF")
                      (eq_attr "memory" "load")))))
  "bdver1-double,bdver1-fpload")
(define_insn_reservation "bdver1_ssevector_sse128_unaligned_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "movu" "1")
                 (and (eq_attr "mode" "V4SF,V2DF")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_ssevector_avx128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_ssevector_avx256_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_ssevector_sse128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload")
(define_insn_reservation "bdver1_ssescalar_movq_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_ssescalar_vmovss_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload")
(define_insn_reservation "bdver1_ssescalar_sse128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload, bdver1-ffma")
(define_insn_reservation "bdver1_mmxsse_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload, bdver1-fmal")

;; SSE stores.
;; The 256 bit and 128 bit stores reserve the fsto+store pair for two
;; cycles, everything else for one.
(define_insn_reservation "bdver1_sse_store_avx256" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "store,both"))))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")
(define_insn_reservation "bdver1_sse_store" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "bdver1-direct,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")
(define_insn_reservation "bdver1_mmxsse_store_short" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "bdver1-direct,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")

;; Register moves.
;; Register-to-register forms: 256 bit first, then scalar SF/DF, then
;; every remaining mmxmov/ssemov without a memory operand.
(define_insn_reservation "bdver1_ssevector_avx256" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,bdver1-fmal")
(define_insn_reservation "bdver1_movss_movsd" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_mmxssemov" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmal")
;; SSE logical and shuffle operations.
(define_insn_reservation "bdver1_sselog_load_256" 7
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_sselog_256" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
            (eq_attr "mode" "V8SF")))
  "bdver1-double,bdver1-fpsched,bdver1-fmal")
(define_insn_reservation "bdver1_sselog_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fxbar")
(define_insn_reservation "bdver1_sselog" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))
  "bdver1-direct,bdver1-fpsched,bdver1-fxbar")

;; PCMP actually executes in FMAL.
;; SSE compares.
(define_insn_reservation "bdver1_ssecmp_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_ssecmp" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "ssecmp"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_ssecomi_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
(define_insn_reservation "bdver1_ssecomi" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "ssecomi"))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")

;; Conversions behave very irregularly and the scheduling is critical
;; here, so each instruction is taken separately.

;; 256 bit conversion: selected when operand 0 or operand 1 has one of
;; the modes V4DF, V8SF or V8SI.
(define_insn_reservation "bdver1_vcvtX2Y_avx256_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (ior (ior (match_operand:V4DF 0 "register_operand")
                           (ior (match_operand:V8SF 0 "register_operand")
                                (match_operand:V8SI 0 "register_operand")))
                      (ior (match_operand:V4DF 1 "nonimmediate_operand")
                           (ior (match_operand:V8SF 1 "nonimmediate_operand")
                                (match_operand:V8SI 1 "nonimmediate_operand")))))))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")
(define_insn_reservation "bdver1_vcvtX2Y_avx256" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (ior (ior (match_operand:V4DF 0 "register_operand")
                           (ior (match_operand:V8SF 0 "register_operand")
                                (match_operand:V8SI 0 "register_operand")))
                      (ior (match_operand:V4DF 1 "nonimmediate_operand")
                           (ior (match_operand:V8SF 1 "nonimmediate_operand")
                                (match_operand:V8SI 1 "nonimmediate_operand")))))))
  "bdver1-vector,bdver1-fpsched,bdver1-fvector")
;; CVTSS2SD, CVTSD2SS.
(define_insn_reservation "bdver1_ssecvt_cvtss2sd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
(define_insn_reservation "bdver1_ssecvt_cvtss2sd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ.
(define_insn_reservation "bdver1_sseicvt_cvtsi2sd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
(define_insn_reservation "bdver1_sseicvt_cvtsi2sd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(nothing | bdver1-fcvt)")
;; CVTPD2PS.
(define_insn_reservation "bdver1_ssecvt_cvtpd2ps_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
(define_insn_reservation "bdver1_ssecvt_cvtpd2ps" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
;; CVTPI2PS, CVTDQ2PS.
;; Destination is V4SF, source is V2SI or V4SI.
(define_insn_reservation "bdver1_ssecvt_cvtdq2ps_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
(define_insn_reservation "bdver1_ssecvt_cvtdq2ps" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
;; CVTDQ2PD.
(define_insn_reservation "bdver1_ssecvt_cvtdq2pd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
(define_insn_reservation "bdver1_ssecvt_cvtdq2pd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
;; CVTPS2PD, CVTPI2PD.
;; Destination is V2DF, source is V2SI or V4SF.
(define_insn_reservation "bdver1_ssecvt_cvtps2pd_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
;; Register form.  The memory test used to be "load", which made this
;; condition identical to bdver1_ssecvt_cvtps2pd_load above even though
;; the reservation has no load in it; it now tests "none" like the
;; other non-load conversion reservations in this block.
(define_insn_reservation "bdver1_ssecvt_cvtps2pd" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ,
;; CVTTSS2SI, CVTTSS2SIQ.
(define_insn_reservation "bdver1_ssecvt_cvtsX2si_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SI,DI")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fsto)")
(define_insn_reservation "bdver1_ssecvt_cvtsX2si" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SI,DI")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fsto)")
;; CVTPD2PI, CVTTPD2PI.
;; Source is V2DF, destination is V2SI.
(define_insn_reservation "bdver1_ssecvt_cvtpd2pi_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V2SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
(define_insn_reservation "bdver1_ssecvt_cvtpd2pi" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V2SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
;; CVTPD2DQ, CVTTPD2DQ.
(define_insn_reservation "bdver1_ssecvt_cvtpd2dq_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
(define_insn_reservation "bdver1_ssecvt_cvtpd2dq" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ.
;; Source is V4SF, destination is V2SI or V4SI.
(define_insn_reservation "bdver1_ssecvt_cvtps2pi_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 1 "nonimmediate_operand")
                      (ior (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SI 0 "register_operand"))))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
(define_insn_reservation "bdver1_ssecvt_cvtps2pi" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 1 "nonimmediate_operand")
                      (ior (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SI 0 "register_operand"))))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")

;; SSE MUL, ADD, and MULADD.
;; The 256 bit modes are double decoded, the remaining modes direct.
(define_insn_reservation "bdver1_ssemuladd_load_256" 11
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (and (eq_attr "mode" "V8SF,V4DF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_ssemuladd_256" 7
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (and (eq_attr "mode" "V8SF,V4DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_ssemuladd_load" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_ssemuladd" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,sseadd1,ssemuladd")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
;; SSE integer multiply and add.
(define_insn_reservation "bdver1_sseimul_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseimul")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fmma")
(define_insn_reservation "bdver1_sseimul" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseimul")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmma")
(define_insn_reservation "bdver1_sseiadd_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseiadd")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_sseiadd" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseiadd")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmal")

;; SSE DIV: no throughput information (assume the same as amdfam10).
;; Every variant keeps one of the two ffma pipes reserved for 17 cycles.
(define_insn_reservation "bdver1_ssediv_double_load_256" 31
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V4DF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
(define_insn_reservation "bdver1_ssediv_double_256" 27
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V4DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
(define_insn_reservation "bdver1_ssediv_single_load_256" 28
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
(define_insn_reservation "bdver1_ssediv_single_256" 24
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
(define_insn_reservation "bdver1_ssediv_double_load" 31
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
(define_insn_reservation "bdver1_ssediv_double" 27
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
(define_insn_reservation "bdver1_ssediv_single_load" 28
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
(define_insn_reservation "bdver1_ssediv_single" 24
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")

;; TImode sseins insns.
(define_insn_reservation "bdver1_sseins" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseins")
            (eq_attr "mode" "TI")))
  "bdver1-direct,bdver1-fpsched,bdver1-fxbar")
