;; Copyright (C) 2010, Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;;
;; AMD bdver1 Scheduling
;;
;; The bdver1 contains four pipelined FP units, two integer units and
;; two address generation units.
;;
;; The predecode logic determines the boundaries of instructions in the
;; 64 byte cache line.  So the cache line straddling problem of K6 might
;; be an issue here as well, but it is not noted in the documentation.
;;
;; Three DirectPath instruction decoders and only one VectorPath decoder
;; are available.  They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
;;
;; The load/store queue unit is not attached to the schedulers but
;; communicates with all the execution units separately instead.
;; Decode class of an insn: direct, vector or double.  Defaults to direct.
(define_attr "bdver1_decode" "direct,vector,double"
  (const_string "direct"))

(define_automaton "bdver1,bdver1_int,bdver1_load,bdver1_mult,bdver1_fp")

;; Three direct decoders plus one dedicated vector decoder.
(define_cpu_unit "bdver1-decode0" "bdver1")
(define_cpu_unit "bdver1-decode1" "bdver1")
(define_cpu_unit "bdver1-decode2" "bdver1")
(define_cpu_unit "bdver1-decodev" "bdver1")

;; Model the fact that a double decoded instruction may take 2 cycles
;; to decode when decoder2 and, in the next cycle, decoder0 are used
;; (this is needed to allow a throughput of 1.5 double decoded
;; instructions per cycle).
;;
;; In order to avoid a dependence between the reservation of the decoder
;; and other units, we model the decoder as a two stage fully pipelined
;; unit where only a double decoded instruction may occupy the unit in
;; the first cycle.  With this scheme, however, two double instructions
;; can be issued in cycle 0.
;;
;; Avoid this by using a presence set requiring decoder0 to be allocated
;; too.  Vector decoded instructions then can't be issued when modeled
;; as consuming decoder0+decoder1+decoder2.
;; We solve that with a specialized vector decoder unit and an exclusion
;; set.
(presence_set "bdver1-decode2" "bdver1-decode0")
(exclusion_set "bdver1-decodev"
  "bdver1-decode0,bdver1-decode1,bdver1-decode2")

(define_reservation "bdver1-vector" "nothing,bdver1-decodev")
(define_reservation "bdver1-direct1" "nothing,bdver1-decode1")
(define_reservation "bdver1-direct"
  "nothing,(bdver1-decode0 | bdver1-decode1 | bdver1-decode2)")
;; Double instructions behave like two direct instructions.
(define_reservation "bdver1-double"
  "((bdver1-decode2,bdver1-decode0)
    | (nothing,(bdver1-decode0 + bdver1-decode1))
    | (nothing,(bdver1-decode1 + bdver1-decode2)))")


;; Two integer units; "bdver1-ieu" takes either one.
(define_cpu_unit "bdver1-ieu0" "bdver1_int")
(define_cpu_unit "bdver1-ieu1" "bdver1_int")
(define_reservation "bdver1-ieu" "(bdver1-ieu0 | bdver1-ieu1)")

;; Two address generation units; "bdver1-agu" takes either one.
(define_cpu_unit "bdver1-agu0" "bdver1_int")
(define_cpu_unit "bdver1-agu1" "bdver1_int")
(define_reservation "bdver1-agu" "(bdver1-agu0 | bdver1-agu1)")

;; Multiplier unit, kept in its own automaton.
(define_cpu_unit "bdver1-mult" "bdver1_mult")

;; Load units.  A load first reserves an AGU, then one load unit.
(define_cpu_unit "bdver1-load0" "bdver1_load")
(define_cpu_unit "bdver1-load1" "bdver1_load")
(define_reservation "bdver1-load"
  "bdver1-agu,(bdver1-load0 | bdver1-load1),nothing")
;; 128bit SSE instructions issue two loads at once.
(define_reservation "bdver1-load2"
  "bdver1-agu,(bdver1-load0 + bdver1-load1),nothing")

;; Stores are modeled on the same two load units.
(define_reservation "bdver1-store" "(bdver1-load0 | bdver1-load1)")
;; 128bit SSE instructions issue two stores at once.
(define_reservation "bdver1-store2" "(bdver1-load0 + bdver1-load1)")

;; The FP operations start to execute at stage 12 in the pipeline, while
;; integer operations start to execute at stage 9 for athlon and 11 for K8.
;; Compensate the difference for athlon because it results in significantly
;; smaller automata.
;; NOTE: the above information was just copied from athlon.md, and was not
;; actually verified for bdver1.
(define_reservation "bdver1-fpsched" "nothing,nothing,nothing")
;; The floating point loads.
(define_reservation "bdver1-fpload" "(bdver1-fpsched + bdver1-load)")
(define_reservation "bdver1-fpload2" "(bdver1-fpsched + bdver1-load2)")

;; Four FP units.
(define_cpu_unit "bdver1-ffma0" "bdver1_fp")
(define_cpu_unit "bdver1-ffma1" "bdver1_fp")
(define_cpu_unit "bdver1-fmal0" "bdver1_fp")
(define_cpu_unit "bdver1-fmal1" "bdver1_fp")

;; Named views onto the four FP pipes.  fcvt and fmma are pinned to
;; ffma0, fxbar to ffma1 and fsto to fmal1.
(define_reservation "bdver1-ffma" "(bdver1-ffma0 | bdver1-ffma1)")
(define_reservation "bdver1-fcvt" "bdver1-ffma0")
(define_reservation "bdver1-fmma" "bdver1-ffma0")
(define_reservation "bdver1-fxbar" "bdver1-ffma1")
(define_reservation "bdver1-fmal" "(bdver1-fmal0 | bdver1-fmal1)")
(define_reservation "bdver1-fsto" "bdver1-fmal1")

;; Vector operations usually consume many of the pipes.
(define_reservation "bdver1-fvector"
  "(bdver1-ffma0 + bdver1-ffma1 + bdver1-fmal0 + bdver1-fmal1)")

;; Jump instructions are executed in the branch unit completely
;; transparent to us.
(define_insn_reservation "bdver1_call" 0
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "call,callv"))
  "bdver1-double,bdver1-agu,bdver1-ieu")
;; PUSH mem is double path.
;; NOTE(review): the reservation below uses bdver1-direct, not
;; bdver1-double -- confirm which one is intended.
(define_insn_reservation "bdver1_push" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "push"))
  "bdver1-direct,bdver1-agu,bdver1-store")
;; POP r16/mem are double path.
;; NOTE(review): the reservation below uses bdver1-direct, not
;; bdver1-double -- confirm which one is intended.
(define_insn_reservation "bdver1_pop" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "pop"))
  "bdver1-direct,(bdver1-ieu+bdver1-load)")
;; LEAVE: no latency info so far, assume the same as amdfam10.
(define_insn_reservation "bdver1_leave" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "leave"))
  "bdver1-vector,(bdver1-ieu+bdver1-load)")
;; LEA executes in the AGU unit with 1 cycle latency on BDVER1.
(define_insn_reservation "bdver1_lea" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "lea"))
  "bdver1-direct,bdver1-agu,nothing")

;; MUL executes in special multiplier unit attached to IEU1.
;; The DImode variants are listed ahead of the generic ones, whose
;; conditions they also satisfy.
(define_insn_reservation "bdver1_imul_DI" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-direct1,bdver1-ieu1,bdver1-mult,nothing,bdver1-ieu1")
(define_insn_reservation "bdver1_imul" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "bdver1-direct1,bdver1-ieu1,bdver1-mult,bdver1-ieu1")
(define_insn_reservation "bdver1_imul_mem_DI" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load,both"))))
  "bdver1-direct1,bdver1-load,bdver1-ieu,bdver1-mult,nothing,bdver1-ieu")
(define_insn_reservation "bdver1_imul_mem" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "bdver1-direct1,bdver1-load,bdver1-ieu,bdver1-mult,bdver1-ieu")

;; IDIV cannot execute in parallel with other instructions.  Dealing with
;; it as with a short latency vector instruction is a good approximation
;; that keeps the scheduler from trying too hard to hide its latency by
;; overlapping it with other instructions.
;; ??? Experiments show that the IDIV can overlap with roughly 6 cycles
;; of the other code.
(define_insn_reservation "bdver1_idiv" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "none,unknown")))
  "bdver1-vector,(bdver1-ieu0*6+(bdver1-fpsched,bdver1-fvector))")

(define_insn_reservation "bdver1_idiv_mem" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "load,both")))
  "bdver1-vector,((bdver1-load,bdver1-ieu0*6)+(bdver1-fpsched,bdver1-fvector))")

;; The parallelism of string instructions is not documented.  Model it the
;; same way as IDIV to create smaller automata.  This probably does not
;; matter much.
;; Using the same heuristics for bdver1 as amdfam10 and K8 with IDIV.
(define_insn_reservation "bdver1_str" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both,store")))
  "bdver1-vector,bdver1-load,bdver1-ieu0*6")

;; Integer instructions.
;; Each decode class (direct/vector) is described per memory class
;; (none, load, both, store); the imov-specific load and store forms are
;; listed ahead of the generic direct forms they overlap with.
(define_insn_reservation "bdver1_idirect" 1
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-direct,bdver1-ieu")
(define_insn_reservation "bdver1_ivector" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "bdver1-vector,bdver1-ieu,bdver1-ieu")
(define_insn_reservation "bdver1_idirect_loadmov" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-load")
(define_insn_reservation "bdver1_idirect_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-load,bdver1-ieu")
(define_insn_reservation "bdver1_ivector_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "bdver1-vector,bdver1-load,bdver1-ieu,bdver1-ieu")
(define_insn_reservation "bdver1_idirect_movstore" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "store")))
  "bdver1-direct,bdver1-agu,bdver1-store")
(define_insn_reservation "bdver1_idirect_both" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "bdver1-direct,bdver1-load,bdver1-ieu,bdver1-store,bdver1-store")
(define_insn_reservation "bdver1_ivector_both" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "bdver1-vector,bdver1-load,bdver1-ieu,bdver1-ieu,bdver1-store")
(define_insn_reservation "bdver1_idirect_store" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "bdver1-direct,(bdver1-ieu+bdver1-agu),bdver1-store")
(define_insn_reservation "bdver1_ivector_store" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "bdver1-vector,(bdver1-ieu+bdver1-agu),bdver1-ieu,bdver1-store")

;; BDVER1 floating point units.
;; Throughout this section the more specific condition (XF mode, memory
;; operand, double decode) is listed ahead of the generic fallback for
;; the same insn type.
(define_insn_reservation "bdver1_fldxf" 13
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "bdver1-vector,bdver1-fpload2,bdver1-fvector*9")
(define_insn_reservation "bdver1_fld" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_fstxf" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "bdver1-vector,(bdver1-fpsched+bdver1-agu),(bdver1-store2+(bdver1-fvector*6))")
(define_insn_reservation "bdver1_fst" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
(define_insn_reservation "bdver1_fist" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fistp,fisttp"))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")
(define_insn_reservation "bdver1_fmov_bdver1" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fmov"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fadd_load" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_fadd" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fop"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fmul_load" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_fmul" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fmul"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fsgn" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fsgn"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fdiv_load" 46
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_fdiv" 42
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fdiv"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fpspc_load" 103
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fpspc")
            (eq_attr "memory" "load")))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")
;; Fallback for fpspc insns without a memory load.  This used to repeat
;; the "load" condition and load reservation of bdver1_fpspc_load, so it
;; could never describe the register form, and register-only fpspc insns
;; matched neither fpspc reservation.  It now mirrors the
;; fcmov_load/fcmov pair: no memory test, bdver1-fpsched instead of
;; bdver1-fpload.
(define_insn_reservation "bdver1_fpspc" 100
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fpspc"))
  "bdver1-vector,bdver1-fpsched,bdver1-fvector")
(define_insn_reservation "bdver1_fcmov_load" 17
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")
(define_insn_reservation "bdver1_fcmov" 15
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fcmov"))
  "bdver1-vector,bdver1-fpsched,bdver1-fvector")
(define_insn_reservation "bdver1_fcomi_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "bdver1_decode" "double")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
(define_insn_reservation "bdver1_fcomi" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "bdver1_decode" "double")
            (eq_attr "type" "fcmp")))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")
(define_insn_reservation "bdver1_fcom_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_fcom" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fcmp"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_fxch" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "fxch"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")

;; SSE loads.
;; The load reservations run from the most specific condition (VEX
;; prefix, unaligned move, vector mode) down to the generic
;; mmxmov/ssemov load at the end.
(define_insn_reservation "bdver1_ssevector_avx128_unaligned_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "movu" "1")
                      (and (eq_attr "mode" "V4SF,V2DF")
                           (eq_attr "memory" "load"))))))
  "bdver1-direct,bdver1-fpload")
(define_insn_reservation "bdver1_ssevector_avx256_unaligned_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "movu" "1")
                 (and (eq_attr "mode" "V8SF,V4DF")
                      (eq_attr "memory" "load")))))
  "bdver1-double,bdver1-fpload")
(define_insn_reservation "bdver1_ssevector_sse128_unaligned_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "movu" "1")
                 (and (eq_attr "mode" "V4SF,V2DF")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_ssevector_avx128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_ssevector_avx256_load" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_ssevector_sse128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload")
(define_insn_reservation "bdver1_ssescalar_movq_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_ssescalar_vmovss_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "prefix" "vex")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "bdver1-direct,bdver1-fpload")
(define_insn_reservation "bdver1_ssescalar_sse128_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload, bdver1-ffma")
(define_insn_reservation "bdver1_mmxsse_load" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload, bdver1-fmal")

;; SSE stores.
;; The 256-bit and 128-bit vector stores reserve the store pipe for two
;; cycles; the generic mmxmov/ssemov store reserves it once.
(define_insn_reservation "bdver1_sse_store_avx256" 5
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "store,both"))))
  "bdver1-double,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")
(define_insn_reservation "bdver1_sse_store" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "bdver1-direct,(bdver1-fpsched+bdver1-agu),((bdver1-fsto+bdver1-store)*2)")
(define_insn_reservation "bdver1_mmxsse_store_short" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "bdver1-direct,(bdver1-fpsched+bdver1-agu),(bdver1-fsto+bdver1-store)")

;; Register moves.
(define_insn_reservation "bdver1_ssevector_avx256" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V8SF,V4DF,OI")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,bdver1-fmal")
(define_insn_reservation "bdver1_movss_movsd" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_mmxssemov" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmal")
;; SSE logs.
;; The V8SF forms are double decoded and use FMAL; every other
;; sselog/sselog1 insn is direct decoded and uses FXBAR.
(define_insn_reservation "bdver1_sselog_load_256" 7
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_sselog_256" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "mode" "V8SF")))
  "bdver1-double,bdver1-fpsched,bdver1-fmal")
(define_insn_reservation "bdver1_sselog_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fxbar")
(define_insn_reservation "bdver1_sselog" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "sselog,sselog1"))
  "bdver1-direct,bdver1-fpsched,bdver1-fxbar")

;; PCMP actually executes in FMAL.
;; NOTE(review): the ssecmp reservations that follow reserve bdver1-ffma,
;; not bdver1-fmal -- confirm which one is intended.
(define_insn_reservation "bdver1_ssecmp_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_ssecmp" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "ssecmp"))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_ssecomi_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "bdver1-double,bdver1-fpload,(bdver1-ffma | bdver1-fsto)")
(define_insn_reservation "bdver1_ssecomi" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (eq_attr "type" "ssecomi"))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma | bdver1-fsto)")

;; Conversions behave very irregularly and the scheduling is critical
;; here.  Take each instruction separately.

;; 256 bit conversion.
;; Selected when operand 0 or operand 1 has a 256-bit vector mode.
(define_insn_reservation "bdver1_vcvtX2Y_avx256_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (ior (ior (match_operand:V4DF 0 "register_operand")
                           (ior (match_operand:V8SF 0 "register_operand")
                                (match_operand:V8SI 0 "register_operand")))
                      (ior (match_operand:V4DF 1 "nonimmediate_operand")
                           (ior (match_operand:V8SF 1 "nonimmediate_operand")
                                (match_operand:V8SI 1 "nonimmediate_operand")))))))
  "bdver1-vector,bdver1-fpload,bdver1-fvector")
(define_insn_reservation "bdver1_vcvtX2Y_avx256" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (ior (ior (match_operand:V4DF 0 "register_operand")
                           (ior (match_operand:V8SF 0 "register_operand")
                                (match_operand:V8SI 0 "register_operand")))
                      (ior (match_operand:V4DF 1 "nonimmediate_operand")
                           (ior (match_operand:V8SF 1 "nonimmediate_operand")
                                (match_operand:V8SI 1 "nonimmediate_operand")))))))
  "bdver1-vector,bdver1-fpsched,bdver1-fvector")
;; CVTSS2SD, CVTSD2SS.
(define_insn_reservation "bdver1_ssecvt_cvtss2sd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
(define_insn_reservation "bdver1_ssecvt_cvtss2sd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
;; CVTSI2SD, CVTSI2SS, CVTSI2SDQ, CVTSI2SSQ.
(define_insn_reservation "bdver1_sseicvt_cvtsi2sd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
(define_insn_reservation "bdver1_sseicvt_cvtsi2sd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(nothing | bdver1-fcvt)")
;; CVTPD2PS.
(define_insn_reservation "bdver1_ssecvt_cvtpd2ps_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
(define_insn_reservation "bdver1_ssecvt_cvtpd2ps" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (match_operand:V2DF 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
;; CVTPI2PS, CVTDQ2PS.
(define_insn_reservation "bdver1_ssecvt_cvtdq2ps_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
(define_insn_reservation "bdver1_ssecvt_cvtdq2ps" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SI 1 "nonimmediate_operand"))))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")
;; CVTDQ2PD.
(define_insn_reservation "bdver1_ssecvt_cvtdq2pd_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
(define_insn_reservation "bdver1_ssecvt_cvtdq2pd" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (match_operand:V4SI 1 "nonimmediate_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
;; CVTPS2PD, CVTPI2PD.
(define_insn_reservation "bdver1_ssecvt_cvtps2pd_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver1-double,bdver1-fpload,(bdver1-fxbar | bdver1-fcvt)")
;; Register form.  This used to test memory "load", which made its
;; condition identical to bdver1_ssecvt_cvtps2pd_load above and left the
;; register-to-register conversion unmatched here.  It now tests memory
;; "none", like the other non-load conversion reservations, to go with
;; the bdver1-fpsched (no load) reservation and the shorter latency.
(define_insn_reservation "bdver1_ssecvt_cvtps2pd" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 0 "register_operand")
                      (ior (match_operand:V2SI 1 "nonimmediate_operand")
                           (match_operand:V4SF 1 "nonimmediate_operand"))))))
  "bdver1-double,bdver1-fpsched,(bdver1-fxbar | bdver1-fcvt)")
;; CVTSD2SI, CVTSD2SIQ, CVTSS2SI, CVTSS2SIQ, CVTTSD2SI, CVTTSD2SIQ,
;; CVTTSS2SI, CVTTSS2SIQ.
(define_insn_reservation "bdver1_ssecvt_cvtsX2si_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SI,DI")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fsto)")
(define_insn_reservation "bdver1_ssecvt_cvtsX2si" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "mode" "SI,DI")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fsto)")
;; CVTPD2PI, CVTTPD2PI.
(define_insn_reservation "bdver1_ssecvt_cvtpd2pi_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V2SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
(define_insn_reservation "bdver1_ssecvt_cvtpd2pi" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V2SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
;; CVTPD2DQ, CVTTPD2DQ.
(define_insn_reservation "bdver1_ssecvt_cvtpd2dq_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpload,(bdver1-fcvt | bdver1-fxbar)")
(define_insn_reservation "bdver1_ssecvt_cvtpd2dq" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V2DF 1 "nonimmediate_operand")
                      (match_operand:V4SI 0 "register_operand")))))
  "bdver1-double,bdver1-fpsched,(bdver1-fcvt | bdver1-fxbar)")
;; CVTPS2PI, CVTTPS2PI, CVTPS2DQ, CVTTPS2DQ.
(define_insn_reservation "bdver1_ssecvt_cvtps2pi_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "load")
                 (and (match_operand:V4SF 1 "nonimmediate_operand")
                      (ior (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SI 0 "register_operand"))))))
  "bdver1-direct,bdver1-fpload,bdver1-fcvt")
(define_insn_reservation "bdver1_ssecvt_cvtps2pi" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "memory" "none")
                 (and (match_operand:V4SF 1 "nonimmediate_operand")
                      (ior (match_operand:V2SI 0 "register_operand")
                           (match_operand:V4SI 0 "register_operand"))))))
  "bdver1-direct,bdver1-fpsched,bdver1-fcvt")

;; SSE MUL, ADD, and MULADD.
;; The 256-bit (V8SF/V4DF) forms are double decoded and are listed ahead
;; of the generic forms, whose conditions they also satisfy.
(define_insn_reservation "bdver1_ssemuladd_load_256" 11
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
            (and (eq_attr "mode" "V8SF,V4DF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_ssemuladd_256" 7
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
            (and (eq_attr "mode" "V8SF,V4DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_ssemuladd_load" 10
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-ffma")
(define_insn_reservation "bdver1_ssemuladd" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssemul,sseadd,ssemuladd")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-ffma")
(define_insn_reservation "bdver1_sseimul_load" 8
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseimul")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fmma")
(define_insn_reservation "bdver1_sseimul" 4
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseimul")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmma")
(define_insn_reservation "bdver1_sseiadd_load" 6
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseiadd")
            (eq_attr "memory" "load")))
  "bdver1-direct,bdver1-fpload,bdver1-fmal")
(define_insn_reservation "bdver1_sseiadd" 2
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseiadd")
            (eq_attr "memory" "none")))
  "bdver1-direct,bdver1-fpsched,bdver1-fmal")

;; SSE DIV: no throughput information (assume same as amdfam10).
;; Every divide holds one of the two FFMA pipes for 17 cycles; the
;; 256-bit forms are double decoded, the rest direct decoded.
(define_insn_reservation "bdver1_ssediv_double_load_256" 31
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V4DF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
(define_insn_reservation "bdver1_ssediv_double_256" 27
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V4DF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
(define_insn_reservation "bdver1_ssediv_single_load_256" 28
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "load"))))
  "bdver1-double,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
(define_insn_reservation "bdver1_ssediv_single_256" 24
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "V8SF")
                 (eq_attr "memory" "none"))))
  "bdver1-double,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
(define_insn_reservation "bdver1_ssediv_double_load" 31
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
(define_insn_reservation "bdver1_ssediv_double" 27
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "DF,V2DF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")
(define_insn_reservation "bdver1_ssediv_single_load" 28
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "memory" "load"))))
  "bdver1-direct,bdver1-fpload,(bdver1-ffma0*17 | bdver1-ffma1*17)")
(define_insn_reservation "bdver1_ssediv_single" 24
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,V4SF")
                 (eq_attr "memory" "none"))))
  "bdver1-direct,bdver1-fpsched,(bdver1-ffma0*17 | bdver1-ffma1*17)")

;; TImode SSE inserts go through FXBAR.
(define_insn_reservation "bdver1_sseins" 3
  (and (eq_attr "cpu" "bdver1,bdver2")
       (and (eq_attr "type" "sseins")
            (eq_attr "mode" "TI")))
  "bdver1-direct,bdver1-fpsched,bdver1-fxbar")