;; Copyright (C) 2002, 2003, 2004, 2005, 2006,
;; 2007 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;;
;; AMD Athlon Scheduling
;;
;; The Athlon contains three pipelined FP units, three integer units and
;; three address generation units.
;;
;; The predecode logic determines the boundaries of instructions in the 64
;; byte cache line.  So the cache line straddling problem of the K6 might be
;; an issue here as well, but it is not noted in the documentation.
;;
;; Three DirectPath instruction decoders and only one VectorPath decoder
;; are available.  They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
;; Decoded macro instructions are then passed to the 72 entry instruction
;; control unit, which passes them to the specialized integer (18 entry)
;; and fp (36 entry) schedulers.
;;
;; The load/store queue unit is not attached to the schedulers but
;; communicates with all the execution units separately instead.

;; Decode path an insn takes on Athlon/K8: everything is DirectPath
;; unless one of the conditions below selects the VectorPath decoder.
(define_attr "athlon_decode" "direct,vector,double"
  (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,leave")
           (const_string "vector")
         (and (eq_attr "type" "push")
              (match_operand 1 "memory_operand" ""))
           (const_string "vector")
         (and (eq_attr "type" "fmov")
              (and (eq_attr "memory" "load,store")
                   (eq_attr "mode" "XF")))
           (const_string "vector")]
        (const_string "direct")))

;; Decode path on AMDFAM10; defaults to DirectPath.
(define_attr "amdfam10_decode" "direct,vector,double"
  (const_string "direct"))
;;
;;           decode0 decode1 decode2
;;                 \    |   /
;;    instruction control unit (72 entry scheduler)
;;                |                        |
;;      integer scheduler (18)         stack map
;;     /  |    |    |    |   \        stack rename
;;  ieu0 agu0 ieu1 agu1 ieu2 agu2      scheduler
;;    |  agu0  |   agu1      agu2    register file
;;    |      \ |    |       /         |     |     |
;;     \      /\    |     /         fadd  fmul  fstore
;;       \  /    \  |   /           fadd  fmul  fstore
;;       imul  load/store (2x)      fadd  fmul  fstore

(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")
(define_cpu_unit "athlon-decode0" "athlon")
(define_cpu_unit "athlon-decode1" "athlon")
(define_cpu_unit "athlon-decode2" "athlon")
(define_cpu_unit "athlon-decodev" "athlon")
;; Model the fact that a double decoded instruction may take 2 cycles
;; to decode when decoder2 is used in one cycle and decoder0 in the next
;; (this is needed to allow throughput of 1.5 double decoded
;; instructions per cycle).
;;
;; In order to avoid dependence between reservation of decoder
;; and other units, we model the decoder as a two stage fully pipelined
;; unit and only a double decoded instruction may occupy the unit in the
;; first cycle.  With this scheme however two double instructions can be
;; issued in cycle0.
;;
;; Avoid this by using a presence set requiring decoder0 to be allocated
;; too.  Vector decoded instructions then can't be issued when
;; modeled as consuming decoder0+decoder1+decoder2.
;; We solve that by a specialized vector decoder unit and an exclusion set.
;; decode2 may only be reserved together with decode0; the vector
;; decoder excludes all three DirectPath decoders.
(presence_set "athlon-decode2" "athlon-decode0")
(exclusion_set "athlon-decodev" "athlon-decode0,athlon-decode1,athlon-decode2")
(define_reservation "athlon-vector" "nothing,athlon-decodev")
(define_reservation "athlon-direct0" "nothing,athlon-decode0")
(define_reservation "athlon-direct" "nothing,
                                     (athlon-decode0 | athlon-decode1
                                      | athlon-decode2)")
;; Double instructions behave like two direct instructions.
(define_reservation "athlon-double" "((athlon-decode2, athlon-decode0)
                                     | (nothing,(athlon-decode0 + athlon-decode1))
                                     | (nothing,(athlon-decode1 + athlon-decode2)))")

;; Agu and ieu unit results in extremely large automatons and
;; in our approximation they are hardly filled in.  Only ieu
;; unit can, as issue rate is 3 and agu unit is always used
;; first in the insn reservations.  Skip the models.

;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
(define_reservation "athlon-ieu" "nothing")
(define_cpu_unit "athlon-ieu0" "athlon")
;(define_cpu_unit "athlon-agu0" "athlon_agu")
;(define_cpu_unit "athlon-agu1" "athlon_agu")
;(define_cpu_unit "athlon-agu2" "athlon_agu")
;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
(define_reservation "athlon-agu" "nothing")

(define_cpu_unit "athlon-mult" "athlon_mult")

(define_cpu_unit "athlon-load0" "athlon_load")
(define_cpu_unit "athlon-load1" "athlon_load")
(define_reservation "athlon-load" "athlon-agu,
                                   (athlon-load0 | athlon-load1),nothing")
;; 128bit SSE instructions issue two loads at once
(define_reservation "athlon-load2" "athlon-agu,
                                    (athlon-load0 + athlon-load1),nothing")

;; Stores reserve the same load0/load1 units as loads.
(define_reservation "athlon-store" "(athlon-load0 | athlon-load1)")
;; 128bit SSE instructions issue two stores at once
(define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)")


;; The FP operations start to execute at stage 12 in the pipeline, while
;; integer operations start to execute at stage 9 for Athlon and 11 for K8
;; Compensate the difference for Athlon because it results in significantly
;; smaller automata.
(define_reservation "athlon-fpsched" "nothing,nothing,nothing")
;; The floating point loads.
(define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)")
(define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)")
(define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)")
(define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)")


;; The three fp units are fully pipelined with latency of 3
(define_cpu_unit "athlon-fadd" "athlon_fp")
(define_cpu_unit "athlon-fmul" "athlon_fp")
(define_cpu_unit "athlon-fstore" "athlon_fp")
(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")

;; Vector operations usually consume many of pipes.
(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")


;; Jump instructions are executed in the branch unit completely transparent to us
(define_insn_reservation "athlon_branch" 0
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "ibr"))
  "athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_call" 0
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "call,callv"))
  "athlon-vector,athlon-ieu")
(define_insn_reservation "athlon_call_amdfam10" 0
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "call,callv"))
  "athlon-double,athlon-ieu")

;; Latency of push operation is 3 cycles, but ESP value is available
;; earlier
(define_insn_reservation "athlon_push" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "push"))
  "athlon-direct,athlon-agu,athlon-store")
;; Pop on the original Athlon only.  The condition must not include
;; k8/generic64: those are described by athlon_pop_k8 below, and two
;; reservations whose conditions are both true for one insn leave the
;; K8 entry unused.
(define_insn_reservation "athlon_pop" 4
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "pop"))
  "athlon-vector,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_pop_k8" 3
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "pop"))
  "athlon-double,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_pop_amdfam10" 3
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "pop"))
  "athlon-direct,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "leave"))
  "athlon-vector,(athlon-ieu+athlon-load)")
(define_insn_reservation "athlon_leave_k8" 3
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (eq_attr "type" "leave"))
  "athlon-double,(athlon-ieu+athlon-load)")

;; Lea executes in AGU unit with 2 cycles latency.
(define_insn_reservation "athlon_lea" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "lea"))
  "athlon-direct,athlon-agu,nothing")
;; Lea executes in AGU unit with 1 cycle latency on AMDFAM10
(define_insn_reservation "athlon_lea_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "lea"))
  "athlon-direct,athlon-agu,nothing")

;; Mul executes in special multiplier unit attached to IEU0
(define_insn_reservation "athlon_imul" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
;; ??? Widening multiply is vector or double.
(define_insn_reservation "athlon_imul_k8_DI" 4
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
;; The HImode AMDFAM10 entry has to precede the general K8/AMDFAM10
;; entry: the general condition is also true for every insn this one
;; describes, so listed after it this reservation was never selected.
(define_insn_reservation "athlon_imul_amdfam10_HI" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "HI")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
(define_insn_reservation "athlon_imul_k8" 3
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "none,unknown")))
  "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
(define_insn_reservation "athlon_imul_mem" 8
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8_DI" 7
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (and (eq_attr "mode" "DI")
                 (eq_attr "memory" "load,both"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
(define_insn_reservation "athlon_imul_mem_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "imul")
            (eq_attr "memory" "load,both")))
  "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")

;; Idiv cannot execute in parallel with other instructions.  Dealing with it
;; as with a short latency vector instruction is a good approximation that
;; keeps the scheduler from trying too hard to hide its latency by overlap
;; with other instructions.
;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
;; of the other code
;; Using the same heuristics for amdfam10 as K8 with idiv

(define_insn_reservation "athlon_idiv" 6
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "none,unknown")))
  "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
(define_insn_reservation "athlon_idiv_mem" 9
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "idiv")
            (eq_attr "memory" "load,both")))
  "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
;; The parallelism of string instructions is not documented.  Model it same way
;; as idiv to create smaller automata.  This probably does not matter much.
;; Using the same heuristics for amdfam10 as K8 with idiv
(define_insn_reservation "athlon_str" 6
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "str")
            (eq_attr "memory" "load,both,store")))
  "athlon-vector,athlon-load,athlon-ieu0*6")

;; Generic integer insns, keyed on decode path and memory attribute.
;; Athlon/K8/generic64 use "athlon_decode"; AMDFAM10 uses "amdfam10_decode".
(define_insn_reservation "athlon_idirect" 1
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_idirect_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-direct,athlon-ieu")
(define_insn_reservation "athlon_ivector" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu,athlon-ieu")
(define_insn_reservation "athlon_ivector_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "none,unknown"))))
  "athlon-vector,athlon-ieu,athlon-ieu")

(define_insn_reservation "athlon_idirect_loadmov" 3
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-load")

(define_insn_reservation "athlon_idirect_load" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_idirect_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-load,athlon-ieu")
(define_insn_reservation "athlon_ivector_load" 6
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
(define_insn_reservation "athlon_ivector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")

(define_insn_reservation "athlon_idirect_movstore" 1
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "imov")
            (eq_attr "memory" "store")))
  "athlon-direct,athlon-agu,athlon-store")

(define_insn_reservation "athlon_idirect_both" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-direct,athlon-load,
   athlon-ieu,athlon-store,
   athlon-store")
(define_insn_reservation "athlon_idirect_both_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-direct,athlon-load,
   athlon-ieu,athlon-store,
   athlon-store")

(define_insn_reservation "athlon_ivector_both" 6
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-vector,athlon-load,
   athlon-ieu,
   athlon-ieu,
   athlon-store")
(define_insn_reservation "athlon_ivector_both_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "both"))))
  "athlon-vector,athlon-load,
   athlon-ieu,
   athlon-ieu,
   athlon-store")

(define_insn_reservation "athlon_idirect_store" 1
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-direct,(athlon-ieu+athlon-agu),
   athlon-store")
(define_insn_reservation "athlon_idirect_store_amdfam10" 1
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "direct")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-direct,(athlon-ieu+athlon-agu),
   athlon-store")

(define_insn_reservation "athlon_ivector_store" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "athlon_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
   athlon-store")
(define_insn_reservation "athlon_ivector_store_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "amdfam10_decode" "vector")
            (and (eq_attr "unit" "integer,unknown")
                 (eq_attr "memory" "store"))))
  "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
   athlon-store")

;; Athlon floating point unit
(define_insn_reservation "athlon_fldxf" 12
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,athlon-fpload2,athlon-fvector*9")
(define_insn_reservation "athlon_fldxf_k8" 13
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,athlon-fpload2k8,athlon-fvector*9")
;; Assume superforwarding to take place so effective latency of fany op is 0.
(define_insn_reservation "athlon_fld" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_fld_k8" 2
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

;; x87 stores.  The XFmode entries come first because the plain fmov
;; store conditions below are also true for XFmode stores.
(define_insn_reservation "athlon_fstxf" 10
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
(define_insn_reservation "athlon_fstxf_k8" 8
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "store,both")
                 (eq_attr "mode" "XF"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")
(define_insn_reservation "athlon_fst" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fst_k8" 2
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fist" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fistp,fisttp"))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_fmov" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fmov"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")

;; x87 arithmetic: each memory-operand variant precedes its register form.
(define_insn_reservation "athlon_fadd_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fadd_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fop")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fadd" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fop"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
(define_insn_reservation "athlon_fmul_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fmul_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fmul" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fmul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fsgn" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fsgn"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fdiv_load" 24
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_fdiv_load_k8" 13
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fdiv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")
(define_insn_reservation "athlon_fdiv" 24
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fdiv"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fdiv_k8" 11
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (eq_attr "type" "fdiv"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_fpspc_load" 103
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "fpspc")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fpspc" 100
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fpspc"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_fcmov_load" 7
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fvector")
(define_insn_reservation "athlon_fcmov" 7
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fcmov"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
(define_insn_reservation "athlon_fcmov_load_k8" 17
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fcmov")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fploadk8,athlon-fvector")
(define_insn_reservation "athlon_fcmov_k8" 15
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (eq_attr "type" "fcmov"))
  "athlon-vector,athlon-fpsched,athlon-fvector")
;; fcomi is vector decoded but uses only one pipe.
(define_insn_reservation "athlon_fcomi_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "athlon_decode" "vector")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcomi_load_k8" 5
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fcmp")
            (and (eq_attr "athlon_decode" "vector")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcomi" 3
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "athlon_decode" "vector")
            (eq_attr "type" "fcmp")))
  "athlon-vector,athlon-fpsched,athlon-fadd")
;; Remaining fcmp insns (those not matched by the vector decoded
;; entries above) go through the direct decoders.
(define_insn_reservation "athlon_fcom_load" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fadd")
(define_insn_reservation "athlon_fcom_load_k8" 4
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "fcmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")
(define_insn_reservation "athlon_fcom" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (eq_attr "type" "fcmp"))
  "athlon-direct,athlon-fpsched,athlon-fadd")
;; Never seen by the scheduler because we still don't do post reg-stack
;; scheduling.
;(define_insn_reservation "athlon_fxch" 2
;  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
;       (eq_attr "type" "fxch"))
;  "athlon-direct,athlon-fpsched,athlon-fany")

;; Athlon handle MMX operations in the FPU unit with shorter latencies

(define_insn_reservation "athlon_movlpd_load" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (match_operand:DF 1 "memory_operand" "")))
  "athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_movlpd_load_k8" 2
  (and (eq_attr "cpu" "k8")
       (and (eq_attr "type" "ssemov")
            (match_operand:DF 1 "memory_operand" "")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
(define_insn_reservation "athlon_movsd_load_generic64" 2
  (and (eq_attr "cpu" "generic64")
       (and (eq_attr "type" "ssemov")
            (match_operand:DF 1 "memory_operand" "")))
  "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fmul)")
(define_insn_reservation "athlon_movaps_load_k8" 2
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
(define_insn_reservation "athlon_movaps_load" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")
(define_insn_reservation "athlon_movss_load" 1
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-vector,athlon-fpload,(athlon-fany*2)")
(define_insn_reservation "athlon_movss_load_k8" 1
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "SF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")
(define_insn_reservation "athlon_mmxsseld" 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fany")
(define_insn_reservation "athlon_mmxsseld_k8" 2
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fstore")
;; On AMDFAM10 all double, single and integer packed and scalar SSEx data
;; loads generated are direct path, latency of 2 and do not use any FP
;; executions units.  No separate entries for movlpx/movhpx loads, which
;; are direct path, latency of 4 and use the FADD/FMUL FP execution units,
;; as they will not be generated.
(define_insn_reservation "athlon_sseld_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssemov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8")
;; On AMDFAM10 MMX data loads generated are direct path, latency of 4
;; and can use any FP executions units
(define_insn_reservation "athlon_mmxld_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "mmxmov")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8, athlon-fany")
;; Packed 128bit stores on the original Athlon.  This entry used to carry
;; the same "k8,generic64" condition as athlon_mmxssest_k8 below, which
;; left that entry unused and sent Athlon packed stores to
;; athlon_mmxssest_short; it is now restricted to "athlon".
(define_insn_reservation "athlon_mmxssest" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "mmxmov,ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
(define_insn_reservation "athlon_mmxssest_k8" 3
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "mmxmov,ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
(define_insn_reservation "athlon_mmxssest_short" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; On AMDFAM10 all double, single and integer packed SSEx data stores
;; generated are all double path, latency of 2 and use the FSTORE FP
;; execution unit.  No entries separate for movupx/movdqu, which are
;; vector path, latency of 3 and use the FSTORE*2 FP execution unit,
;; as they will not be generated.
(define_insn_reservation "athlon_ssest_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssemov")
            (and (eq_attr "mode" "V4SF,V2DF,TI")
                 (eq_attr "memory" "store,both"))))
  "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)")
;; On AMDFAM10 all double, single and integer scalar SSEx and MMX
;; data stores generated are all direct path, latency of 2 and use
;; the FSTORE FP execution unit
(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "mmxmov,ssemov")
            (eq_attr "memory" "store,both")))
  "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
(define_insn_reservation "athlon_movaps_k8" 2
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssemov")
            (eq_attr "mode" "V4SF,V2DF,TI")))
  "athlon-double,athlon-fpsched,((athlon-faddmul+athlon-faddmul) | (athlon-faddmul, athlon-faddmul))")
(define_insn_reservation "athlon_movaps" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemov")
            (eq_attr "mode" "V4SF,V2DF,TI")))
  "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
(define_insn_reservation "athlon_mmxssemov" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "mmxmov,ssemov"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")
(define_insn_reservation "athlon_mmxmul_load" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "mmxmul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-fmul")
(define_insn_reservation "athlon_mmxmul" 3
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "mmxmul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_mmx_load" 3
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "unit" "mmx")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fpload,athlon-faddmul")
(define_insn_reservation "athlon_mmx" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "unit" "mmx"))
  "athlon-direct,athlon-fpsched,athlon-faddmul")
;; SSE operations are handled by the i387 unit as well.  The latency
;; is same as for i387 operations for scalar operations

(define_insn_reservation "athlon_sselog_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
(define_insn_reservation "athlon_sselog_load_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
(define_insn_reservation "athlon_sselog_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sselog,sselog1")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)")
(define_insn_reservation "athlon_sselog" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "sselog,sselog1"))
  "athlon-vector,athlon-fpsched,athlon-fmul*2")
(define_insn_reservation "athlon_sselog_k8" 3
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "sselog,sselog1"))
  "athlon-double,athlon-fpsched,athlon-fmul")
(define_insn_reservation "athlon_sselog_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "sselog,sselog1"))
  "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)")

;; ???
;; pcmp executes in addmul, probably not worthwhile to bother about that.

;; ---------------------------------------------------------------------
;; SSE compares (type "ssecmp").
;;
;; In every define_insn_reservation the integer after the name is the
;; default latency and the trailing string is the unit reservation.
;; The first three entries are restricted by "mode" and reserve a single
;; athlon-fadd cycle; the mode-less "vector" entries after them reserve
;; athlon-fadd for two cycles on athlon/k8/generic64.
;; NOTE(review): the mode-less conditions overlap the mode-restricted
;; ones; presumably the earlier entry takes precedence -- confirm.
;; ---------------------------------------------------------------------

;; Athlon, memory operand, modes SF/DF/DI.
(define_insn_reservation "athlon_ssecmp_load" 2
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fadd")

;; K8/generic64/amdfam10, memory operand, modes SF/DF/DI/TI.
(define_insn_reservation "athlon_ssecmp_load_k8" 4
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "ssecmp")
            (and (eq_attr "mode" "SF,DF,DI,TI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

;; All four CPUs, no memory restriction, modes SF/DF/DI/TI.
(define_insn_reservation "athlon_ssecmp" 2
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "mode" "SF,DF,DI,TI")))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; Remaining ssecmp insns with a memory operand, one entry per CPU group.
(define_insn_reservation "athlon_ssecmpvector_load" 3
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecmp")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

;; Remaining ssecmp insns without a memory restriction.
(define_insn_reservation "athlon_ssecmpvector" 3
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssecmp"))
  "athlon-vector,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_k8" 3
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "ssecmp"))
  "athlon-double,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssecmp"))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; ---------------------------------------------------------------------
;; comiss/comisd style compares (type "ssecomi").
;; athlon/k8/generic64 use the vector decoder; amdfam10 uses the direct
;; decoder.  All reserve one athlon-fadd cycle.
;; ---------------------------------------------------------------------

(define_insn_reservation "athlon_ssecomi_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload,athlon-fadd")

(define_insn_reservation "athlon_ssecomi_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecomi")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_ssecomi" 4
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (eq_attr "type" "ssecomi"))
  "athlon-vector,athlon-fpsched,athlon-fadd")

(define_insn_reservation "athlon_ssecomi_amdfam10" 3
  (and (eq_attr "cpu" "amdfam10")
       ;; Author's note kept from the original: athlon_ssecomi seemed to
       ;; have a bug in its type attribute, which was fixed for amdfam10.
       (eq_attr "type" "ssecomi"))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; ---------------------------------------------------------------------
;; SSE add/subtract (type "sseadd").
;; Same layout as ssecmp: mode-restricted (SF/DF/DI) entries first, then
;; mode-less "vector" entries per CPU group.
;; ---------------------------------------------------------------------

(define_insn_reservation "athlon_sseadd_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseadd")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fadd")

(define_insn_reservation "athlon_sseadd_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "sseadd")
            (and (eq_attr "mode" "SF,DF,DI")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

(define_insn_reservation "athlon_sseadd" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "sseadd")
            (eq_attr "mode" "SF,DF,DI")))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; Remaining sseadd insns with a memory operand.
(define_insn_reservation "athlon_sseaddvector_load" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseadd")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_load_k8" 7
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sseadd")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseadd")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fadd")

;; Remaining sseadd insns without a memory restriction.
(define_insn_reservation "athlon_sseaddvector" 5
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "sseadd"))
  "athlon-vector,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "sseadd"))
  "athlon-double,athlon-fpsched,(athlon-fadd*2)")

(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "sseadd"))
  "athlon-direct,athlon-fpsched,athlon-fadd")

;; Conversions behave very irregularly and the scheduling is critical here.
;; Take each instruction separately.  Assume that the mode is always set to
;; the destination one and athlon_decode is set to the K8 versions.
;; cvtss2sd
;;
;; Selected by type "ssecvt" with destination mode DF.  The athlon, k8 and
;; generic64 entries key on the "athlon_decode" attribute; the amdfam10
;; entries key on "amdfam10_decode".  The integer after the name is the
;; default latency.

;; Memory source, athlon/k8/generic64: direct decode, one athlon-fstore
;; cycle after the load.
(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "direct")
                 (and (eq_attr "mode" "DF")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

;; Memory source, amdfam10: double decode, athlon-faddmul and
;; athlon-fstore reserved in the same cycle.
(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; No memory restriction, athlon/k8/generic64: direct decode.
(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "direct")
                 (eq_attr "mode" "DF"))))
  "athlon-direct,athlon-fpsched,athlon-fstore")

;; No memory restriction, amdfam10: vector decode, one athlon-faddmul
;; cycle followed by two athlon-fstore cycles.
(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (eq_attr "mode" "DF"))))
  "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")

;; cvtps2pd.  Model same way the other double decoded FP conversions.
;; Packed conversions selected by type "ssecvt" with a V2DF/V4SF/TI mode.
;; athlon/k8/generic64 entries require athlon_decode "double"; amdfam10
;; entries require amdfam10_decode "direct".

;; Memory source, athlon/k8/generic64: two athlon-fstore cycles.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "V2DF,V4SF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")

;; Memory source, amdfam10: one athlon-fstore cycle.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "direct")
                 (and (eq_attr "mode" "V2DF,V4SF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

;; No memory restriction, athlon/k8/generic64: athlon-fstore in two
;; consecutive cycles (written out rather than as "*2").
(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (eq_attr "mode" "V2DF,V4SF,TI"))))
  "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")

;; No memory restriction, amdfam10.
(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "direct")
                 (eq_attr "mode" "V2DF,V4SF,TI"))))
  "athlon-direct,athlon-fpsched,athlon-fstore")

;; cvtsi2sd mem,reg is directpath (cvtsi2sd reg,reg is doublepath).
;; cvtsi2sd has throughput 1 and is executed in store unit with latency of 6.
;; NOTE(review): unlike the neighbouring K8 entries, the "cpu" list here is
;; "athlon,k8" without generic64 -- confirm whether that is intentional.
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
  (and (eq_attr "cpu" "athlon,k8")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "direct")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-direct,athlon-fploadk8,athlon-fstore")

;; amdfam10 counterpart: double decode, athlon-faddmul and athlon-fstore
;; reserved together.
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtsi2ss mem, reg is
;; doublepath

;; ---------------------------------------------------------------------
;; Integer -> FP conversions (type "sseicvt", destination mode SF/DF).
;; The integer after each reservation name is its default latency.
;; ---------------------------------------------------------------------

;; Memory source, athlon_decode "double", Athlon: vector decoder, two
;; athlon-fstore cycles.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-vector,athlon-fpload,(athlon-fstore*2)")

;; Same condition for k8/generic64: double decoder.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fstore*2)")

;; NOTE(review): condition, latency and reservation are identical to
;; athlon_sseicvt_cvtsi2sd_load_amdfam10 defined earlier in the file.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtsi2sd reg,reg is double decoded (vector on Athlon).
(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fploadk8,athlon-fstore")

(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtsi2ss reg, reg is doublepath.
;; NOTE(review): the condition below tests athlon_decode "vector", not
;; "double" -- confirm which the comment or the code should say.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")

;; NOTE(review): identical in every field to
;; athlon_sseicvt_cvtsi2sd_amdfam10 above.
(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF,DF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; ---------------------------------------------------------------------
;; FP -> FP narrowing conversions (type "ssecvt").
;; ---------------------------------------------------------------------

;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9.
(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
  (and (eq_attr "cpu" "k8,athlon,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fstore*3)")

(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12.
(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,(athlon-fvector*3)")

(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "vector")
                 (and (eq_attr "mode" "SF")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")

;; cvtpd2ps, memory source.
;; NOTE(review): the condition selects athlon_decode "vector" while the
;; reservation starts with athlon-double -- confirm this is intended.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")

(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10.
;; ??? Why is it faster than cvtsd2ss?
;; NOTE(review): the entry that follows matches memory "none" and declares
;; latency 8, so the "mem,reg ... latency 10" wording above may be stale.
(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-fvector*2")

(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssecvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "V4SF,V2DF,TI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")

;; ---------------------------------------------------------------------
;; FP -> integer conversions (type "sseicvt", destination mode SI/DI).
;; ---------------------------------------------------------------------

;; cvtsd2si mem,reg is doublepath, throughput 1, latency 9.
(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
  (and (eq_attr "cpu" "athlon,k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "vector")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "load")))))
  "athlon-vector,athlon-fploadk8,athlon-fvector")

(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)")

;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9.
(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-vector,athlon-fpsched,athlon-fvector")

(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "athlon_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,athlon-fstore")

(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "SI,DI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)")

;; cvtpd2dq reg,mem is doublepath, throughput 1, latency 9 on amdfam10.
(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "TI")
                      (eq_attr "memory" "load")))))
  "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")

;; cvtpd2dq reg,mem is doublepath, throughput 1, latency 7 on amdfam10.
;; NOTE(review): this entry matches memory "none", so "reg,mem" above
;; presumably should read "reg,reg" -- confirm.
(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseicvt")
            (and (eq_attr "amdfam10_decode" "double")
                 (and (eq_attr "mode" "TI")
                      (eq_attr "memory" "none")))))
  "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")


;; ---------------------------------------------------------------------
;; SSE multiply (type "ssemul").
;; Mode-restricted (SF/DF) entries come first and reserve one athlon-fmul
;; cycle; the mode-less "vector" entries reserve athlon-fmul for two
;; cycles on athlon/k8/generic64 and one cycle on amdfam10.
;; ---------------------------------------------------------------------

(define_insn_reservation "athlon_ssemul_load" 4
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fmul")

(define_insn_reservation "athlon_ssemul_load_k8" 6
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "ssemul")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fmul")

(define_insn_reservation "athlon_ssemul" 4
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "ssemul")
            (eq_attr "mode" "SF,DF")))
  "athlon-direct,athlon-fpsched,athlon-fmul")

;; Remaining ssemul insns with a memory operand.
(define_insn_reservation "athlon_ssemulvector_load" 5
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_load_k8" 7
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssemul")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul")

;; Remaining ssemul insns without a memory restriction.
(define_insn_reservation "athlon_ssemulvector" 5
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssemul"))
  "athlon-vector,athlon-fpsched,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_k8" 5
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "ssemul"))
  "athlon-double,athlon-fpsched,(athlon-fmul*2)")

(define_insn_reservation "athlon_ssemulvector_amdfam10" 4
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssemul"))
  "athlon-direct,athlon-fpsched,athlon-fmul")

;; divsd timings.
;; divss is faster

;; ---------------------------------------------------------------------
;; SSE divide (type "ssediv").
;; Mode-restricted (SF/DF) entries hold athlon-fmul for 17 cycles.  The
;; mode-less "vector" entries hold it for 34 cycles on athlon/k8/generic64
;; and for 17 cycles on amdfam10.  The integer after each name is the
;; default latency.
;; ---------------------------------------------------------------------

(define_insn_reservation "athlon_ssediv_load" 20
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fpload,athlon-fmul*17")

(define_insn_reservation "athlon_ssediv_load_k8" 22
  (and (eq_attr "cpu" "k8,generic64,amdfam10")
       (and (eq_attr "type" "ssediv")
            (and (eq_attr "mode" "SF,DF")
                 (eq_attr "memory" "load"))))
  "athlon-direct,athlon-fploadk8,athlon-fmul*17")

(define_insn_reservation "athlon_ssediv" 20
  (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
       (and (eq_attr "type" "ssediv")
            (eq_attr "mode" "SF,DF")))
  "athlon-direct,athlon-fpsched,athlon-fmul*17")

;; Remaining ssediv insns with a memory operand.
(define_insn_reservation "athlon_ssedivvector_load" 39
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-vector,athlon-fpload2,athlon-fmul*34")

;; NOTE(review): latency 35 here is lower than the 39 declared for the
;; non-load athlon_ssedivvector_k8 below -- confirm against the CPU docs.
(define_insn_reservation "athlon_ssedivvector_load_k8" 35
  (and (eq_attr "cpu" "k8,generic64")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-double,athlon-fpload2k8,athlon-fmul*34")

(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "ssediv")
            (eq_attr "memory" "load")))
  "athlon-direct,athlon-fploadk8,athlon-fmul*17")

;; Remaining ssediv insns without a memory restriction.  These three
;; reservations go from the decoder straight to athlon-fmul, with no
;; athlon-fpsched step in between.
(define_insn_reservation "athlon_ssedivvector" 39
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "ssediv"))
  "athlon-vector,athlon-fmul*34")

(define_insn_reservation "athlon_ssedivvector_k8" 39
  (and (eq_attr "cpu" "k8,generic64")
       (eq_attr "type" "ssediv"))
  "athlon-double,athlon-fmul*34")

(define_insn_reservation "athlon_ssedivvector_amdfam10" 20
  (and (eq_attr "cpu" "amdfam10")
       (eq_attr "type" "ssediv"))
  "athlon-direct,athlon-fmul*17")

;; amdfam10 only: type "sseins" in TI mode goes through the vector
;; decoder and reserves one athlon-faddmul cycle.
(define_insn_reservation "athlon_sseins_amdfam10" 5
  (and (eq_attr "cpu" "amdfam10")
       (and (eq_attr "type" "sseins")
            (eq_attr "mode" "TI")))
  "athlon-vector,athlon-fpsched,athlon-faddmul")