1 { 2 Copyright (c) 1998-2002 by Florian Klaempfl 3 4 This unit implements the x86 specific class for the register 5 allocator 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 21 **************************************************************************** 22 } 23 24 unit rgx86; 25 26 {$i fpcdefs.inc} 27 28 interface 29 30 uses 31 cpubase,cgbase,cgutils, 32 aasmtai,aasmdata,aasmsym,aasmcpu, 33 rgobj; 34 35 type 36 trgx86 = class(trgobj) get_spill_subregnull37 function get_spill_subreg(r : tregister) : tsubregister;override; do_spill_replacenull38 function do_spill_replace(list:TAsmList;instr:tai_cpu_abstract_sym;orgreg:tsuperregister;const spilltemp:treference):boolean;override; 39 end; 40 41 tpushedsavedloc = record 42 case byte of 43 0: (pushed: boolean); 44 1: (ofs: longint); 45 end; 46 47 tpushedsavedfpu = array[tsuperregister] of tpushedsavedloc; 48 49 trgx86fpu = class 50 { these counters contain the number of elements in the } 51 { unusedregsxxx/usableregsxxx sets } 52 countunusedregsfpu : byte; 53 54 { Contains the registers which are really used by the proc itself. 55 It doesn't take care of registers used by called procedures 56 } 57 used_in_proc : tcpuregisterset; 58 59 {reg_pushes_other : regvarother_longintarray; 60 is_reg_var_other : regvarother_booleanarray; 61 regvar_loaded_other : regvarother_booleanarray;} 62 63 fpuvaroffset : byte; 64 65 constructor create; 66 getregisterfpunull67 function getregisterfpu(list: TAsmList) : tregister; 68 procedure ungetregisterfpu(list: TAsmList; r : tregister); 69 70 { pushes and restores registers } 71 procedure saveusedfpuregisters(list:TAsmList; 72 var saved:Tpushedsavedfpu; 73 const s:Tcpuregisterset); 74 procedure restoreusedfpuregisters(list:TAsmList; 75 const saved:Tpushedsavedfpu); 76 77 { corrects the fpu stack register by ofs } correct_fpuregisternull78 function correct_fpuregister(r : tregister;ofs : byte) : tregister; 79 end; 80 81 82 implementation 83 84 uses 85 verbose; 86 87 const 88 { This value is used in tsaved. If the array value is equal 89 to this, then this means that this register is not used.} 90 reg_not_saved = $7fffffff; 91 92 93 {****************************************************************************** 94 Trgcpu 95 ******************************************************************************} 96 trgx86.get_spill_subregnull97 function trgx86.get_spill_subreg(r : tregister) : tsubregister; 98 begin 99 result:=getsubreg(r); 100 end; 101 102 103 { Decide wether a "replace" spill is possible, i.e. wether we can replace a register 104 in an instruction by a memory reference. For example, in "mov ireg26d,0", the imaginary 105 register ireg26d can be replaced by a memory reference.} trgx86.do_spill_replacenull106 function trgx86.do_spill_replace(list:TAsmList;instr:tai_cpu_abstract_sym;orgreg:tsuperregister;const spilltemp:treference):boolean; 107 108 { returns true if opcde is an avx opcode which allows only the first (zero) operand might be a memory reference } avx_opcode_only_op0_may_be_memrefnull109 function avx_opcode_only_op0_may_be_memref(opcode : TAsmOp) : boolean; 110 begin 111 case opcode of 112 A_VMULSS, 113 A_VMULSD, 114 A_VSUBSS, 115 A_VSUBSD, 116 A_VADDSD, 117 A_VADDSS, 118 A_VDIVSD, 119 A_VDIVSS, 120 A_VSQRTSD, 121 A_VSQRTSS, 122 A_VCVTDQ2PD, 123 A_VCVTDQ2PS, 124 A_VCVTPD2DQ, 125 A_VCVTPD2PS, 126 A_VCVTPS2DQ, 127 A_VCVTPS2PD, 128 A_VCVTSD2SI, 129 A_VCVTSD2SS, 130 A_VCVTSI2SD, 131 A_VCVTSS2SD, 132 A_VCVTTPD2DQ, 133 A_VCVTTPS2DQ, 134 A_VCVTTSD2SI, 135 A_VCVTSI2SS, 136 A_VCVTSS2SI, 137 A_VCVTTSS2SI, 138 A_VXORPD, 139 A_VXORPS, 140 A_VORPD, 141 A_VORPS, 142 A_VANDPD, 143 A_VANDPS, 144 A_VUNPCKLPS, 145 A_VUNPCKHPS, 146 A_VSHUFPD: 147 result:=true; 148 else 149 result:=false; 150 end; 151 end; 152 153 154 var 155 n,replaceoper : longint; 156 is_subh: Boolean; 157 begin 158 result:=false; 159 with taicpu(instr) do 160 begin 161 replaceoper:=-1; 162 case ops of 163 1 : 164 begin 165 if (oper[0]^.typ=top_reg) and 166 (getregtype(oper[0]^.reg)=regtype) then 167 begin 168 if get_alias(getsupreg(oper[0]^.reg))<>orgreg then 169 internalerror(200410101); 170 replaceoper:=0; 171 end; 172 end; 173 2,3 : 174 begin 175 { avx instruction? 176 currently this rule is sufficient but it might be extended } 177 if (ops=3) and (opcode<>A_SHRD) and (opcode<>A_SHLD) and (opcode<>A_IMUL) then 178 begin 179 { BMI shifting/rotating instructions have special requirements regarding spilling, only 180 the middle operand can be replaced } 181 if ((opcode=A_RORX) or (opcode=A_SHRX) or (opcode=A_SARX) or (opcode=A_SHLX)) then 182 begin 183 if (oper[1]^.typ=top_reg) and (getregtype(oper[1]^.reg)=regtype) and (get_alias(getsupreg(oper[1]^.reg))=orgreg) then 184 replaceoper:=1; 185 end 186 { avx instructions allow only the first operand (at&t counting) to be a register operand 187 all operands must be registers ... } 188 else if (oper[0]^.typ=top_reg) and 189 (oper[1]^.typ=top_reg) and 190 (oper[2]^.typ=top_reg) and 191 { but they must be different } 192 ((getregtype(oper[1]^.reg)<>regtype) or 193 (get_alias(getsupreg(oper[0]^.reg))<>get_alias(getsupreg(oper[1]^.reg))) 194 ) and 195 ((getregtype(oper[2]^.reg)<>regtype) or 196 (get_alias(getsupreg(oper[0]^.reg))<>get_alias(getsupreg(oper[2]^.reg))) 197 ) and 198 (get_alias(getsupreg(oper[0]^.reg))=orgreg) then 199 replaceoper:=0; 200 end 201 else 202 begin 203 { We can handle opcodes with 2 and 3-op imul/shrd/shld the same way, where the 3rd operand is const or CL, 204 that doesn't need spilling. 205 However, due to AT&T order inside the compiler, the 3rd operand is 206 numbered 0, so look at operand no. 1 and 2 if we have 3 operands by 207 adding a "n". } 208 n:=0; 209 if ops=3 then 210 n:=1; 211 { lea is tricky: part of operand 0 can be spilled and the instruction can converted into an 212 add, if base or index shall be spilled and the other one is equal the destination } 213 if (opcode=A_LEA) then 214 begin 215 if (oper[0]^.ref^.offset=0) and 216 (oper[0]^.ref^.scalefactor in [0,1]) and 217 (((getregtype(oper[0]^.ref^.base)=regtype) and 218 (get_alias(getsupreg(oper[0]^.ref^.base))=orgreg) and 219 (getregtype(oper[0]^.ref^.index)=getregtype(oper[1]^.reg)) and 220 (get_alias(getsupreg(oper[0]^.ref^.index))=get_alias(getsupreg(oper[1]^.reg)))) or 221 ((getregtype(oper[0]^.ref^.index)=regtype) and 222 (get_alias(getsupreg(oper[0]^.ref^.index))=orgreg) and 223 (getregtype(oper[0]^.ref^.base)=getregtype(oper[1]^.reg)) and 224 (get_alias(getsupreg(oper[0]^.ref^.base))=get_alias(getsupreg(oper[1]^.reg)))) 225 ) then 226 replaceoper:=0; 227 end 228 else if (oper[n+0]^.typ=top_reg) and 229 (oper[n+1]^.typ=top_reg) and 230 ((getregtype(oper[n+0]^.reg)<>regtype) or 231 (getregtype(oper[n+1]^.reg)<>regtype) or 232 (get_alias(getsupreg(oper[n+0]^.reg))<>get_alias(getsupreg(oper[n+1]^.reg)))) then 233 begin 234 if (getregtype(oper[n+0]^.reg)=regtype) and 235 (get_alias(getsupreg(oper[n+0]^.reg))=orgreg) then 236 replaceoper:=0+n 237 else if (getregtype(oper[n+1]^.reg)=regtype) and 238 (get_alias(getsupreg(oper[n+1]^.reg))=orgreg) then 239 replaceoper:=1+n; 240 end 241 else if (oper[n+0]^.typ=top_reg) and 242 (oper[n+1]^.typ=top_const) then 243 begin 244 if (getregtype(oper[0+n]^.reg)=regtype) and 245 (get_alias(getsupreg(oper[0+n]^.reg))=orgreg) then 246 replaceoper:=0+n 247 else 248 internalerror(200704282); 249 end 250 else if (oper[n+0]^.typ=top_const) and 251 (oper[n+1]^.typ=top_reg) then 252 begin 253 if (getregtype(oper[1+n]^.reg)=regtype) and 254 (get_alias(getsupreg(oper[1+n]^.reg))=orgreg) then 255 replaceoper:=1+n 256 else 257 internalerror(200704283); 258 end; 259 case replaceoper of 260 0 : 261 begin 262 { Some instructions don't allow memory references 263 for source } 264 case opcode of 265 A_BT, 266 A_BTS, 267 A_BTC, 268 A_BTR, 269 270 { shufp*/unpcklp* would require 16 byte alignment for memory locations so we force the source 271 operand into a register } 272 A_SHUFPD, 273 A_SHUFPS, 274 A_UNPCKLPD, 275 A_UNPCKLPS : 276 replaceoper:=-1; 277 end; 278 end; 279 1 : 280 begin 281 { Some instructions don't allow memory references 282 for destination } 283 case opcode of 284 A_CMOVcc, 285 A_MOVZX, 286 A_MOVSX, 287 {$ifdef x86_64} 288 A_MOVSXD, 289 {$endif x86_64} 290 A_MULSS, 291 A_MULSD, 292 A_SUBSS, 293 A_SUBSD, 294 A_ADDSD, 295 A_ADDSS, 296 A_DIVSD, 297 A_DIVSS, 298 A_SQRTSD, 299 A_SQRTSS, 300 A_SHLD, 301 A_SHRD, 302 A_COMISD, 303 A_COMISS, 304 A_CVTDQ2PD, 305 A_CVTDQ2PS, 306 A_CVTPD2DQ, 307 A_CVTPD2PI, 308 A_CVTPD2PS, 309 A_CVTPI2PD, 310 A_CVTPS2DQ, 311 A_CVTPS2PD, 312 A_CVTSD2SI, 313 A_CVTSD2SS, 314 A_CVTSI2SD, 315 A_CVTSS2SD, 316 A_CVTTPD2PI, 317 A_CVTTPD2DQ, 318 A_CVTTPS2DQ, 319 A_CVTTSD2SI, 320 A_CVTPI2PS, 321 A_CVTPS2PI, 322 A_CVTSI2SS, 323 A_CVTSS2SI, 324 A_CVTTPS2PI, 325 A_CVTTSS2SI, 326 A_XORPD, 327 A_XORPS, 328 A_ORPD, 329 A_ORPS, 330 A_ANDPD, 331 A_ANDPS, 332 A_UNPCKLPS, 333 A_UNPCKHPS, 334 A_SHUFPD, 335 A_SHUFPS, 336 A_VCOMISD, 337 A_VCOMISS: 338 replaceoper:=-1; 339 340 A_IMUL: 341 if ops<>3 then 342 replaceoper:=-1; 343 {$ifdef x86_64} 344 A_MOV: 345 { 64 bit constants can only be moved into registers } 346 if (oper[0]^.typ=top_const) and 347 (oper[1]^.typ=top_reg) and 348 ((oper[0]^.val<low(longint)) or 349 (oper[0]^.val>high(longint))) then 350 replaceoper:=-1; 351 {$endif x86_64} 352 else 353 if avx_opcode_only_op0_may_be_memref(opcode) then 354 replaceoper:=-1; 355 end; 356 end; 357 2 : 358 begin 359 { Some 3-op instructions don't allow memory references 360 for destination } 361 case instr.opcode of 362 A_IMUL: 363 replaceoper:=-1; 364 else 365 if avx_opcode_only_op0_may_be_memref(opcode) then 366 replaceoper:=-1; 367 end; 368 end; 369 end; 370 end; 371 end; 372 end; 373 374 {$ifdef x86_64} 375 { 32 bit operations on 32 bit registers on x86_64 can result in 376 zeroing the upper 32 bits of the register. This does not happen 377 with memory operations, so we have to perform these calculations 378 in registers. } 379 if (opsize=S_L) then 380 replaceoper:=-1; 381 {$endif x86_64} 382 383 { Replace register with spill reference } 384 if replaceoper<>-1 then 385 begin 386 if opcode=A_LEA then 387 begin 388 opcode:=A_ADD; 389 oper[0]^.ref^:=spilltemp; 390 end 391 else 392 begin 393 is_subh:=getsubreg(oper[replaceoper]^.reg)=R_SUBH; 394 oper[replaceoper]^.typ:=top_ref; 395 new(oper[replaceoper]^.ref); 396 oper[replaceoper]^.ref^:=spilltemp; 397 if is_subh then 398 inc(oper[replaceoper]^.ref^.offset); 399 { memory locations aren't guaranteed to be aligned } 400 case opcode of 401 A_MOVAPS: 402 opcode:=A_MOVSS; 403 A_MOVAPD: 404 opcode:=A_MOVSD; 405 A_VMOVAPS: 406 opcode:=A_VMOVSS; 407 A_VMOVAPD: 408 opcode:=A_VMOVSD; 409 end; 410 end; 411 result:=true; 412 end; 413 end; 414 end; 415 416 417 {****************************************************************************** 418 Trgx86fpu 419 ******************************************************************************} 420 421 constructor Trgx86fpu.create; 422 begin 423 used_in_proc:=[]; 424 end; 425 426 trgx86fpu.getregisterfpunull427 function trgx86fpu.getregisterfpu(list: TAsmList) : tregister; 428 begin 429 { note: don't return R_ST0, see comments above implementation of } 430 { a_loadfpu_* methods in cgcpu (JM) } 431 result:=NR_ST; 432 end; 433 434 435 procedure trgx86fpu.ungetregisterfpu(list : TAsmList; r : tregister); 436 begin 437 { nothing to do, fpu stack management is handled by the load/ } 438 { store operations in cgcpu (JM) } 439 end; 440 441 442 trgx86fpu.correct_fpuregisternull443 function trgx86fpu.correct_fpuregister(r : tregister;ofs : byte) : tregister; 444 begin 445 correct_fpuregister:=r; 446 setsupreg(correct_fpuregister,ofs); 447 end; 448 449 450 procedure trgx86fpu.saveusedfpuregisters(list: TAsmList; 451 var saved : tpushedsavedfpu; 452 const s: tcpuregisterset); 453 { var 454 r : tregister; 455 hr : treference; } 456 begin 457 used_in_proc:=used_in_proc+s; 458 459 { TODO: firstsavefpureg} 460 (* 461 { don't try to save the fpu registers if not desired (e.g. for } 462 { the 80x86) } 463 if firstsavefpureg <> R_NO then 464 for r.enum:=firstsavefpureg to lastsavefpureg do 465 begin 466 saved[r.enum].ofs:=reg_not_saved; 467 { if the register is used by the calling subroutine and if } 468 { it's not a regvar (those are handled separately) } 469 if not is_reg_var_other[r.enum] and 470 (r.enum in s) and 471 { and is present in use } 472 not(r.enum in unusedregsfpu) then 473 begin 474 { then save it } 475 tg.GetTemp(list,extended_size,tt_persistent,hr); 476 saved[r.enum].ofs:=hr.offset; 477 cg.a_loadfpu_reg_ref(list,OS_FLOAT,OS_FLOAT,r,hr); 478 cg.a_reg_dealloc(list,r); 479 include(unusedregsfpu,r.enum); 480 inc(countunusedregsfpu); 481 end; 482 end; 483 *) 484 end; 485 486 487 procedure trgx86fpu.restoreusedfpuregisters(list : TAsmList; 488 const saved : tpushedsavedfpu); 489 { 490 var 491 r,r2 : tregister; 492 hr : treference; 493 } 494 begin 495 { TODO: firstsavefpureg} 496 (* 497 if firstsavefpureg <> R_NO then 498 for r.enum:=lastsavefpureg downto firstsavefpureg do 499 begin 500 if saved[r.enum].ofs <> reg_not_saved then 501 begin 502 r2.enum:=R_INTREGISTER; 503 r2.number:=NR_FRAME_POINTER_REG; 504 reference_reset_base(hr,r2,saved[r.enum].ofs); 505 cg.a_reg_alloc(list,r); 506 cg.a_loadfpu_ref_reg(list,OS_FLOAT,OS_FLOAT,hr,r); 507 if not (r.enum in unusedregsfpu) then 508 { internalerror(10) 509 in n386cal we always save/restore the reg *state* 510 using save/restoreunusedstate -> the current state 511 may not be real (JM) } 512 else 513 begin 514 dec(countunusedregsfpu); 515 exclude(unusedregsfpu,r.enum); 516 end; 517 tg.UnGetTemp(list,hr); 518 end; 519 end; 520 *) 521 end; 522 523 (* 524 procedure Trgx86fpu.saveotherregvars(list: TAsmList; const s: totherregisterset); 525 var 526 r: Tregister; 527 begin 528 if not(cs_opt_regvar in current_settings.optimizerswitches) then 529 exit; 530 if firstsavefpureg <> NR_NO then 531 for r.enum := firstsavefpureg to lastsavefpureg do 532 if is_reg_var_other[r.enum] and 533 (r.enum in s) then 534 store_regvar(list,r); 535 end; 536 *) 537 538 end. 539