1 { 2 Copyright (c) 1998-2004 by Jonas Maebe 3 4 This unit calls the optimization procedures to optimize the assembler 5 code for sparc 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 21 **************************************************************************** 22 } 23 24 unit aoptcpu; 25 26 {$i fpcdefs.inc} 27 28 { $define DEBUG_AOPTCPU} 29 30 Interface 31 32 uses 33 cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai, aasmcpu; 34 35 Type 36 TAsmOpSet = set of TAsmOp; 37 38 TCpuAsmOptimizer = class(TAsmOptimizer) RegModifiedByInstructionnull39 function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override; GetNextInstructionUsingRegnull40 function GetNextInstructionUsingReg(Current: tai; 41 var Next: tai; reg: TRegister): Boolean; TryRemoveMovnull42 function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean; TryRemoveMovToRefIndexnull43 function TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean; TryRemoveMovBeforeStorenull44 function TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean; PeepHoleOptPass1Cpunull45 function PeepHoleOptPass1Cpu(var p: tai): boolean; override; 46 procedure PeepHoleOptPass2; override; RegLoadedWithNewValuenull47 function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override; InstructionLoadsFromRegnull48 function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override; 49 50 { outputs a debug message into the assembler file } 51 procedure DebugMsg(const s: string; p: tai); 52 End; 53 54 Implementation 55 56 uses 57 cutils,globtype,globals,aasmbase,cpuinfo,verbose; 58 59 MatchInstructionnull60 function MatchInstruction(const instr: tai; const op: TAsmOp): boolean; 61 begin 62 result := 63 (instr.typ = ait_instruction) and 64 (taicpu(instr).opcode = op); 65 end; 66 67 MatchOperandnull68 function MatchOperand(const oper: TOper; reg: TRegister): boolean; 69 begin 70 result:=(oper.typ=top_reg) and (oper.reg=reg); 71 end; 72 73 IsSameRegnull74 function IsSameReg(this,next: taicpu): boolean; 75 begin 76 result:=(next.oper[0]^.typ=top_reg) and 77 (next.oper[1]^.typ=top_reg) and 78 (next.oper[0]^.reg=next.oper[1]^.reg) and 79 (next.oper[0]^.reg=this.oper[0]^.reg); 80 end; 81 82 CanBeCMOVnull83 function CanBeCMOV(p: tai; condreg: tregister): boolean; 84 begin 85 result:=assigned(p) and (p.typ=ait_instruction) and 86 ((taicpu(p).opcode in [A_MOV_D,A_MOV_S]) or 87 ( 88 { register with condition must not be overwritten } 89 (taicpu(p).opcode=A_MOVE) and 90 (taicpu(p).oper[0]^.reg<>condreg) 91 )); 92 end; 93 94 95 procedure ChangeToCMOV(p: taicpu; cond: tasmcond; reg: tregister); 96 begin 97 case cond of 98 C_COP1TRUE: 99 case p.opcode of 100 A_MOV_D: p.opcode:=A_MOVT_D; 101 A_MOV_S: p.opcode:=A_MOVT_S; 102 A_MOVE: p.opcode:=A_MOVT; 103 else 104 InternalError(2014061701); 105 end; 106 C_COP1FALSE: 107 case p.opcode of 108 A_MOV_D: p.opcode:=A_MOVF_D; 109 A_MOV_S: p.opcode:=A_MOVF_S; 110 A_MOVE: p.opcode:=A_MOVF; 111 else 112 InternalError(2014061702); 113 end; 114 C_EQ: 115 case p.opcode of 116 A_MOV_D: p.opcode:=A_MOVZ_D; 117 A_MOV_S: p.opcode:=A_MOVZ_S; 118 A_MOVE: p.opcode:=A_MOVZ; 119 else 120 InternalError(2014061703); 121 end; 122 C_NE: 123 case p.opcode of 124 A_MOV_D: p.opcode:=A_MOVN_D; 125 A_MOV_S: p.opcode:=A_MOVN_S; 126 A_MOVE: p.opcode:=A_MOVN; 127 else 128 InternalError(2014061704); 129 end; 130 else 131 InternalError(2014061705); 132 end; 133 p.ops:=3; 134 p.loadreg(2,reg); 135 end; 136 137 138 {$ifdef DEBUG_AOPTCPU} 139 procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai); 140 begin 141 asml.insertbefore(tai_comment.Create(strpnew(s)), p); 142 end; 143 {$else DEBUG_AOPTCPU} 144 procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline; 145 begin 146 end; 147 {$endif DEBUG_AOPTCPU} 148 149 TCpuAsmOptimizer.InstructionLoadsFromRegnull150 function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; 151 var 152 p: taicpu; 153 i: longint; 154 begin 155 result:=false; 156 if not (assigned(hp) and (hp.typ=ait_instruction)) then 157 exit; 158 p:=taicpu(hp); 159 160 i:=0; 161 while(i<p.ops) do 162 begin 163 case p.oper[I]^.typ of 164 top_reg: 165 result:=(p.oper[I]^.reg=reg) and (p.spilling_get_operation_type(I)<>operand_write); 166 top_ref: 167 result:= 168 (p.oper[I]^.ref^.base=reg) or 169 (p.oper[I]^.ref^.index=reg); 170 end; 171 if result then exit; {Bailout if we found something} 172 Inc(I); 173 end; 174 end; 175 176 TCpuAsmOptimizer.RegLoadedWithNewValuenull177 function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; 178 var 179 p: taicpu; 180 begin 181 p:=taicpu(hp); 182 result:=false; 183 if not ((assigned(hp)) and (hp.typ=ait_instruction)) then 184 exit; 185 186 case p.opcode of 187 { These instructions do not write into a register at all } 188 A_NOP, 189 A_C_EQ_D,A_C_EQ_S,A_C_LE_D,A_C_LE_S,A_C_LT_D,A_C_LT_S, 190 A_BA,A_BC, 191 A_SB,A_SH,A_SW,A_SWL,A_SWR,A_SWC1,A_SDC1: 192 exit; 193 end; 194 195 result:=(p.ops>0) and (p.oper[0]^.typ=top_reg) and 196 (p.oper[0]^.reg=reg); 197 end; 198 199 TCpuAsmOptimizer.RegModifiedByInstructionnull200 function TCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; 201 var 202 i : Longint; 203 begin 204 result:=false; 205 for i:=0 to taicpu(p1).ops-1 do 206 if (taicpu(p1).oper[i]^.typ=top_reg) and (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then 207 begin 208 result:=true; 209 exit; 210 end; 211 end; 212 213 TCpuAsmOptimizer.GetNextInstructionUsingRegnull214 function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; 215 var Next: tai; reg: TRegister): Boolean; 216 begin 217 Next:=Current; 218 repeat 219 Result:=GetNextInstruction(Next,Next); 220 until {not(cs_opt_level3 in current_settings.optimizerswitches) or} not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or 221 (is_calljmp(taicpu(Next).opcode)); 222 end; 223 224 TCpuAsmOptimizer.TryRemoveMovnull225 function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean; 226 var 227 next,hp1: tai; 228 alloc,dealloc: tai_regalloc; 229 begin 230 { Fold 231 op $reg1,... 232 opcode $reg2,$reg1 233 dealloc $reg1 234 into 235 op $reg2,... 236 opcode may be A_MOVE, A_MOV_s, A_MOV_d, etc. 237 } 238 result:=false; 239 if (taicpu(p).ops>0) and 240 GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and 241 MatchInstruction(next,opcode) and 242 MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) and 243 { the destination register of mov cannot be used between p and next } 244 (not RegUsedBetween(taicpu(next).oper[0]^.reg,p,next)) then 245 246 begin 247 dealloc:=FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.Next)); 248 if assigned(dealloc) then 249 begin 250 { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation 251 and remove it if possible } 252 GetLastInstruction(p,hp1); 253 254 asml.Remove(dealloc); 255 alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)); 256 if assigned(alloc) then 257 begin 258 asml.Remove(alloc); 259 alloc.free; 260 dealloc.free; 261 end 262 else 263 asml.InsertAfter(dealloc,p); 264 265 { try to move the allocation of the target register } 266 GetLastInstruction(next,hp1); 267 alloc:=FindRegAlloc(taicpu(next).oper[0]^.reg,tai(hp1.Next)); 268 if assigned(alloc) then 269 begin 270 asml.Remove(alloc); 271 asml.InsertBefore(alloc,p); 272 { adjust used regs } 273 IncludeRegInUsedRegs(taicpu(next).oper[0]^.reg,UsedRegs); 274 end; 275 276 { finally get rid of the mov } 277 taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg); 278 DebugMsg('Peephole: Move removed 1',next); 279 asml.remove(next); 280 next.free; 281 result:=true; 282 end 283 else // no dealloc found 284 begin 285 { try to optimize the typical call sequence 286 lw $reg, (whatever) 287 <alloc volatile registers (including $reg!!)> 288 move $t9,$reg 289 jalr $t9 290 291 if $reg is nonvolatile, its value may be used after call 292 and we cannot safely replace it with $t9 } 293 if (opcode=A_MOVE) and 294 (taicpu(next).oper[0]^.reg=NR_R25) and 295 GetNextInstruction(next,hp1) and 296 MatchInstruction(hp1,A_JALR) and 297 MatchOperand(taicpu(hp1).oper[0]^,NR_R25) and 298 assigned(FindRegAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) then 299 begin 300 taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg); 301 DebugMsg('Peephole: Move removed 2',next); 302 asml.remove(next); 303 next.free; 304 result:=true; 305 end; 306 end; 307 end; 308 end; 309 310 TCpuAsmOptimizer.TryRemoveMovBeforeStorenull311 function TCpuAsmOptimizer.TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean; 312 begin 313 result:=(next.opcode in storeops) and 314 MatchOperand(next.oper[0]^,taicpu(p).oper[0]^.reg) and 315 { Ry cannot be modified between move and store } 316 (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and 317 Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))); 318 if result then 319 begin 320 next.loadreg(0,taicpu(p).oper[1]^.reg); 321 DebugMsg('Peephole: Move removed 3',p); 322 asml.remove(p); 323 p.free; 324 p:=next; 325 end; 326 end; 327 328 TCpuAsmOptimizer.TryRemoveMovToRefIndexnull329 function TCpuAsmOptimizer.TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean; 330 begin 331 result:=(next.ops>1) and 332 (next.oper[1]^.typ=top_ref) and 333 (next.oper[1]^.ref^.refaddr<>addr_full) and 334 (next.oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and 335 (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and 336 Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))); 337 if result then 338 begin 339 next.oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg; 340 DebugMsg('Peephole: Move removed 4',p); 341 asml.remove(p); 342 p.free; 343 p:=next; 344 end; 345 end; 346 347 TCpuAsmOptimizer.PeepHoleOptPass1Cpunull348 function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean; 349 var 350 next,next2: tai; 351 begin 352 result:=false; 353 case p.typ of 354 ait_instruction: 355 begin 356 case taicpu(p).opcode of 357 A_BC: 358 begin 359 { BEQ/BNE with same register are bogus, but can be generated for code like 360 "if lo(qwordvar)=cardinal(qwordvar) ...", 361 optimizations below can also yield them, e.g. if one register was initially R0. } 362 if (taicpu(p).condition in [C_EQ,C_NE]) and 363 (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then 364 begin 365 if (taicpu(p).condition=C_NE) then 366 begin 367 if (taicpu(p).oper[2]^.typ = top_ref) and 368 (taicpu(p).oper[2]^.ref^.symbol is TAsmLabel) then 369 TAsmLabel(taicpu(p).oper[2]^.ref^.symbol).decrefs; 370 RemoveDelaySlot(p); 371 GetNextInstruction(p,next); 372 end 373 else 374 begin 375 next:=taicpu.op_sym(A_BA,taicpu(p).oper[2]^.ref^.symbol); 376 taicpu(next).fileinfo:=taicpu(p).fileinfo; 377 asml.insertbefore(next,p); 378 end; 379 asml.remove(p); 380 p.Free; 381 p:=next; 382 result:=true; 383 end; 384 end; 385 386 A_SEH: 387 begin 388 if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and 389 MatchInstruction(next,A_SH) and 390 MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) and 391 (not RegUsedBetween(taicpu(p).oper[1]^.reg,p,next)) and 392 Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then 393 begin 394 taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg); 395 asml.remove(p); 396 p.free; 397 p:=next; 398 result:=true; 399 end 400 else 401 result:=TryRemoveMov(p,A_MOVE); 402 end; 403 A_SEB: 404 { TODO: can be handled similar to A_SEH, but it's almost never encountered } 405 result:=TryRemoveMov(p,A_MOVE); 406 407 A_SLL: 408 begin 409 { if this is a sign extension... } 410 if (taicpu(p).oper[2]^.typ=top_const) and 411 GetNextInstruction(p,next) and 412 MatchInstruction(next,A_SRA) and 413 IsSameReg(taicpu(p),taicpu(next)) and 414 (taicpu(next).oper[2]^.typ=top_const) and 415 (taicpu(next).oper[2]^.val=taicpu(p).oper[2]^.val) and 416 (taicpu(next).oper[2]^.val=16) and 417 { ...followed by 16-bit store (possibly with PIC simplification, etc. in between) } 418 GetNextInstructionUsingReg(next,next2,taicpu(p).oper[0]^.reg) and 419 MatchInstruction(next2,A_SH) and 420 (taicpu(next2).oper[0]^.typ=top_reg) and 421 (taicpu(next2).oper[0]^.reg=taicpu(p).oper[0]^.reg) and 422 { the initial register may not be reused } 423 (not RegUsedBetween(taicpu(p).oper[1]^.reg,next,next2)) then 424 begin 425 if Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next2.next))) then 426 begin 427 taicpu(next2).loadreg(0,taicpu(p).oper[1]^.reg); 428 asml.remove(p); 429 asml.remove(next); 430 p.free; 431 next.free; 432 p:=next2; 433 result:=true; 434 end; 435 end 436 else 437 result:=TryRemoveMov(p,A_MOVE); 438 end; 439 440 A_SRL: 441 begin 442 { TODO: also kill sign-extensions that follow, both SLL+SRA and SEB/SEH versions } 443 { Remove 'andi' in sequences 444 srl Rx,Ry,16 445 andi Rx,Rx,65535 446 447 srl Rx,Ry,24 448 andi Rx,Rx,255 449 since 'srl' clears all relevant upper bits } 450 if (taicpu(p).oper[2]^.typ=top_const) and 451 GetNextInstruction(p,next) and 452 MatchInstruction(next,A_ANDI) and 453 IsSameReg(taicpu(p),taicpu(next)) and 454 (taicpu(next).oper[2]^.typ=top_const) and 455 (( 456 (taicpu(p).oper[2]^.val>=16) and 457 (taicpu(next).oper[2]^.val=65535) 458 ) or ( 459 (taicpu(p).oper[2]^.val>=24) and 460 (taicpu(next).oper[2]^.val=255) 461 )) then 462 begin 463 asml.remove(next); 464 next.free; 465 result:=true; 466 end 467 else 468 result:=TryRemoveMov(p,A_MOVE); 469 end; 470 471 A_ANDI: 472 begin 473 { Remove sign extension after 'andi' if bit 7/15 of const operand is clear } 474 if (taicpu(p).oper[2]^.typ=top_const) and 475 GetNextInstruction(p,next) and 476 MatchInstruction(next,A_SLL) and 477 GetNextInstruction(next,next2) and 478 MatchInstruction(next2,A_SRA) and 479 IsSameReg(taicpu(p),taicpu(next)) and 480 IsSameReg(taicpu(p),taicpu(next2)) and 481 (taicpu(next).oper[2]^.typ=top_const) and 482 (taicpu(next2).oper[2]^.typ=top_const) and 483 (taicpu(next).oper[2]^.val=taicpu(next2).oper[2]^.val) and 484 (( 485 (taicpu(p).oper[2]^.val<=$7fff) and 486 (taicpu(next).oper[2]^.val=16) 487 ) or ( 488 (taicpu(p).oper[2]^.val<=$7f) and 489 (taicpu(next).oper[2]^.val=24) 490 )) then 491 begin 492 asml.remove(next); 493 asml.remove(next2); 494 next.free; 495 next2.free; 496 result:=true; 497 end 498 { Remove zero extension if register is used only for byte/word memory store } 499 else if (taicpu(p).oper[2]^.typ=top_const) and 500 GetNextInstruction(p,next) and 501 ((taicpu(p).oper[2]^.val=255) and MatchInstruction(next,A_SB)) or 502 ((taicpu(p).oper[2]^.val=65535) and MatchInstruction(next,A_SH)) and 503 (taicpu(next).oper[0]^.typ=top_reg) and 504 (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) and 505 assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then 506 begin 507 taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg); 508 asml.remove(p); 509 p.free; 510 p:=next; 511 result:=true; 512 end 513 else 514 result:=TryRemoveMov(p,A_MOVE); 515 end; 516 517 A_MOV_S: 518 begin 519 if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and 520 (next.typ=ait_instruction) then 521 begin 522 if TryRemoveMovBeforeStore(p,taicpu(next),[A_SWC1]) then 523 result:=true; 524 end; 525 end; 526 527 A_MOV_D: 528 begin 529 if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and 530 (next.typ=ait_instruction) then 531 begin 532 if TryRemoveMovBeforeStore(p,taicpu(next),[A_SDC1]) then 533 result:=true; 534 end; 535 end; 536 537 A_MOVE: 538 begin 539 if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and 540 (next.typ=ait_instruction) and 541 (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then 542 begin 543 { MOVE Rx,Ry; store Rx,(ref); dealloc Rx ==> store Ry,(ref) } 544 if TryRemoveMovBeforeStore(p,taicpu(next),[A_SB,A_SH,A_SW]) then 545 result:=true 546 else if TryRemoveMovToRefIndex(p,taicpu(next)) then 547 result:=true 548 { MOVE Rx,Ry; opcode Rx,Rx,any ==> opcode Rx,Ry,any 549 MOVE Rx,Ry; opcode Rx,Rz,Rx ==> opcode Rx,Rz,Ry } 550 else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_AND,A_ANDI,A_SLLV,A_SRLV,A_SRAV]) and 551 MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then 552 begin 553 if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then 554 begin 555 taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg); 556 asml.remove(p); 557 p.free; 558 p:=next; 559 result:=true; 560 end 561 { TODO: if Ry=NR_R0, this effectively changes instruction into MOVE, 562 providing further optimization possibilities } 563 else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then 564 begin 565 taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg); 566 asml.remove(p); 567 p.free; 568 p:=next; 569 result:=true; 570 end; 571 end 572 { MOVE Rx,Ry; opcode Rz,Rx,any; dealloc Rx ==> opcode Rz,Ry,any } 573 else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_SLT,A_SLTU,A_DIV,A_DIVU, 574 A_SLL,A_SRL,A_SRA,A_SLLV,A_SRLV,A_SRAV,A_AND,A_ANDI,A_OR,A_ORI,A_XOR,A_XORI]) and 575 Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then 576 begin 577 if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then 578 begin 579 taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg); 580 asml.remove(p); 581 p.free; 582 p:=next; 583 result:=true; 584 end 585 else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then 586 begin 587 taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg); 588 asml.remove(p); 589 p.free; 590 p:=next; 591 result:=true; 592 end; 593 end 594 { MULT[U] and cond.branches must be handled separately due to different operand numbers } 595 else if (taicpu(next).opcode in [A_MULT,A_MULTU,A_BC]) and 596 Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then 597 begin 598 if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then 599 begin 600 taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg); 601 if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then 602 taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg); 603 asml.remove(p); 604 p.free; 605 p:=next; 606 result:=true; 607 end 608 else if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then 609 begin 610 taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg); 611 if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then 612 taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg); 613 asml.remove(p); 614 p.free; 615 p:=next; 616 result:=true; 617 end; 618 end 619 else if TryRemoveMov(p,A_MOVE) then 620 begin 621 { Ended up with move between same register? Suicide then. } 622 if (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then 623 begin 624 GetNextInstruction(p,next); 625 asml.remove(p); 626 p.free; 627 p:=next; 628 result:=true; 629 end; 630 end; 631 end; 632 end; 633 634 A_ADDIU: 635 begin 636 { ADDIU Rx,Ry,const; load/store Rz,(Rx); dealloc Rx ==> load/store Rz,const(Ry) 637 ADDIU Rx,Ry,%lo(sym); load/store Rz,(Rx); dealloc Rx ==> load/store Rz,%lo(sym)(Ry) 638 ADDIU Rx,Ry,const; load Rx,(Rx) ==> load Rx,const(Ry) 639 ADDIU Rx,Ry,%lo(sym); load Rx,(Rx) ==> load Rx,%lo(sym)(Ry) } 640 if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and 641 (next.typ=ait_instruction) and 642 (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW,A_SB,A_SH,A_SW]) and 643 (taicpu(p).oper[0]^.reg=taicpu(next).oper[1]^.ref^.base) and 644 (taicpu(next).oper[1]^.ref^.offset=0) and 645 (taicpu(next).oper[1]^.ref^.symbol=nil) and 646 ( 647 Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) or 648 ( 649 (taicpu(p).oper[0]^.reg=taicpu(next).oper[0]^.reg) and 650 (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW]) 651 ) 652 ) and 653 (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then 654 begin 655 case taicpu(p).oper[2]^.typ of 656 top_const: 657 taicpu(next).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val; 658 659 top_ref: 660 taicpu(next).oper[1]^.ref^:=taicpu(p).oper[2]^.ref^; 661 else 662 InternalError(2014100401); 663 end; 664 taicpu(next).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg; 665 asml.remove(p); 666 p.free; 667 p:=next; 668 result:=true; 669 end 670 else 671 result:=TryRemoveMov(p,A_MOVE); 672 end; 673 674 A_ADD,A_ADDU,A_OR: 675 begin 676 if MatchOperand(taicpu(p).oper[1]^,NR_R0) then 677 begin 678 taicpu(p).freeop(1); 679 taicpu(p).oper[1]:=taicpu(p).oper[2]; 680 taicpu(p).oper[2]:=nil; 681 taicpu(p).ops:=2; 682 taicpu(p).opercnt:=2; 683 taicpu(p).opcode:=A_MOVE; 684 result:=true; 685 end 686 else if MatchOperand(taicpu(p).oper[2]^,NR_R0) then 687 begin 688 taicpu(p).freeop(2); 689 taicpu(p).ops:=2; 690 taicpu(p).opercnt:=2; 691 taicpu(p).opcode:=A_MOVE; 692 result:=true; 693 end 694 else 695 result:=TryRemoveMov(p,A_MOVE); 696 end; 697 698 A_LB,A_LBU,A_LH,A_LHU,A_LW, 699 A_ADDI, 700 A_SUB,A_SUBU, 701 A_SRA,A_SRAV, 702 A_SRLV, 703 A_SLLV, 704 A_MFLO,A_MFHI, 705 A_AND,A_XOR,A_ORI,A_XORI: 706 result:=TryRemoveMov(p,A_MOVE); 707 708 A_LWC1, 709 A_ADD_s, A_SUB_s, A_MUL_s, A_DIV_s, 710 A_ABS_s, A_NEG_s, A_SQRT_s, 711 A_CVT_s_w, A_CVT_s_l, A_CVT_s_d: 712 result:=TryRemoveMov(p,A_MOV_s); 713 714 A_LDC1, 715 A_ADD_d, A_SUB_d, A_MUL_d, A_DIV_d, 716 A_ABS_d, A_NEG_d, A_SQRT_d, 717 A_CVT_d_w, A_CVT_d_l, A_CVT_d_s: 718 result:=TryRemoveMov(p,A_MOV_d); 719 end; 720 end; 721 end; 722 end; 723 724 725 procedure TCpuAsmOptimizer.PeepHoleOptPass2; 726 var 727 p: tai; 728 l: longint; 729 hp1,hp2,hp3: tai; 730 condition: tasmcond; 731 condreg: tregister; 732 begin 733 { Currently, everything below is mips4+ } 734 if (current_settings.cputype<cpu_mips4) then 735 exit; 736 p:=BlockStart; 737 ClearUsedRegs; 738 while (p<>BlockEnd) Do 739 begin 740 UpdateUsedRegs(tai(p.next)); 741 case p.typ of 742 ait_instruction: 743 begin 744 case taicpu(p).opcode of 745 A_BC: 746 begin 747 condreg:=NR_NO; 748 if (taicpu(p).condition in [C_COP1TRUE,C_COP1FALSE]) then 749 { TODO: must be taken from "p" if/when codegen makes use of multiple %fcc } 750 condreg:=NR_FCC0 751 else if (taicpu(p).condition in [C_EQ,C_NE]) then 752 begin 753 if (taicpu(p).oper[0]^.reg=NR_R0) then 754 condreg:=taicpu(p).oper[1]^.reg 755 else if (taicpu(p).oper[1]^.reg=NR_R0) then 756 condreg:=taicpu(p).oper[0]^.reg 757 end; 758 759 if (condreg<>NR_NO) then 760 begin 761 { check for 762 bCC xxx 763 <several movs> 764 xxx: 765 } 766 l:=0; 767 GetNextInstruction(p, hp1); 768 while CanBeCMOV(hp1,condreg) do // CanBeCMOV returns False for nil or labels 769 begin 770 inc(l); 771 GetNextInstruction(hp1,hp1); 772 end; 773 if assigned(hp1) then 774 begin 775 if FindLabel(tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol),hp1) then 776 begin 777 if (l<=4) and (l>0) then 778 begin 779 condition:=inverse_cond(taicpu(p).condition); 780 hp2:=p; 781 GetNextInstruction(p,hp1); 782 p:=hp1; 783 repeat 784 ChangeToCMOV(taicpu(hp1),condition,condreg); 785 GetNextInstruction(hp1,hp1); 786 until not CanBeCMOV(hp1,condreg); 787 { wait with removing else GetNextInstruction could 788 ignore the label if it was the only usage in the 789 jump moved away } 790 tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol).decrefs; 791 RemoveDelaySlot(hp2); 792 asml.remove(hp2); 793 hp2.free; 794 continue; 795 end; 796 end 797 else 798 begin 799 { check further for 800 bCC xxx 801 <several movs 1> 802 b yyy 803 xxx: 804 <several movs 2> 805 yyy: 806 } 807 { hp2 points to jmp yyy } 808 hp2:=hp1; 809 { skip hp1 to xxx } 810 GetNextInstruction(hp1, hp1); 811 if assigned(hp2) and 812 assigned(hp1) and 813 (l<=3) and 814 (hp2.typ=ait_instruction) and 815 (taicpu(hp2).opcode=A_BA) and 816 { real label and jump, no further references to the 817 label are allowed } 818 (tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol).getrefs<=2) and 819 FindLabel(tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol),hp1) then 820 begin 821 l:=0; 822 { skip hp1 to <several moves 2> } 823 GetNextInstruction(hp1, hp1); 824 while CanBeCMOV(hp1,condreg) do 825 begin 826 inc(l); 827 GetNextInstruction(hp1, hp1); 828 end; 829 { hp1 points to yyy: } 830 if assigned(hp1) and (l<=3) and 831 FindLabel(tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol),hp1) then 832 begin 833 condition:=inverse_cond(taicpu(p).condition); 834 GetNextInstruction(p,hp1); 835 hp3:=p; 836 p:=hp1; 837 while CanBeCMOV(hp1,condreg) do 838 begin 839 ChangeToCMOV(taicpu(hp1),condition,condreg); 840 GetNextInstruction(hp1,hp1); 841 end; 842 { hp2 is still at b yyy } 843 GetNextInstruction(hp2,hp1); 844 { hp2 is now at xxx: } 845 condition:=inverse_cond(condition); 846 GetNextInstruction(hp1,hp1); 847 { hp1 is now at <several movs 2> } 848 while CanBeCMOV(hp1,condreg) do 849 begin 850 ChangeToCMOV(taicpu(hp1),condition,condreg); 851 GetNextInstruction(hp1,hp1); 852 end; 853 { remove bCC } 854 tasmlabel(taicpu(hp3).oper[taicpu(hp3).ops-1]^.ref^.symbol).decrefs; 855 RemoveDelaySlot(hp3); 856 asml.remove(hp3); 857 hp3.free; 858 { remove jmp } 859 tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol).decrefs; 860 RemoveDelaySlot(hp2); 861 asml.remove(hp2); 862 hp2.free; 863 continue; 864 end; 865 end; 866 end; 867 end; 868 end; 869 end; 870 end; 871 end; 872 end; 873 UpdateUsedRegs(p); 874 p:=tai(p.next); 875 end; 876 end; 877 878 begin 879 casmoptimizer:=TCpuAsmOptimizer; 880 end. 881