1 { 2 Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal 3 Development Team 4 5 This unit implements the ARM optimizer object 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 20 21 **************************************************************************** 22 } 23 24 25 Unit aoptcpu; 26 27 {$i fpcdefs.inc} 28 29 { $define DEBUG_AOPTCPU} 30 31 Interface 32 33 uses cpubase,cgbase,aasmtai,aopt,AoptObj,aoptcpub; 34 35 Type 36 TCpuAsmOptimizer = class(TAsmOptimizer) 37 { outputs a debug message into the assembler file } 38 procedure DebugMsg(const s: string; p: tai); 39 GetNextInstructionUsingRegnull40 Function GetNextInstructionUsingReg(Current: tai; Var Next: tai;reg : TRegister): Boolean; RegInInstructionnull41 function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override; RegLoadedWithNewValuenull42 function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override; InstructionLoadsFromRegnull43 function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override; 44 45 { uses the same constructor as TAopObj } PeepHoleOptPass1Cpunull46 function PeepHoleOptPass1Cpu(var p: tai): boolean; override; 47 procedure PeepHoleOptPass2;override; 48 End; 49 50 Implementation 51 52 uses 53 cutils, 54 verbose, 55 cpuinfo, 56 aasmbase,aasmcpu,aasmdata, 57 aoptutils, 58 globals,globtype, 59 cgutils; 60 61 type 62 TAsmOpSet = set of TAsmOp; 63 CanBeCondnull64 function CanBeCond(p : tai) : boolean; 65 begin 66 result:=(p.typ=ait_instruction) and (taicpu(p).condition=C_None); 67 end; 68 69 RefsEqualnull70 function RefsEqual(const r1, r2: treference): boolean; 71 begin 72 refsequal := 73 (r1.offset = r2.offset) and 74 (r1.base = r2.base) and 75 (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and 76 (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and 77 (r1.relsymbol = r2.relsymbol) and 78 (r1.addressmode = r2.addressmode); 79 end; 80 81 MatchOperandnull82 function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline; 83 begin 84 result:=oper1.typ=oper2.typ; 85 86 if result then 87 case oper1.typ of 88 top_const: 89 Result:=oper1.val = oper2.val; 90 top_reg: 91 Result:=oper1.reg = oper2.reg; 92 top_ref: 93 Result:=RefsEqual(oper1.ref^, oper2.ref^); 94 else Result:=false; 95 end 96 end; 97 98 MatchOperandnull99 function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline; 100 begin 101 result := (oper.typ = top_reg) and (oper.reg = reg); 102 end; 103 104 MatchInstructionnull105 function MatchInstruction(const instr: tai; const op: TAsmOp): boolean; 106 begin 107 result := 108 (instr.typ = ait_instruction) and 109 (taicpu(instr).opcode = op); 110 end; 111 112 MatchInstructionnull113 function MatchInstruction(const instr: tai; const ops: TAsmOpSet): boolean; 114 begin 115 result := 116 (instr.typ = ait_instruction) and 117 (taicpu(instr).opcode in ops); 118 end; 119 120 MatchInstructionnull121 function MatchInstruction(const instr: tai; const ops: TAsmOpSet;opcount : byte): boolean; 122 begin 123 result := 124 (instr.typ = ait_instruction) and 125 (taicpu(instr).opcode in ops) and 126 (taicpu(instr).ops=opcount); 127 end; 128 129 130 {$ifdef DEBUG_AOPTCPU} 131 procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai); 132 begin 133 asml.insertbefore(tai_comment.Create(strpnew(s)), p); 134 end; 135 {$else DEBUG_AOPTCPU} 136 procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline; 137 begin 138 end; 139 {$endif DEBUG_AOPTCPU} 140 141 TCpuAsmOptimizer.RegInInstructionnull142 function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean; 143 begin 144 If (p1.typ = ait_instruction) and (taicpu(p1).opcode in [A_MUL,A_MULS,A_FMUL,A_FMULS,A_FMULSU]) and 145 ((getsupreg(reg)=RS_R0) or (getsupreg(reg)=RS_R1)) then 146 Result:=true 147 else if (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_MOVW) and 148 ((TRegister(ord(taicpu(p1).oper[0]^.reg)+1)=reg) or (TRegister(ord(taicpu(p1).oper[1]^.reg)+1)=reg) or 149 (taicpu(p1).oper[0]^.reg=reg) or (taicpu(p1).oper[1]^.reg=reg)) then 150 Result:=true 151 else 152 Result:=inherited RegInInstruction(Reg, p1); 153 end; 154 155 TCpuAsmOptimizer.GetNextInstructionUsingRegnull156 function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai; 157 var Next: tai; reg: TRegister): Boolean; 158 begin 159 Next:=Current; 160 repeat 161 Result:=GetNextInstruction(Next,Next); 162 until not(cs_opt_level3 in current_settings.optimizerswitches) or not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or 163 (is_calljmp(taicpu(Next).opcode)); 164 end; 165 166 TCpuAsmOptimizer.RegLoadedWithNewValuenull167 function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; 168 var 169 p: taicpu; 170 begin 171 if not assigned(hp) or 172 (hp.typ <> ait_instruction) then 173 begin 174 Result := false; 175 exit; 176 end; 177 p := taicpu(hp); 178 Result := ((p.opcode in [A_LDI,A_MOV,A_LDS]) and (reg=p.oper[0]^.reg) and ((p.oper[1]^.typ<>top_reg) or (reg<>p.oper[0]^.reg))) or 179 ((p.opcode in [A_LD,A_LDD,A_LPM]) and (reg=p.oper[0]^.reg) and not(RegInRef(reg,p.oper[1]^.ref^))) or 180 ((p.opcode in [A_MOVW]) and ((reg=p.oper[0]^.reg) or (TRegister(ord(reg)+1)=p.oper[0]^.reg)) and not(reg=p.oper[1]^.reg) and not(TRegister(ord(reg)+1)=p.oper[1]^.reg)) or 181 ((p.opcode in [A_POP]) and (reg=p.oper[0]^.reg)); 182 end; 183 184 TCpuAsmOptimizer.InstructionLoadsFromRegnull185 function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; 186 var 187 p: taicpu; 188 i: longint; 189 begin 190 Result := false; 191 192 if not (assigned(hp) and (hp.typ = ait_instruction)) then 193 exit; 194 p:=taicpu(hp); 195 196 i:=0; 197 198 { we do not care about the stack pointer } 199 if p.opcode in [A_POP] then 200 exit; 201 202 { first operand only written? 203 then skip it } 204 if p.opcode in [A_MOV,A_LD,A_LDD,A_LDS,A_LPM,A_LDI,A_MOVW] then 205 i:=1; 206 207 while i<p.ops do 208 begin 209 case p.oper[i]^.typ of 210 top_reg: 211 Result := (p.oper[i]^.reg = reg) or 212 { MOVW } 213 ((i=1) and (p.opcode=A_MOVW) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg))); 214 top_ref: 215 Result := 216 (p.oper[i]^.ref^.base = reg) or 217 (p.oper[i]^.ref^.index = reg); 218 end; 219 { Bailout if we found something } 220 if Result then 221 exit; 222 Inc(i); 223 end; 224 end; 225 TCpuAsmOptimizer.PeepHoleOptPass1Cpunull226 function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean; 227 var 228 hp1,hp2,hp3,hp4,hp5: tai; 229 alloc, dealloc: tai_regalloc; 230 i: integer; 231 l: TAsmLabel; 232 TmpUsedRegs : TAllUsedRegs; 233 begin 234 result := false; 235 case p.typ of 236 ait_instruction: 237 begin 238 { 239 change 240 <op> reg,x,y 241 cp reg,r1 242 into 243 <op>s reg,x,y 244 } 245 { this optimization can applied only to the currently enabled operations because 246 the other operations do not update all flags and FPC does not track flag usage } 247 if MatchInstruction(p, [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_DEC,A_EOR, 248 A_INC,A_LSL,A_LSR, 249 A_OR,A_ORI,A_ROL,A_ROR,A_SBC,A_SBCI,A_SUB,A_SUBI]) and 250 GetNextInstruction(p, hp1) and 251 ((MatchInstruction(hp1, A_CP) and 252 (((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and 253 (taicpu(hp1).oper[1]^.reg = NR_R1)) or 254 ((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and 255 (taicpu(hp1).oper[0]^.reg = NR_R1) and 256 (taicpu(p).opcode in [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_EOR, 257 A_LSL,A_LSR, 258 A_OR,A_ORI,A_ROL,A_ROR,A_SUB,A_SBI])))) or 259 (MatchInstruction(hp1, A_CPI) and 260 (taicpu(p).opcode = A_ANDI) and 261 (taicpu(p).oper[1]^.typ=top_const) and 262 (taicpu(hp1).oper[1]^.typ=top_const) and 263 (taicpu(p).oper[1]^.val=taicpu(hp1).oper[1]^.val))) and 264 GetNextInstruction(hp1, hp2) and 265 { be careful here, following instructions could use other flags 266 however after a jump fpc never depends on the value of flags } 267 { All above instructions set Z and N according to the following 268 Z := result = 0; 269 N := result[31]; 270 EQ = Z=1; NE = Z=0; 271 MI = N=1; PL = N=0; } 272 MatchInstruction(hp2, A_BRxx) and 273 ((taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL]) or 274 { sub/sbc set all flags } 275 (taicpu(p).opcode in [A_SUB,A_SBI])){ and 276 no flag allocation tracking implemented yet on avr 277 assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next)))} then 278 begin 279 { move flag allocation if possible } 280 { no flag allocation tracking implemented yet on avr 281 GetLastInstruction(hp1, hp2); 282 hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next)); 283 if assigned(hp2) then 284 begin 285 asml.Remove(hp2); 286 asml.insertbefore(hp2, p); 287 end; 288 } 289 290 // If we compare to the same value we are masking then invert the comparison 291 if (taicpu(hp1).opcode=A_CPI) or 292 { sub/sbc with reverted? } 293 ((taicpu(hp1).oper[0]^.reg = NR_R1) and (taicpu(p).opcode in [A_SUB,A_SBI])) then 294 taicpu(hp2).condition:=inverse_cond(taicpu(hp2).condition); 295 296 asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p); 297 asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,hp2), hp2); 298 IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs); 299 300 DebugMsg('Peephole OpCp2Op performed', p); 301 302 asml.remove(hp1); 303 hp1.free; 304 Result:=true; 305 end 306 else 307 case taicpu(p).opcode of 308 A_LDI: 309 begin 310 { turn 311 ldi reg0, imm 312 cp/mov reg1, reg0 313 dealloc reg0 314 into 315 cpi/ldi reg1, imm 316 } 317 if MatchOpType(taicpu(p),top_reg,top_const) and 318 GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and 319 MatchInstruction(hp1,[A_CP,A_MOV],2) and 320 (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and 321 MatchOpType(taicpu(hp1),top_reg,top_reg) and 322 (getsupreg(taicpu(hp1).oper[0]^.reg) in [16..31]) and 323 (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and 324 not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) then 325 begin 326 CopyUsedRegs(TmpUsedRegs); 327 if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs)) then 328 begin 329 case taicpu(hp1).opcode of 330 A_CP: 331 taicpu(hp1).opcode:=A_CPI; 332 A_MOV: 333 taicpu(hp1).opcode:=A_LDI; 334 else 335 internalerror(2016111901); 336 end; 337 taicpu(hp1).loadconst(1, taicpu(p).oper[1]^.val); 338 339 alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous)); 340 dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)); 341 342 if assigned(alloc) and assigned(dealloc) then 343 begin 344 asml.Remove(alloc); 345 alloc.Free; 346 asml.Remove(dealloc); 347 dealloc.Free; 348 end; 349 350 DebugMsg('Peephole LdiMov/Cp2Ldi/Cpi performed', p); 351 352 RemoveCurrentP(p); 353 end; 354 ReleaseUsedRegs(TmpUsedRegs); 355 end; 356 end; 357 A_STS: 358 if (taicpu(p).oper[0]^.ref^.symbol=nil) and 359 (taicpu(p).oper[0]^.ref^.relsymbol=nil) and 360 (getsupreg(taicpu(p).oper[0]^.ref^.base)=RS_NO) and 361 (getsupreg(taicpu(p).oper[0]^.ref^.index)=RS_NO) and 362 (taicpu(p).oper[0]^.ref^.addressmode=AM_UNCHANGED) and 363 (taicpu(p).oper[0]^.ref^.offset>=32) and 364 (taicpu(p).oper[0]^.ref^.offset<=95) then 365 begin 366 DebugMsg('Peephole Sts2Out performed', p); 367 368 taicpu(p).opcode:=A_OUT; 369 taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-32); 370 end; 371 A_LDS: 372 if (taicpu(p).oper[1]^.ref^.symbol=nil) and 373 (taicpu(p).oper[1]^.ref^.relsymbol=nil) and 374 (getsupreg(taicpu(p).oper[1]^.ref^.base)=RS_NO) and 375 (getsupreg(taicpu(p).oper[1]^.ref^.index)=RS_NO) and 376 (taicpu(p).oper[1]^.ref^.addressmode=AM_UNCHANGED) and 377 (taicpu(p).oper[1]^.ref^.offset>=32) and 378 (taicpu(p).oper[1]^.ref^.offset<=95) then 379 begin 380 DebugMsg('Peephole Lds2In performed', p); 381 382 taicpu(p).opcode:=A_IN; 383 taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32); 384 end; 385 A_IN: 386 if GetNextInstruction(p,hp1) then 387 begin 388 { 389 in rX,Y 390 ori rX,n 391 out Y,rX 392 393 into 394 sbi rX,lg(n) 395 } 396 if (taicpu(p).oper[1]^.val<=31) and 397 MatchInstruction(hp1,A_ORI) and 398 (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and 399 (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and 400 GetNextInstruction(hp1,hp2) and 401 MatchInstruction(hp2,A_OUT) and 402 MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and 403 MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then 404 begin 405 DebugMsg('Peephole InOriOut2Sbi performed', p); 406 407 taicpu(p).opcode:=A_SBI; 408 taicpu(p).loadconst(0,taicpu(p).oper[1]^.val); 409 taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val)); 410 asml.Remove(hp1); 411 hp1.Free; 412 asml.Remove(hp2); 413 hp2.Free; 414 result:=true; 415 end 416 { 417 in rX,Y 418 andi rX,not(n) 419 out Y,rX 420 421 into 422 cbi rX,lg(n) 423 } 424 else if (taicpu(p).oper[1]^.val<=31) and 425 MatchInstruction(hp1,A_ANDI) and 426 (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and 427 (PopCnt(byte(not(taicpu(hp1).oper[1]^.val)))=1) and 428 GetNextInstruction(hp1,hp2) and 429 MatchInstruction(hp2,A_OUT) and 430 MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and 431 MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then 432 begin 433 DebugMsg('Peephole InAndiOut2Cbi performed', p); 434 435 taicpu(p).opcode:=A_CBI; 436 taicpu(p).loadconst(0,taicpu(p).oper[1]^.val); 437 taicpu(p).loadconst(1,BsrByte(not(taicpu(hp1).oper[1]^.val))); 438 asml.Remove(hp1); 439 hp1.Free; 440 asml.Remove(hp2); 441 hp2.Free; 442 result:=true; 443 end 444 { 445 in rX,Y 446 andi rX,n 447 breq/brne L1 448 449 into 450 sbis/sbic Y,lg(n) 451 jmp L1 452 .Ltemp: 453 } 454 else if (taicpu(p).oper[1]^.val<=31) and 455 MatchInstruction(hp1,A_ANDI) and 456 (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and 457 (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and 458 GetNextInstruction(hp1,hp2) and 459 MatchInstruction(hp2,A_BRxx) and 460 (taicpu(hp2).condition in [C_EQ,C_NE]) then 461 begin 462 if taicpu(hp2).condition=C_EQ then 463 taicpu(p).opcode:=A_SBIS 464 else 465 taicpu(p).opcode:=A_SBIC; 466 467 DebugMsg('Peephole InAndiBrx2SbixJmp performed', p); 468 469 taicpu(p).loadconst(0,taicpu(p).oper[1]^.val); 470 taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val)); 471 asml.Remove(hp1); 472 hp1.Free; 473 474 taicpu(hp2).condition:=C_None; 475 if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then 476 taicpu(hp2).opcode:=A_JMP 477 else 478 taicpu(hp2).opcode:=A_RJMP; 479 480 current_asmdata.getjumplabel(l); 481 l.increfs; 482 asml.InsertAfter(tai_label.create(l), hp2); 483 484 result:=true; 485 end; 486 end; 487 A_ANDI: 488 begin 489 { 490 Turn 491 andi rx, #pow2 492 brne l 493 <op> 494 l: 495 Into 496 sbrs rx, #(1 shl imm) 497 <op> 498 l: 499 } 500 if (taicpu(p).ops=2) and 501 (taicpu(p).oper[1]^.typ=top_const) and 502 ispowerof2(taicpu(p).oper[1]^.val,i) and 503 assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and 504 GetNextInstruction(p,hp1) and 505 (hp1.typ=ait_instruction) and 506 (taicpu(hp1).opcode=A_BRxx) and 507 (taicpu(hp1).condition in [C_EQ,C_NE]) and 508 (taicpu(hp1).ops>0) and 509 (taicpu(hp1).oper[0]^.typ = top_ref) and 510 (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and 511 GetNextInstruction(hp1,hp2) and 512 (hp2.typ=ait_instruction) and 513 GetNextInstruction(hp2,hp3) and 514 (hp3.typ=ait_label) and 515 (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) then 516 begin 517 DebugMsg('Peephole AndiBr2Sbr performed', p); 518 519 taicpu(p).oper[1]^.val:=i; 520 521 if taicpu(hp1).condition=C_NE then 522 taicpu(p).opcode:=A_SBRS 523 else 524 taicpu(p).opcode:=A_SBRC; 525 526 asml.Remove(hp1); 527 hp1.free; 528 529 result:=true; 530 end 531 { 532 Remove 533 andi rx, #y 534 dealloc rx 535 } 536 else if (taicpu(p).ops=2) and 537 (taicpu(p).oper[0]^.typ=top_reg) and 538 assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and 539 (assigned(FindRegDeAlloc(NR_DEFAULTFLAGS,tai(p.Next))) or 540 (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs))) then 541 begin 542 DebugMsg('Redundant Andi removed', p); 543 544 result:=RemoveCurrentP(p); 545 end; 546 end; 547 A_ADD: 548 begin 549 if (taicpu(p).oper[1]^.reg=NR_R1) and 550 GetNextInstruction(p, hp1) and 551 MatchInstruction(hp1,A_ADC) then 552 begin 553 DebugMsg('Peephole AddAdc2Add performed', p); 554 555 result:=RemoveCurrentP(p); 556 end; 557 end; 558 A_SUB: 559 begin 560 if (taicpu(p).oper[1]^.reg=NR_R1) and 561 GetNextInstruction(p, hp1) and 562 MatchInstruction(hp1,A_SBC) then 563 begin 564 DebugMsg('Peephole SubSbc2Sub performed', p); 565 566 taicpu(hp1).opcode:=A_SUB; 567 568 result:=RemoveCurrentP(p); 569 end; 570 end; 571 A_CLR: 572 begin 573 { turn the common 574 clr rX 575 mov/ld rX, rY 576 into 577 mov/ld rX, rY 578 } 579 if (taicpu(p).ops=1) and 580 (taicpu(p).oper[0]^.typ=top_reg) and 581 GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and 582 (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and 583 (hp1.typ=ait_instruction) and 584 (taicpu(hp1).opcode in [A_MOV,A_LD]) and 585 (taicpu(hp1).ops>0) and 586 (taicpu(hp1).oper[0]^.typ=top_reg) and 587 (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) then 588 begin 589 DebugMsg('Peephole ClrMov2Mov performed', p); 590 591 result:=RemoveCurrentP(p); 592 end 593 { turn 594 clr rX 595 ... 596 adc rY, rX 597 into 598 ... 599 adc rY, r1 600 } 601 else if (taicpu(p).ops=1) and 602 (taicpu(p).oper[0]^.typ=top_reg) and 603 GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and 604 (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and 605 (hp1.typ=ait_instruction) and 606 (taicpu(hp1).opcode in [A_ADC,A_SBC]) and 607 (taicpu(hp1).ops=2) and 608 (taicpu(hp1).oper[1]^.typ=top_reg) and 609 (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and 610 (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[0]^.reg) and 611 assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then 612 begin 613 DebugMsg('Peephole ClrAdc2Adc performed', p); 614 615 taicpu(hp1).oper[1]^.reg:=NR_R1; 616 617 alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous)); 618 dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)); 619 620 if assigned(alloc) and assigned(dealloc) then 621 begin 622 asml.Remove(alloc); 623 alloc.Free; 624 asml.Remove(dealloc); 625 dealloc.Free; 626 end; 627 628 result:=RemoveCurrentP(p); 629 end; 630 end; 631 A_PUSH: 632 begin 633 { turn 634 push reg0 635 push reg1 636 pop reg3 637 pop reg2 638 639 into 640 641 movw reg2,reg0 642 643 or 644 645 mov reg3,reg1 646 mov reg2,reg0 647 648 } 649 if GetNextInstruction(p,hp1) and 650 MatchInstruction(hp1,A_PUSH) and 651 652 GetNextInstruction(hp1,hp2) and 653 MatchInstruction(hp2,A_POP) and 654 655 GetNextInstruction(hp2,hp3) and 656 MatchInstruction(hp3,A_POP) then 657 begin 658 if (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and 659 ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and 660 (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and 661 ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then 662 begin 663 DebugMsg('Peephole PushPushPopPop2Movw performed', p); 664 665 taicpu(hp3).ops:=2; 666 taicpu(hp3).opcode:=A_MOVW; 667 668 taicpu(hp3).loadreg(1, taicpu(p).oper[0]^.reg); 669 670 RemoveCurrentP(p); 671 RemoveCurrentP(p); 672 result:=RemoveCurrentP(p); 673 end 674 else 675 begin 676 DebugMsg('Peephole PushPushPopPop2MovMov performed', p); 677 678 taicpu(p).ops:=2; 679 taicpu(p).opcode:=A_MOV; 680 681 taicpu(hp1).ops:=2; 682 taicpu(hp1).opcode:=A_MOV; 683 684 taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg); 685 taicpu(p).loadreg(0, taicpu(hp3).oper[0]^.reg); 686 687 taicpu(hp1).loadreg(1, taicpu(hp1).oper[0]^.reg); 688 taicpu(hp1).loadreg(0, taicpu(hp2).oper[0]^.reg); 689 690 { life range of reg2 and reg3 is increased, fix register allocation entries } 691 CopyUsedRegs(TmpUsedRegs); 692 UpdateUsedRegs(TmpUsedRegs,tai(p.Next)); 693 AllocRegBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2,TmpUsedRegs); 694 ReleaseUsedRegs(TmpUsedRegs); 695 696 CopyUsedRegs(TmpUsedRegs); 697 AllocRegBetween(taicpu(hp3).oper[0]^.reg,p,hp3,TmpUsedRegs); 698 ReleaseUsedRegs(TmpUsedRegs); 699 700 IncludeRegInUsedRegs(taicpu(hp3).oper[0]^.reg,UsedRegs); 701 UpdateUsedRegs(tai(p.Next)); 702 703 asml.Remove(hp2); 704 hp2.Free; 705 asml.Remove(hp3); 706 hp3.Free; 707 708 result:=true; 709 end 710 711 end; 712 end; 713 A_CALL: 714 if (cs_opt_level4 in current_settings.optimizerswitches) and 715 GetNextInstruction(p,hp1) and 716 MatchInstruction(hp1,A_RET) then 717 begin 718 DebugMsg('Peephole CallReg2Jmp performed', p); 719 720 taicpu(p).opcode:=A_JMP; 721 722 asml.Remove(hp1); 723 hp1.Free; 724 725 result:=true; 726 end; 727 A_RCALL: 728 if (cs_opt_level4 in current_settings.optimizerswitches) and 729 GetNextInstruction(p,hp1) and 730 MatchInstruction(hp1,A_RET) then 731 begin 732 DebugMsg('Peephole RCallReg2RJmp performed', p); 733 734 taicpu(p).opcode:=A_RJMP; 735 736 asml.Remove(hp1); 737 hp1.Free; 738 739 result:=true; 740 end; 741 A_MOV: 742 begin 743 { change 744 mov reg0, reg1 745 dealloc reg0 746 into 747 dealloc reg0 748 } 749 if MatchOpType(taicpu(p),top_reg,top_reg) then 750 begin 751 CopyUsedRegs(TmpUsedRegs); 752 UpdateUsedRegs(TmpUsedRegs,tai(p.Next)); 753 if not(RegInUsedRegs(taicpu(p).oper[0]^.reg,TmpUsedRegs)) and 754 { reg. allocation information before calls is not perfect, so don't do this before 755 calls/icalls } 756 GetNextInstruction(p,hp1) and 757 not(MatchInstruction(hp1,[A_CALL,A_RCALL])) then 758 begin 759 DebugMsg('Peephole Mov2Nop performed', p); 760 result:=RemoveCurrentP(p); 761 ReleaseUsedRegs(TmpUsedRegs); 762 exit; 763 end; 764 ReleaseUsedRegs(TmpUsedRegs); 765 end; 766 767 { turn 768 mov reg0, reg1 769 <op> reg2,reg0 770 dealloc reg0 771 into 772 <op> reg2,reg1 773 } 774 if MatchOpType(taicpu(p),top_reg,top_reg) and 775 GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and 776 (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and 777 (MatchInstruction(hp1,[A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_ADC,A_SBC,A_EOR,A_AND,A_OR, 778 A_OUT,A_IN]) or 779 { the reference register of ST/STD cannot be replaced } 780 (MatchInstruction(hp1,[A_STD,A_ST]) and (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)))) and 781 (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and 782 {(taicpu(hp1).ops=1) and 783 (taicpu(hp1).oper[0]^.typ = top_reg) and 784 (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and } 785 assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then 786 begin 787 DebugMsg('Peephole MovOp2Op performed', p); 788 789 for i := 0 to taicpu(hp1).ops-1 do 790 if taicpu(hp1).oper[i]^.typ=top_reg then 791 if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then 792 taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg; 793 794 alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous)); 795 dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next)); 796 797 if assigned(alloc) and assigned(dealloc) then 798 begin 799 asml.Remove(alloc); 800 alloc.Free; 801 asml.Remove(dealloc); 802 dealloc.Free; 803 end; 804 805 { life range of reg1 is increased } 806 AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs); 807 { p will be removed, update used register as we continue 808 with the next instruction after p } 809 810 result:=RemoveCurrentP(p); 811 end 812 { remove 813 mov reg0,reg0 814 } 815 else if (taicpu(p).ops=2) and 816 (taicpu(p).oper[0]^.typ = top_reg) and 817 (taicpu(p).oper[1]^.typ = top_reg) and 818 (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then 819 begin 820 DebugMsg('Peephole RedundantMov performed', p); 821 822 result:=RemoveCurrentP(p); 823 end 824 { 825 Turn 826 mov rx,ry 827 op rx,rz 828 mov ry, rx 829 Into 830 op ry,rz 831 } 832 else if (taicpu(p).ops=2) and 833 MatchOpType(taicpu(p),top_reg,top_reg) and 834 GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and 835 (hp1.typ=ait_instruction) and 836 (taicpu(hp1).ops >= 1) and 837 (taicpu(hp1).oper[0]^.typ = top_reg) and 838 GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and 839 MatchInstruction(hp2,A_MOV) and 840 MatchOpType(taicpu(hp2),top_reg,top_reg) and 841 (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and 842 (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and 843 (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and 844 (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp2)) and 845 (taicpu(hp1).opcode in [A_ADD,A_ADC,A_SUB,A_SBC,A_AND,A_OR,A_EOR, 846 A_INC,A_DEC, 847 A_LSL,A_LSR,A_ASR,A_ROR,A_ROL]) and 848 assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then 849 begin 850 DebugMsg('Peephole MovOpMov2Op performed', p); 851 852 if (taicpu(hp1).ops=2) and 853 (taicpu(hp1).oper[1]^.typ=top_reg) and 854 (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then 855 taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg; 856 857 taicpu(hp1).oper[0]^.reg:=taicpu(p).oper[1]^.reg; 858 859 alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous)); 860 dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next)); 861 862 if assigned(alloc) and assigned(dealloc) then 863 begin 864 asml.Remove(alloc); 865 alloc.Free; 866 asml.Remove(dealloc); 867 dealloc.Free; 868 end; 869 870 asml.remove(hp2); 871 hp2.free; 872 873 result:=RemoveCurrentP(p); 874 end 875 { 876 Turn 877 mov rx,ry 878 op rx,rw 879 mov rw,rx 880 Into 881 op rw,ry 882 } 883 else if (taicpu(p).ops=2) and 884 MatchOpType(taicpu(p),top_reg,top_reg) and 885 GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and 886 (hp1.typ=ait_instruction) and 887 (taicpu(hp1).ops = 2) and 888 MatchOpType(taicpu(hp1),top_reg,top_reg) and 889 GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and 890 (hp2.typ=ait_instruction) and 891 (taicpu(hp2).opcode=A_MOV) and 892 MatchOpType(taicpu(hp2),top_reg,top_reg) and 893 (taicpu(hp2).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and 894 (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and 895 (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and 896 (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and 897 (taicpu(hp1).opcode in [A_ADD,A_ADC,A_AND,A_OR,A_EOR]) and 898 assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then 899 begin 900 DebugMsg('Peephole MovOpMov2Op2 performed', p); 901 902 taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg; 903 taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg; 904 905 alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous)); 906 dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next)); 907 908 if assigned(alloc) and assigned(dealloc) then 909 begin 910 asml.Remove(alloc); 911 alloc.Free; 912 asml.Remove(dealloc); 913 dealloc.Free; 914 end; 915 916 result:=RemoveCurrentP(p); 917 918 asml.remove(hp2); 919 hp2.free; 920 end 921 { fold 922 mov reg2,reg0 923 mov reg3,reg1 924 to 925 movw reg2,reg0 926 } 927 else if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and 928 (taicpu(p).ops=2) and 929 (taicpu(p).oper[0]^.typ = top_reg) and 930 (taicpu(p).oper[1]^.typ = top_reg) and 931 getnextinstruction(p,hp1) and 932 (hp1.typ = ait_instruction) and 933 (taicpu(hp1).opcode = A_MOV) and 934 (taicpu(hp1).ops=2) and 935 (taicpu(hp1).oper[0]^.typ = top_reg) and 936 (taicpu(hp1).oper[1]^.typ = top_reg) and 937 (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and 938 ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and 939 ((getsupreg(taicpu(p).oper[1]^.reg) mod 2)=0) and 940 (getsupreg(taicpu(hp1).oper[1]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)+1) then 941 begin 942 DebugMsg('Peephole MovMov2Movw performed', p); 943 944 alloc:=FindRegAllocBackward(taicpu(hp1).oper[0]^.reg,tai(hp1.Previous)); 945 if assigned(alloc) then 946 begin 947 asml.Remove(alloc); 948 asml.InsertBefore(alloc,p); 949 { proper book keeping of currently used registers } 950 IncludeRegInUsedRegs(taicpu(hp1).oper[0]^.reg,UsedRegs); 951 end; 952 953 taicpu(p).opcode:=A_MOVW; 954 asml.remove(hp1); 955 hp1.free; 956 result:=true; 957 end 958 { 959 This removes the first mov from 960 mov rX,... 961 mov rX,... 962 } 963 else if GetNextInstruction(p,hp1) and MatchInstruction(hp1,A_MOV) then 964 while MatchInstruction(hp1,A_MOV) and 965 MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and 966 { don't remove the first mov if the second is a mov rX,rX } 967 not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) do 968 begin 969 DebugMsg('Peephole MovMov2Mov performed', p); 970 971 result:=RemoveCurrentP(p); 972 973 GetNextInstruction(hp1,hp1); 974 if not assigned(hp1) then 975 break; 976 end; 977 end; 978 A_SBIC, 979 A_SBIS: 980 begin 981 { 982 Turn 983 sbic/sbis X, y 984 jmp .L1 985 op 986 .L1: 987 988 into 989 sbis/sbic X,y 990 op 991 .L1: 992 } 993 if GetNextInstruction(p, hp1) and 994 (hp1.typ=ait_instruction) and 995 (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and 996 (taicpu(hp1).ops>0) and 997 (taicpu(hp1).oper[0]^.typ = top_ref) and 998 (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and 999 GetNextInstruction(hp1, hp2) and 1000 (hp2.typ=ait_instruction) and 1001 (not taicpu(hp2).is_jmp) and 1002 GetNextInstruction(hp2, hp3) and 1003 (hp3.typ=ait_label) and 1004 (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) then 1005 begin 1006 DebugMsg('Peephole SbiJmp2Sbi performed',p); 1007 1008 if taicpu(p).opcode=A_SBIC then 1009 taicpu(p).opcode:=A_SBIS 1010 else 1011 taicpu(p).opcode:=A_SBIC; 1012 1013 tai_label(hp3).labsym.decrefs; 1014 1015 AsmL.remove(hp1); 1016 taicpu(hp1).Free; 1017 1018 result:=true; 1019 end 1020 { 1021 Turn 1022 sbiX X, y 1023 jmp .L1 1024 jmp .L2 1025 .L1: 1026 op 1027 .L2: 1028 1029 into 1030 sbiX X,y 1031 .L1: 1032 op 1033 .L2: 1034 } 1035 else if GetNextInstruction(p, hp1) and 1036 (hp1.typ=ait_instruction) and 1037 (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and 1038 (taicpu(hp1).ops>0) and 1039 (taicpu(hp1).oper[0]^.typ = top_ref) and 1040 (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and 1041 1042 GetNextInstruction(hp1, hp2) and 1043 (hp2.typ=ait_instruction) and 1044 (taicpu(hp2).opcode in [A_JMP,A_RJMP]) and 1045 (taicpu(hp2).ops>0) and 1046 (taicpu(hp2).oper[0]^.typ = top_ref) and 1047 (taicpu(hp2).oper[0]^.ref^.symbol is TAsmLabel) and 1048 1049 GetNextInstruction(hp2, hp3) and 1050 (hp3.typ=ait_label) and 1051 (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) and 1052 1053 GetNextInstruction(hp3, hp4) and 1054 (hp4.typ=ait_instruction) and 1055 1056 GetNextInstruction(hp4, hp5) and 1057 (hp3.typ=ait_label) and 1058 (taicpu(hp2).oper[0]^.ref^.symbol=tai_label(hp5).labsym) then 1059 begin 1060 DebugMsg('Peephole SbiJmpJmp2Sbi performed',p); 1061 1062 tai_label(hp3).labsym.decrefs; 1063 tai_label(hp5).labsym.decrefs; 1064 1065 AsmL.remove(hp1); 1066 taicpu(hp1).Free; 1067 1068 AsmL.remove(hp2); 1069 taicpu(hp2).Free; 1070 1071 result:=true; 1072 end; 1073 end; 1074 end; 1075 end; 1076 end; 1077 end; 1078 1079 1080 procedure TCpuAsmOptimizer.PeepHoleOptPass2; 1081 begin 1082 end; 1083 1084 begin 1085 casmoptimizer:=TCpuAsmOptimizer; 1086 End. 1087 1088