1 {
2     Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
3     Development Team
4 
5     This unit implements the ARM optimizer object
6 
7     This program is free software; you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation; either version 2 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program; if not, write to the Free Software
19     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 
21  ****************************************************************************
22 }
23 
24 
25 Unit aoptcpu;
26 
27 {$i fpcdefs.inc}
28 
29 { $define DEBUG_AOPTCPU}
30 
31 Interface
32 
33 uses cpubase,cgbase,aasmtai,aopt,AoptObj,aoptcpub;
34 
35 Type
36   TCpuAsmOptimizer = class(TAsmOptimizer)
37     { outputs a debug message into the assembler file }
38     procedure DebugMsg(const s: string; p: tai);
39 
GetNextInstructionUsingRegnull40     Function GetNextInstructionUsingReg(Current: tai; Var Next: tai;reg : TRegister): Boolean;
RegInInstructionnull41     function RegInInstruction(Reg: TRegister; p1: tai): Boolean; override;
RegLoadedWithNewValuenull42     function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
InstructionLoadsFromRegnull43     function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
44 
45     { uses the same constructor as TAopObj }
PeepHoleOptPass1Cpunull46     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
47     procedure PeepHoleOptPass2;override;
48   End;
49 
50 Implementation
51 
52   uses
53     cutils,
54     verbose,
55     cpuinfo,
56     aasmbase,aasmcpu,aasmdata,
57     aoptutils,
58     globals,globtype,
59     cgutils;
60 
61   type
62     TAsmOpSet = set of TAsmOp;
63 
CanBeCondnull64   function CanBeCond(p : tai) : boolean;
65     begin
66       result:=(p.typ=ait_instruction) and (taicpu(p).condition=C_None);
67     end;
68 
69 
RefsEqualnull70   function RefsEqual(const r1, r2: treference): boolean;
71     begin
72       refsequal :=
73         (r1.offset = r2.offset) and
74         (r1.base = r2.base) and
75         (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
76         (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
77         (r1.relsymbol = r2.relsymbol) and
78         (r1.addressmode = r2.addressmode);
79     end;
80 
81 
MatchOperandnull82   function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
83     begin
84       result:=oper1.typ=oper2.typ;
85 
86       if result then
87         case oper1.typ of
88           top_const:
89             Result:=oper1.val = oper2.val;
90           top_reg:
91             Result:=oper1.reg = oper2.reg;
92           top_ref:
93             Result:=RefsEqual(oper1.ref^, oper2.ref^);
94           else Result:=false;
95         end
96     end;
97 
98 
MatchOperandnull99   function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
100     begin
101       result := (oper.typ = top_reg) and (oper.reg = reg);
102     end;
103 
104 
MatchInstructionnull105   function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
106     begin
107       result :=
108         (instr.typ = ait_instruction) and
109         (taicpu(instr).opcode = op);
110     end;
111 
112 
MatchInstructionnull113   function MatchInstruction(const instr: tai; const ops: TAsmOpSet): boolean;
114     begin
115       result :=
116         (instr.typ = ait_instruction) and
117         (taicpu(instr).opcode in ops);
118     end;
119 
120 
MatchInstructionnull121   function MatchInstruction(const instr: tai; const ops: TAsmOpSet;opcount : byte): boolean;
122     begin
123       result :=
124         (instr.typ = ait_instruction) and
125         (taicpu(instr).opcode in ops) and
126         (taicpu(instr).ops=opcount);
127     end;
128 
129 
130 {$ifdef DEBUG_AOPTCPU}
131   procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
132     begin
133       asml.insertbefore(tai_comment.Create(strpnew(s)), p);
134     end;
135 {$else DEBUG_AOPTCPU}
136   procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
137     begin
138     end;
139 {$endif DEBUG_AOPTCPU}
140 
141 
TCpuAsmOptimizer.RegInInstructionnull142   function TCpuAsmOptimizer.RegInInstruction(Reg: TRegister; p1: tai): Boolean;
143     begin
144       If (p1.typ = ait_instruction) and (taicpu(p1).opcode in [A_MUL,A_MULS,A_FMUL,A_FMULS,A_FMULSU]) and
145               ((getsupreg(reg)=RS_R0) or (getsupreg(reg)=RS_R1)) then
146         Result:=true
147       else if (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_MOVW) and
148         ((TRegister(ord(taicpu(p1).oper[0]^.reg)+1)=reg) or (TRegister(ord(taicpu(p1).oper[1]^.reg)+1)=reg) or
149          (taicpu(p1).oper[0]^.reg=reg) or (taicpu(p1).oper[1]^.reg=reg)) then
150         Result:=true
151       else
152         Result:=inherited RegInInstruction(Reg, p1);
153     end;
154 
155 
TCpuAsmOptimizer.GetNextInstructionUsingRegnull156   function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
157     var Next: tai; reg: TRegister): Boolean;
158     begin
159       Next:=Current;
160       repeat
161         Result:=GetNextInstruction(Next,Next);
162       until not(cs_opt_level3 in current_settings.optimizerswitches) or not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or
163         (is_calljmp(taicpu(Next).opcode));
164     end;
165 
166 
TCpuAsmOptimizer.RegLoadedWithNewValuenull167   function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
168     var
169       p: taicpu;
170     begin
171       if not assigned(hp) or
172          (hp.typ <> ait_instruction) then
173        begin
174          Result := false;
175          exit;
176        end;
177       p := taicpu(hp);
178       Result := ((p.opcode in [A_LDI,A_MOV,A_LDS]) and (reg=p.oper[0]^.reg) and ((p.oper[1]^.typ<>top_reg) or (reg<>p.oper[0]^.reg))) or
179         ((p.opcode in [A_LD,A_LDD,A_LPM]) and (reg=p.oper[0]^.reg) and not(RegInRef(reg,p.oper[1]^.ref^))) or
180         ((p.opcode in [A_MOVW]) and ((reg=p.oper[0]^.reg) or (TRegister(ord(reg)+1)=p.oper[0]^.reg)) and not(reg=p.oper[1]^.reg) and not(TRegister(ord(reg)+1)=p.oper[1]^.reg)) or
181         ((p.opcode in [A_POP]) and (reg=p.oper[0]^.reg));
182     end;
183 
184 
TCpuAsmOptimizer.InstructionLoadsFromRegnull185   function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
186     var
187       p: taicpu;
188       i: longint;
189     begin
190       Result := false;
191 
192       if not (assigned(hp) and (hp.typ = ait_instruction)) then
193         exit;
194       p:=taicpu(hp);
195 
196       i:=0;
197 
198       { we do not care about the stack pointer }
199       if p.opcode in [A_POP] then
200         exit;
201 
202       { first operand only written?
203         then skip it }
204       if p.opcode in [A_MOV,A_LD,A_LDD,A_LDS,A_LPM,A_LDI,A_MOVW] then
205         i:=1;
206 
207       while i<p.ops do
208         begin
209           case p.oper[i]^.typ of
210             top_reg:
211               Result := (p.oper[i]^.reg = reg) or
212                 { MOVW }
213                 ((i=1) and (p.opcode=A_MOVW) and (getsupreg(p.oper[0]^.reg)+1=getsupreg(reg)));
214             top_ref:
215               Result :=
216                 (p.oper[i]^.ref^.base = reg) or
217                 (p.oper[i]^.ref^.index = reg);
218           end;
219           { Bailout if we found something }
220           if Result then
221             exit;
222           Inc(i);
223         end;
224     end;
225 
TCpuAsmOptimizer.PeepHoleOptPass1Cpunull226   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
227     var
228       hp1,hp2,hp3,hp4,hp5: tai;
229       alloc, dealloc: tai_regalloc;
230       i: integer;
231       l: TAsmLabel;
232       TmpUsedRegs : TAllUsedRegs;
233     begin
234       result := false;
235       case p.typ of
236         ait_instruction:
237           begin
238             {
239               change
240               <op> reg,x,y
241               cp reg,r1
242               into
243               <op>s reg,x,y
244             }
245             { this optimization can applied only to the currently enabled operations because
246               the other operations do not update all flags and FPC does not track flag usage }
247             if MatchInstruction(p, [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_DEC,A_EOR,
248                                     A_INC,A_LSL,A_LSR,
249                                     A_OR,A_ORI,A_ROL,A_ROR,A_SBC,A_SBCI,A_SUB,A_SUBI]) and
250               GetNextInstruction(p, hp1) and
251               ((MatchInstruction(hp1, A_CP) and
252                 (((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
253                   (taicpu(hp1).oper[1]^.reg = NR_R1)) or
254                  ((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
255                   (taicpu(hp1).oper[0]^.reg = NR_R1) and
256                   (taicpu(p).opcode in [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_EOR,
257                                         A_LSL,A_LSR,
258                                         A_OR,A_ORI,A_ROL,A_ROR,A_SUB,A_SBI])))) or
259                (MatchInstruction(hp1, A_CPI) and
260                 (taicpu(p).opcode = A_ANDI) and
261                 (taicpu(p).oper[1]^.typ=top_const) and
262                 (taicpu(hp1).oper[1]^.typ=top_const) and
263                 (taicpu(p).oper[1]^.val=taicpu(hp1).oper[1]^.val))) and
264               GetNextInstruction(hp1, hp2) and
265               { be careful here, following instructions could use other flags
266                 however after a jump fpc never depends on the value of flags }
267               { All above instructions set Z and N according to the following
268                 Z := result = 0;
269                 N := result[31];
270                 EQ = Z=1; NE = Z=0;
271                 MI = N=1; PL = N=0; }
272               MatchInstruction(hp2, A_BRxx) and
273               ((taicpu(hp2).condition in [C_EQ,C_NE,C_MI,C_PL]) or
274               { sub/sbc set all flags }
275                (taicpu(p).opcode in [A_SUB,A_SBI])){ and
276               no flag allocation tracking implemented yet on avr
277               assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next)))} then
278               begin
279                 { move flag allocation if possible }
280                 { no flag allocation tracking implemented yet on avr
281                 GetLastInstruction(hp1, hp2);
282                 hp2:=FindRegAlloc(NR_DEFAULTFLAGS,tai(hp2.Next));
283                 if assigned(hp2) then
284                   begin
285                     asml.Remove(hp2);
286                     asml.insertbefore(hp2, p);
287                   end;
288                 }
289 
290                 // If we compare to the same value we are masking then invert the comparison
291                 if (taicpu(hp1).opcode=A_CPI) or
292                   { sub/sbc with reverted? }
293                   ((taicpu(hp1).oper[0]^.reg = NR_R1) and (taicpu(p).opcode in [A_SUB,A_SBI])) then
294                   taicpu(hp2).condition:=inverse_cond(taicpu(hp2).condition);
295 
296                 asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
297                 asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,hp2), hp2);
298                 IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
299 
300                 DebugMsg('Peephole OpCp2Op performed', p);
301 
302                 asml.remove(hp1);
303                 hp1.free;
304                 Result:=true;
305               end
306             else
307               case taicpu(p).opcode of
308                 A_LDI:
309                   begin
310                     { turn
311                       ldi reg0, imm
312                       cp/mov reg1, reg0
313                       dealloc reg0
314                       into
315                       cpi/ldi reg1, imm
316                     }
317                     if MatchOpType(taicpu(p),top_reg,top_const) and
318                        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
319                        MatchInstruction(hp1,[A_CP,A_MOV],2) and
320                        (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
321                        MatchOpType(taicpu(hp1),top_reg,top_reg) and
322                        (getsupreg(taicpu(hp1).oper[0]^.reg) in [16..31]) and
323                        (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
324                        not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) then
325                       begin
326                         CopyUsedRegs(TmpUsedRegs);
327                         if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs)) then
328                           begin
329                             case taicpu(hp1).opcode of
330                               A_CP:
331                                 taicpu(hp1).opcode:=A_CPI;
332                               A_MOV:
333                                 taicpu(hp1).opcode:=A_LDI;
334                               else
335                                 internalerror(2016111901);
336                             end;
337                             taicpu(hp1).loadconst(1, taicpu(p).oper[1]^.val);
338 
339                             alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
340                             dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
341 
342                             if assigned(alloc) and assigned(dealloc) then
343                               begin
344                                 asml.Remove(alloc);
345                                 alloc.Free;
346                                 asml.Remove(dealloc);
347                                 dealloc.Free;
348                               end;
349 
350                             DebugMsg('Peephole LdiMov/Cp2Ldi/Cpi performed', p);
351 
352                             RemoveCurrentP(p);
353                           end;
354                         ReleaseUsedRegs(TmpUsedRegs);
355                       end;
356                   end;
357                 A_STS:
358                   if (taicpu(p).oper[0]^.ref^.symbol=nil) and
359                     (taicpu(p).oper[0]^.ref^.relsymbol=nil) and
360                     (getsupreg(taicpu(p).oper[0]^.ref^.base)=RS_NO) and
361                     (getsupreg(taicpu(p).oper[0]^.ref^.index)=RS_NO) and
362                     (taicpu(p).oper[0]^.ref^.addressmode=AM_UNCHANGED) and
363                     (taicpu(p).oper[0]^.ref^.offset>=32) and
364                     (taicpu(p).oper[0]^.ref^.offset<=95) then
365                     begin
366                       DebugMsg('Peephole Sts2Out performed', p);
367 
368                       taicpu(p).opcode:=A_OUT;
369                       taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-32);
370                     end;
371                 A_LDS:
372                   if (taicpu(p).oper[1]^.ref^.symbol=nil) and
373                     (taicpu(p).oper[1]^.ref^.relsymbol=nil) and
374                     (getsupreg(taicpu(p).oper[1]^.ref^.base)=RS_NO) and
375                     (getsupreg(taicpu(p).oper[1]^.ref^.index)=RS_NO) and
376                     (taicpu(p).oper[1]^.ref^.addressmode=AM_UNCHANGED) and
377                     (taicpu(p).oper[1]^.ref^.offset>=32) and
378                     (taicpu(p).oper[1]^.ref^.offset<=95) then
379                     begin
380                       DebugMsg('Peephole Lds2In performed', p);
381 
382                       taicpu(p).opcode:=A_IN;
383                       taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32);
384                     end;
385                 A_IN:
386                     if GetNextInstruction(p,hp1) then
387                       begin
388                         {
389                           in rX,Y
390                           ori rX,n
391                           out Y,rX
392 
393                           into
394                           sbi rX,lg(n)
395                         }
396                         if (taicpu(p).oper[1]^.val<=31) and
397                           MatchInstruction(hp1,A_ORI) and
398                           (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
399                           (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
400                           GetNextInstruction(hp1,hp2) and
401                           MatchInstruction(hp2,A_OUT) and
402                           MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
403                           MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then
404                           begin
405                             DebugMsg('Peephole InOriOut2Sbi performed', p);
406 
407                             taicpu(p).opcode:=A_SBI;
408                             taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
409                             taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
410                             asml.Remove(hp1);
411                             hp1.Free;
412                             asml.Remove(hp2);
413                             hp2.Free;
414                             result:=true;
415                           end
416                          {
417                           in rX,Y
418                           andi rX,not(n)
419                           out Y,rX
420 
421                           into
422                           cbi rX,lg(n)
423                         }
424                         else if (taicpu(p).oper[1]^.val<=31) and
425                            MatchInstruction(hp1,A_ANDI) and
426                            (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
427                            (PopCnt(byte(not(taicpu(hp1).oper[1]^.val)))=1) and
428                            GetNextInstruction(hp1,hp2) and
429                            MatchInstruction(hp2,A_OUT) and
430                            MatchOperand(taicpu(hp2).oper[1]^,taicpu(p).oper[0]^) and
431                            MatchOperand(taicpu(hp2).oper[0]^,taicpu(p).oper[1]^) then
432                           begin
433                             DebugMsg('Peephole InAndiOut2Cbi performed', p);
434 
435                             taicpu(p).opcode:=A_CBI;
436                             taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
437                             taicpu(p).loadconst(1,BsrByte(not(taicpu(hp1).oper[1]^.val)));
438                             asml.Remove(hp1);
439                             hp1.Free;
440                             asml.Remove(hp2);
441                             hp2.Free;
442                             result:=true;
443                           end
444                          {
445                               in rX,Y
446                               andi rX,n
447                               breq/brne L1
448 
449                           into
450                               sbis/sbic Y,lg(n)
451                               jmp L1
452                             .Ltemp:
453                         }
454                         else if (taicpu(p).oper[1]^.val<=31) and
455                            MatchInstruction(hp1,A_ANDI) and
456                            (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
457                            (PopCnt(byte(taicpu(hp1).oper[1]^.val))=1) and
458                            GetNextInstruction(hp1,hp2) and
459                            MatchInstruction(hp2,A_BRxx) and
460                            (taicpu(hp2).condition in [C_EQ,C_NE]) then
461                           begin
462                             if taicpu(hp2).condition=C_EQ then
463                               taicpu(p).opcode:=A_SBIS
464                             else
465                               taicpu(p).opcode:=A_SBIC;
466 
467                             DebugMsg('Peephole InAndiBrx2SbixJmp performed', p);
468 
469                             taicpu(p).loadconst(0,taicpu(p).oper[1]^.val);
470                             taicpu(p).loadconst(1,BsrByte(taicpu(hp1).oper[1]^.val));
471                             asml.Remove(hp1);
472                             hp1.Free;
473 
474                             taicpu(hp2).condition:=C_None;
475                             if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
476                               taicpu(hp2).opcode:=A_JMP
477                             else
478                               taicpu(hp2).opcode:=A_RJMP;
479 
480                             current_asmdata.getjumplabel(l);
481                             l.increfs;
482                             asml.InsertAfter(tai_label.create(l), hp2);
483 
484                             result:=true;
485                           end;
486                       end;
487                 A_ANDI:
488                   begin
489                     {
490                       Turn
491                           andi rx, #pow2
492                           brne l
493                           <op>
494                         l:
495                       Into
496                           sbrs rx, #(1 shl imm)
497                           <op>
498                         l:
499                     }
500                     if (taicpu(p).ops=2) and
501                        (taicpu(p).oper[1]^.typ=top_const) and
502                        ispowerof2(taicpu(p).oper[1]^.val,i) and
503                        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
504                        GetNextInstruction(p,hp1) and
505                        (hp1.typ=ait_instruction) and
506                        (taicpu(hp1).opcode=A_BRxx) and
507                        (taicpu(hp1).condition in [C_EQ,C_NE]) and
508                        (taicpu(hp1).ops>0) and
509                        (taicpu(hp1).oper[0]^.typ = top_ref) and
510                        (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
511                        GetNextInstruction(hp1,hp2) and
512                        (hp2.typ=ait_instruction) and
513                        GetNextInstruction(hp2,hp3) and
514                        (hp3.typ=ait_label) and
515                        (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) then
516                       begin
517                         DebugMsg('Peephole AndiBr2Sbr performed', p);
518 
519                         taicpu(p).oper[1]^.val:=i;
520 
521                         if taicpu(hp1).condition=C_NE then
522                           taicpu(p).opcode:=A_SBRS
523                         else
524                           taicpu(p).opcode:=A_SBRC;
525 
526                         asml.Remove(hp1);
527                         hp1.free;
528 
529                         result:=true;
530                       end
531                     {
532                       Remove
533                         andi rx, #y
534                         dealloc rx
535                     }
536                     else if (taicpu(p).ops=2) and
537                        (taicpu(p).oper[0]^.typ=top_reg) and
538                        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
539                        (assigned(FindRegDeAlloc(NR_DEFAULTFLAGS,tai(p.Next))) or
540                         (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs))) then
541                       begin
542                         DebugMsg('Redundant Andi removed', p);
543 
544                         result:=RemoveCurrentP(p);
545                       end;
546                   end;
547                 A_ADD:
548                   begin
549                     if (taicpu(p).oper[1]^.reg=NR_R1) and
550                     GetNextInstruction(p, hp1) and
551                     MatchInstruction(hp1,A_ADC) then
552                     begin
553                       DebugMsg('Peephole AddAdc2Add performed', p);
554 
555                       result:=RemoveCurrentP(p);
556                     end;
557                   end;
558                 A_SUB:
559                   begin
560                     if (taicpu(p).oper[1]^.reg=NR_R1) and
561                     GetNextInstruction(p, hp1) and
562                     MatchInstruction(hp1,A_SBC) then
563                     begin
564                       DebugMsg('Peephole SubSbc2Sub performed', p);
565 
566                       taicpu(hp1).opcode:=A_SUB;
567 
568                       result:=RemoveCurrentP(p);
569                     end;
570                   end;
571                 A_CLR:
572                   begin
573                     { turn the common
574                       clr rX
575                       mov/ld rX, rY
576                       into
577                       mov/ld rX, rY
578                     }
579                     if (taicpu(p).ops=1) and
580                        (taicpu(p).oper[0]^.typ=top_reg) and
581                        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
582                        (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
583                        (hp1.typ=ait_instruction) and
584                        (taicpu(hp1).opcode in [A_MOV,A_LD]) and
585                        (taicpu(hp1).ops>0) and
586                        (taicpu(hp1).oper[0]^.typ=top_reg) and
587                        (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) then
588                       begin
589                         DebugMsg('Peephole ClrMov2Mov performed', p);
590 
591                         result:=RemoveCurrentP(p);
592                       end
593                     { turn
594                       clr rX
595                       ...
596                       adc rY, rX
597                       into
598                       ...
599                       adc rY, r1
600                     }
601                     else if (taicpu(p).ops=1) and
602                        (taicpu(p).oper[0]^.typ=top_reg) and
603                        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
604                        (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
605                        (hp1.typ=ait_instruction) and
606                        (taicpu(hp1).opcode in [A_ADC,A_SBC]) and
607                        (taicpu(hp1).ops=2) and
608                        (taicpu(hp1).oper[1]^.typ=top_reg) and
609                        (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
610                        (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[0]^.reg) and
611                        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
612                       begin
613                         DebugMsg('Peephole ClrAdc2Adc performed', p);
614 
615                         taicpu(hp1).oper[1]^.reg:=NR_R1;
616 
617                         alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
618                         dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
619 
620                         if assigned(alloc) and assigned(dealloc) then
621                           begin
622                             asml.Remove(alloc);
623                             alloc.Free;
624                             asml.Remove(dealloc);
625                             dealloc.Free;
626                           end;
627 
628                         result:=RemoveCurrentP(p);
629                       end;
630                   end;
631                 A_PUSH:
632                   begin
633                     { turn
634                       push reg0
635                       push reg1
636                       pop reg3
637                       pop reg2
638 
639                       into
640 
641                       movw reg2,reg0
642 
643                       or
644 
645                       mov  reg3,reg1
646                       mov  reg2,reg0
647 
648                     }
649                     if GetNextInstruction(p,hp1) and
650                        MatchInstruction(hp1,A_PUSH) and
651 
652                        GetNextInstruction(hp1,hp2) and
653                        MatchInstruction(hp2,A_POP) and
654 
655                        GetNextInstruction(hp2,hp3) and
656                        MatchInstruction(hp3,A_POP) then
657                       begin
658                        if (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
659                          ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
660                          (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
661                          ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
662                          begin
663                            DebugMsg('Peephole PushPushPopPop2Movw performed', p);
664 
665                            taicpu(hp3).ops:=2;
666                            taicpu(hp3).opcode:=A_MOVW;
667 
668                            taicpu(hp3).loadreg(1, taicpu(p).oper[0]^.reg);
669 
670                            RemoveCurrentP(p);
671                            RemoveCurrentP(p);
672                            result:=RemoveCurrentP(p);
673                          end
674                        else
675                          begin
676                            DebugMsg('Peephole PushPushPopPop2MovMov performed', p);
677 
678                            taicpu(p).ops:=2;
679                            taicpu(p).opcode:=A_MOV;
680 
681                            taicpu(hp1).ops:=2;
682                            taicpu(hp1).opcode:=A_MOV;
683 
684                            taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
685                            taicpu(p).loadreg(0, taicpu(hp3).oper[0]^.reg);
686 
687                            taicpu(hp1).loadreg(1, taicpu(hp1).oper[0]^.reg);
688                            taicpu(hp1).loadreg(0, taicpu(hp2).oper[0]^.reg);
689 
690                            { life range of reg2 and reg3 is increased, fix register allocation entries }
691                            CopyUsedRegs(TmpUsedRegs);
692                            UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
693                            AllocRegBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2,TmpUsedRegs);
694                            ReleaseUsedRegs(TmpUsedRegs);
695 
696                            CopyUsedRegs(TmpUsedRegs);
697                            AllocRegBetween(taicpu(hp3).oper[0]^.reg,p,hp3,TmpUsedRegs);
698                            ReleaseUsedRegs(TmpUsedRegs);
699 
700                            IncludeRegInUsedRegs(taicpu(hp3).oper[0]^.reg,UsedRegs);
701                            UpdateUsedRegs(tai(p.Next));
702 
703                            asml.Remove(hp2);
704                            hp2.Free;
705                            asml.Remove(hp3);
706                            hp3.Free;
707 
708                            result:=true;
709                          end
710 
711                       end;
712                   end;
713                 A_CALL:
714                   if (cs_opt_level4 in current_settings.optimizerswitches) and
715                     GetNextInstruction(p,hp1) and
716                     MatchInstruction(hp1,A_RET) then
717                     begin
718                        DebugMsg('Peephole CallReg2Jmp performed', p);
719 
720                        taicpu(p).opcode:=A_JMP;
721 
722                        asml.Remove(hp1);
723                        hp1.Free;
724 
725                        result:=true;
726                     end;
727                 A_RCALL:
728                   if (cs_opt_level4 in current_settings.optimizerswitches) and
729                     GetNextInstruction(p,hp1) and
730                     MatchInstruction(hp1,A_RET) then
731                     begin
732                        DebugMsg('Peephole RCallReg2RJmp performed', p);
733 
734                        taicpu(p).opcode:=A_RJMP;
735 
736                        asml.Remove(hp1);
737                        hp1.Free;
738 
739                        result:=true;
740                     end;
741                 A_MOV:
742                   begin
743                     { change
744                       mov reg0, reg1
745                       dealloc reg0
746                       into
747                       dealloc reg0
748                     }
749                     if MatchOpType(taicpu(p),top_reg,top_reg) then
750                       begin
751                         CopyUsedRegs(TmpUsedRegs);
752                         UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
753                         if not(RegInUsedRegs(taicpu(p).oper[0]^.reg,TmpUsedRegs)) and
754                           { reg. allocation information before calls is not perfect, so don't do this before
755                             calls/icalls }
756                           GetNextInstruction(p,hp1) and
757                           not(MatchInstruction(hp1,[A_CALL,A_RCALL])) then
758                           begin
759                             DebugMsg('Peephole Mov2Nop performed', p);
760                             result:=RemoveCurrentP(p);
761                             ReleaseUsedRegs(TmpUsedRegs);
762                             exit;
763                           end;
764                         ReleaseUsedRegs(TmpUsedRegs);
765                       end;
766 
767                     { turn
768                       mov reg0, reg1
769                       <op> reg2,reg0
770                       dealloc reg0
771                       into
772                       <op> reg2,reg1
773                     }
774                     if MatchOpType(taicpu(p),top_reg,top_reg) and
775                        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
776                        (not RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) and
777                        (MatchInstruction(hp1,[A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_ADC,A_SBC,A_EOR,A_AND,A_OR,
778                                                A_OUT,A_IN]) or
779                        { the reference register of ST/STD cannot be replaced }
780                        (MatchInstruction(hp1,[A_STD,A_ST]) and (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)))) and
781                        (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
782                        {(taicpu(hp1).ops=1) and
783                        (taicpu(hp1).oper[0]^.typ = top_reg) and
784                        (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and  }
785                        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
786                       begin
787                         DebugMsg('Peephole MovOp2Op performed', p);
788 
789                         for i := 0 to taicpu(hp1).ops-1 do
790                           if taicpu(hp1).oper[i]^.typ=top_reg then
791                             if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
792                               taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;
793 
794                         alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
795                         dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
796 
797                         if assigned(alloc) and assigned(dealloc) then
798                           begin
799                             asml.Remove(alloc);
800                             alloc.Free;
801                             asml.Remove(dealloc);
802                             dealloc.Free;
803                           end;
804 
805                         { life range of reg1 is increased }
806                         AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,usedregs);
807                         { p will be removed, update used register as we continue
808                           with the next instruction after p }
809 
810                         result:=RemoveCurrentP(p);
811                       end
812                     { remove
813                       mov reg0,reg0
814                     }
815                     else if (taicpu(p).ops=2) and
816                        (taicpu(p).oper[0]^.typ = top_reg) and
817                        (taicpu(p).oper[1]^.typ = top_reg) and
818                        (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
819                       begin
820                         DebugMsg('Peephole RedundantMov performed', p);
821 
822                         result:=RemoveCurrentP(p);
823                       end
824                     {
825                       Turn
826                         mov rx,ry
827                         op rx,rz
828                         mov ry, rx
829                       Into
830                         op ry,rz
831                     }
832                     else if (taicpu(p).ops=2) and
833                        MatchOpType(taicpu(p),top_reg,top_reg) and
834                        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
835                        (hp1.typ=ait_instruction) and
836                        (taicpu(hp1).ops >= 1) and
837                        (taicpu(hp1).oper[0]^.typ = top_reg) and
838                        GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
839                        MatchInstruction(hp2,A_MOV) and
840                        MatchOpType(taicpu(hp2),top_reg,top_reg) and
841                        (taicpu(hp2).oper[0]^.reg = taicpu(p).oper[1]^.reg) and
842                        (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
843                        (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
844                        (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp2)) and
845                        (taicpu(hp1).opcode in [A_ADD,A_ADC,A_SUB,A_SBC,A_AND,A_OR,A_EOR,
846                                                A_INC,A_DEC,
847                                                A_LSL,A_LSR,A_ASR,A_ROR,A_ROL]) and
848                        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
849                       begin
850                         DebugMsg('Peephole MovOpMov2Op performed', p);
851 
852                         if (taicpu(hp1).ops=2) and
853                            (taicpu(hp1).oper[1]^.typ=top_reg) and
854                            (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[1]^.reg) then
855                           taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
856 
857                         taicpu(hp1).oper[0]^.reg:=taicpu(p).oper[1]^.reg;
858 
859                         alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
860                         dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));
861 
862                         if assigned(alloc) and assigned(dealloc) then
863                           begin
864                             asml.Remove(alloc);
865                             alloc.Free;
866                             asml.Remove(dealloc);
867                             dealloc.Free;
868                           end;
869 
870                         asml.remove(hp2);
871                         hp2.free;
872 
873                         result:=RemoveCurrentP(p);
874                       end
875                     {
876                       Turn
877                         mov rx,ry
878                         op  rx,rw
879                         mov rw,rx
880                       Into
881                         op rw,ry
882                     }
883                     else if (taicpu(p).ops=2) and
884                        MatchOpType(taicpu(p),top_reg,top_reg) and
885                        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
886                        (hp1.typ=ait_instruction) and
887                        (taicpu(hp1).ops = 2) and
888                        MatchOpType(taicpu(hp1),top_reg,top_reg) and
889                        GetNextInstructionUsingReg(hp1,hp2,taicpu(hp1).oper[0]^.reg) and
890                        (hp2.typ=ait_instruction) and
891                        (taicpu(hp2).opcode=A_MOV) and
892                        MatchOpType(taicpu(hp2),top_reg,top_reg) and
893                        (taicpu(hp2).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
894                        (taicpu(hp2).oper[1]^.reg = taicpu(hp1).oper[0]^.reg) and
895                        (taicpu(hp2).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
896                        (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
897                        (taicpu(hp1).opcode in [A_ADD,A_ADC,A_AND,A_OR,A_EOR]) and
898                        assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg, tai(hp2.Next))) then
899                       begin
900                         DebugMsg('Peephole MovOpMov2Op2 performed', p);
901 
902                         taicpu(hp1).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
903                         taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
904 
905                         alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
906                         dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp2.Next));
907 
908                         if assigned(alloc) and assigned(dealloc) then
909                           begin
910                             asml.Remove(alloc);
911                             alloc.Free;
912                             asml.Remove(dealloc);
913                             dealloc.Free;
914                           end;
915 
916                         result:=RemoveCurrentP(p);
917 
918                         asml.remove(hp2);
919                         hp2.free;
920                       end
921                     { fold
922                       mov reg2,reg0
923                       mov reg3,reg1
924                       to
925                       movw reg2,reg0
926                     }
927                     else if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
928                        (taicpu(p).ops=2) and
929                        (taicpu(p).oper[0]^.typ = top_reg) and
930                        (taicpu(p).oper[1]^.typ = top_reg) and
931                        getnextinstruction(p,hp1) and
932                        (hp1.typ = ait_instruction) and
933                        (taicpu(hp1).opcode = A_MOV) and
934                        (taicpu(hp1).ops=2) and
935                        (taicpu(hp1).oper[0]^.typ = top_reg) and
936                        (taicpu(hp1).oper[1]^.typ = top_reg) and
937                        (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
938                        ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
939                        ((getsupreg(taicpu(p).oper[1]^.reg) mod 2)=0) and
940                        (getsupreg(taicpu(hp1).oper[1]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)+1) then
941                       begin
942                         DebugMsg('Peephole MovMov2Movw performed', p);
943 
944                         alloc:=FindRegAllocBackward(taicpu(hp1).oper[0]^.reg,tai(hp1.Previous));
945                         if assigned(alloc) then
946                           begin
947                             asml.Remove(alloc);
948                             asml.InsertBefore(alloc,p);
949                             { proper book keeping of currently used registers }
950                             IncludeRegInUsedRegs(taicpu(hp1).oper[0]^.reg,UsedRegs);
951                           end;
952 
953                         taicpu(p).opcode:=A_MOVW;
954                         asml.remove(hp1);
955                         hp1.free;
956                         result:=true;
957                       end
958                     {
959                       This removes the first mov from
960                       mov rX,...
961                       mov rX,...
962                     }
963                     else if GetNextInstruction(p,hp1) and MatchInstruction(hp1,A_MOV) then
964                       while MatchInstruction(hp1,A_MOV) and
965                             MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
966                             { don't remove the first mov if the second is a mov rX,rX }
967                             not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) do
968                         begin
969                           DebugMsg('Peephole MovMov2Mov performed', p);
970 
971                           result:=RemoveCurrentP(p);
972 
973                           GetNextInstruction(hp1,hp1);
974                           if not assigned(hp1) then
975                             break;
976                         end;
977                   end;
978                 A_SBIC,
979                 A_SBIS:
980                   begin
981                     {
982                       Turn
983                           sbic/sbis X, y
984                           jmp .L1
985                           op
986                         .L1:
987 
988                       into
989                           sbis/sbic X,y
990                           op
991                         .L1:
992                     }
993                     if GetNextInstruction(p, hp1) and
994                        (hp1.typ=ait_instruction) and
995                        (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
996                        (taicpu(hp1).ops>0) and
997                        (taicpu(hp1).oper[0]^.typ = top_ref) and
998                        (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
999                        GetNextInstruction(hp1, hp2) and
1000                        (hp2.typ=ait_instruction) and
1001                        (not taicpu(hp2).is_jmp) and
1002                        GetNextInstruction(hp2, hp3) and
1003                        (hp3.typ=ait_label) and
1004                        (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) then
1005                       begin
1006                         DebugMsg('Peephole SbiJmp2Sbi performed',p);
1007 
1008                         if taicpu(p).opcode=A_SBIC then
1009                           taicpu(p).opcode:=A_SBIS
1010                         else
1011                           taicpu(p).opcode:=A_SBIC;
1012 
1013                         tai_label(hp3).labsym.decrefs;
1014 
1015                         AsmL.remove(hp1);
1016                         taicpu(hp1).Free;
1017 
1018                         result:=true;
1019                       end
1020                     {
1021                       Turn
1022                           sbiX X, y
1023                           jmp .L1
1024                           jmp .L2
1025                         .L1:
1026                           op
1027                         .L2:
1028 
1029                       into
1030                           sbiX X,y
1031                         .L1:
1032                           op
1033                         .L2:
1034                     }
1035                     else if GetNextInstruction(p, hp1) and
1036                        (hp1.typ=ait_instruction) and
1037                        (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
1038                        (taicpu(hp1).ops>0) and
1039                        (taicpu(hp1).oper[0]^.typ = top_ref) and
1040                        (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
1041 
1042                        GetNextInstruction(hp1, hp2) and
1043                        (hp2.typ=ait_instruction) and
1044                        (taicpu(hp2).opcode in [A_JMP,A_RJMP]) and
1045                        (taicpu(hp2).ops>0) and
1046                        (taicpu(hp2).oper[0]^.typ = top_ref) and
1047                        (taicpu(hp2).oper[0]^.ref^.symbol is TAsmLabel) and
1048 
1049                        GetNextInstruction(hp2, hp3) and
1050                        (hp3.typ=ait_label) and
1051                        (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) and
1052 
1053                        GetNextInstruction(hp3, hp4) and
1054                        (hp4.typ=ait_instruction) and
1055 
1056                        GetNextInstruction(hp4, hp5) and
1057                        (hp3.typ=ait_label) and
1058                        (taicpu(hp2).oper[0]^.ref^.symbol=tai_label(hp5).labsym) then
1059                       begin
1060                         DebugMsg('Peephole SbiJmpJmp2Sbi performed',p);
1061 
1062                         tai_label(hp3).labsym.decrefs;
1063                         tai_label(hp5).labsym.decrefs;
1064 
1065                         AsmL.remove(hp1);
1066                         taicpu(hp1).Free;
1067 
1068                         AsmL.remove(hp2);
1069                         taicpu(hp2).Free;
1070 
1071                         result:=true;
1072                       end;
1073                   end;
1074               end;
1075           end;
1076       end;
1077     end;
1078 
1079 
1080   procedure TCpuAsmOptimizer.PeepHoleOptPass2;
1081     begin
1082     end;
1083 
1084 begin
1085   casmoptimizer:=TCpuAsmOptimizer;
1086 End.
1087 
1088