1 {
2     Copyright (c) 1998-2004 by Jonas Maebe
3 
4     This unit calls the optimization procedures to optimize the assembler
5     code for sparc
6 
7     This program is free software; you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation; either version 2 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program; if not, write to the Free Software
19     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 
21  ****************************************************************************
22 }
23 
24 unit aoptcpu;
25 
26 {$i fpcdefs.inc}
27 
28 { $define DEBUG_AOPTCPU}
29 
30   Interface
31 
32     uses
33       cgbase, cpubase, aoptobj, aoptcpub, aopt, aasmtai, aasmcpu;
34 
35     Type
36       TAsmOpSet = set of TAsmOp;
37 
38       TCpuAsmOptimizer = class(TAsmOptimizer)
RegModifiedByInstructionnull39         function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
GetNextInstructionUsingRegnull40         function GetNextInstructionUsingReg(Current: tai;
41           var Next: tai; reg: TRegister): Boolean;
TryRemoveMovnull42         function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
TryRemoveMovToRefIndexnull43         function TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;
TryRemoveMovBeforeStorenull44         function TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean;
PeepHoleOptPass1Cpunull45         function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
46         procedure PeepHoleOptPass2; override;
RegLoadedWithNewValuenull47         function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
InstructionLoadsFromRegnull48         function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
49 
50         { outputs a debug message into the assembler file }
51         procedure DebugMsg(const s: string; p: tai);
52       End;
53 
54   Implementation
55 
56      uses
57        cutils,globtype,globals,aasmbase,cpuinfo,verbose;
58 
59 
MatchInstructionnull60   function MatchInstruction(const instr: tai; const op: TAsmOp): boolean;
61     begin
62       result :=
63         (instr.typ = ait_instruction) and
64         (taicpu(instr).opcode = op);
65     end;
66 
67 
MatchOperandnull68   function MatchOperand(const oper: TOper; reg: TRegister): boolean;
69     begin
70       result:=(oper.typ=top_reg) and (oper.reg=reg);
71     end;
72 
73 
IsSameRegnull74   function IsSameReg(this,next: taicpu): boolean;
75     begin
76       result:=(next.oper[0]^.typ=top_reg) and
77         (next.oper[1]^.typ=top_reg) and
78         (next.oper[0]^.reg=next.oper[1]^.reg) and
79         (next.oper[0]^.reg=this.oper[0]^.reg);
80     end;
81 
82 
CanBeCMOVnull83   function CanBeCMOV(p: tai; condreg: tregister): boolean;
84     begin
85       result:=assigned(p) and (p.typ=ait_instruction) and
86         ((taicpu(p).opcode in [A_MOV_D,A_MOV_S]) or
87         (
88           { register with condition must not be overwritten }
89           (taicpu(p).opcode=A_MOVE) and
90           (taicpu(p).oper[0]^.reg<>condreg)
91         ));
92     end;
93 
94 
95   procedure ChangeToCMOV(p: taicpu; cond: tasmcond; reg: tregister);
96     begin
97       case cond of
98         C_COP1TRUE:
99           case p.opcode of
100             A_MOV_D: p.opcode:=A_MOVT_D;
101             A_MOV_S: p.opcode:=A_MOVT_S;
102             A_MOVE:  p.opcode:=A_MOVT;
103           else
104             InternalError(2014061701);
105           end;
106         C_COP1FALSE:
107           case p.opcode of
108             A_MOV_D: p.opcode:=A_MOVF_D;
109             A_MOV_S: p.opcode:=A_MOVF_S;
110             A_MOVE:  p.opcode:=A_MOVF;
111           else
112             InternalError(2014061702);
113           end;
114         C_EQ:
115           case p.opcode of
116             A_MOV_D: p.opcode:=A_MOVZ_D;
117             A_MOV_S: p.opcode:=A_MOVZ_S;
118             A_MOVE:  p.opcode:=A_MOVZ;
119           else
120             InternalError(2014061703);
121           end;
122         C_NE:
123           case p.opcode of
124             A_MOV_D: p.opcode:=A_MOVN_D;
125             A_MOV_S: p.opcode:=A_MOVN_S;
126             A_MOVE:  p.opcode:=A_MOVN;
127           else
128             InternalError(2014061704);
129           end;
130       else
131         InternalError(2014061705);
132       end;
133       p.ops:=3;
134       p.loadreg(2,reg);
135     end;
136 
137 
138 {$ifdef DEBUG_AOPTCPU}
139   procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
140     begin
141       asml.insertbefore(tai_comment.Create(strpnew(s)), p);
142     end;
143 {$else DEBUG_AOPTCPU}
144   procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
145     begin
146     end;
147 {$endif DEBUG_AOPTCPU}
148 
149 
TCpuAsmOptimizer.InstructionLoadsFromRegnull150  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;
151     var
152       p: taicpu;
153       i: longint;
154     begin
155       result:=false;
156       if not (assigned(hp) and (hp.typ=ait_instruction)) then
157         exit;
158       p:=taicpu(hp);
159 
160       i:=0;
161       while(i<p.ops) do
162         begin
163           case p.oper[I]^.typ of
164             top_reg:
165               result:=(p.oper[I]^.reg=reg) and (p.spilling_get_operation_type(I)<>operand_write);
166             top_ref:
167               result:=
168                 (p.oper[I]^.ref^.base=reg) or
169                 (p.oper[I]^.ref^.index=reg);
170           end;
171           if result then exit; {Bailout if we found something}
172           Inc(I);
173         end;
174     end;
175 
176 
TCpuAsmOptimizer.RegLoadedWithNewValuenull177   function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;
178     var
179       p: taicpu;
180     begin
181       p:=taicpu(hp);
182       result:=false;
183       if not ((assigned(hp)) and (hp.typ=ait_instruction)) then
184         exit;
185 
186       case p.opcode of
187         { These instructions do not write into a register at all }
188         A_NOP,
189         A_C_EQ_D,A_C_EQ_S,A_C_LE_D,A_C_LE_S,A_C_LT_D,A_C_LT_S,
190         A_BA,A_BC,
191         A_SB,A_SH,A_SW,A_SWL,A_SWR,A_SWC1,A_SDC1:
192           exit;
193       end;
194 
195       result:=(p.ops>0) and (p.oper[0]^.typ=top_reg) and
196         (p.oper[0]^.reg=reg);
197     end;
198 
199 
TCpuAsmOptimizer.RegModifiedByInstructionnull200   function TCpuAsmOptimizer.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
201     var
202       i : Longint;
203     begin
204       result:=false;
205       for i:=0 to taicpu(p1).ops-1 do
206         if (taicpu(p1).oper[i]^.typ=top_reg) and (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
207           begin
208             result:=true;
209             exit;
210           end;
211     end;
212 
213 
TCpuAsmOptimizer.GetNextInstructionUsingRegnull214   function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
215     var Next: tai; reg: TRegister): Boolean;
216     begin
217       Next:=Current;
218       repeat
219         Result:=GetNextInstruction(Next,Next);
220       until {not(cs_opt_level3 in current_settings.optimizerswitches) or} not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or
221         (is_calljmp(taicpu(Next).opcode));
222     end;
223 
224 
TCpuAsmOptimizer.TryRemoveMovnull225   function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
226     var
227       next,hp1: tai;
228       alloc,dealloc: tai_regalloc;
229     begin
230       { Fold
231           op      $reg1,...
232           opcode  $reg2,$reg1
233           dealloc $reg1
234         into
235           op   $reg2,...
236         opcode may be A_MOVE, A_MOV_s, A_MOV_d, etc.
237       }
238       result:=false;
239       if (taicpu(p).ops>0) and
240          GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
241          MatchInstruction(next,opcode) and
242          MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) and
243          { the destination register of mov cannot be used between p and next }
244          (not RegUsedBetween(taicpu(next).oper[0]^.reg,p,next)) then
245 
246         begin
247           dealloc:=FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.Next));
248           if assigned(dealloc) then
249             begin
250               { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
251                 and remove it if possible }
252               GetLastInstruction(p,hp1);
253 
254               asml.Remove(dealloc);
255               alloc:=FindRegAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
256               if assigned(alloc) then
257                 begin
258                   asml.Remove(alloc);
259                   alloc.free;
260                   dealloc.free;
261                 end
262               else
263                 asml.InsertAfter(dealloc,p);
264 
265               { try to move the allocation of the target register }
266               GetLastInstruction(next,hp1);
267               alloc:=FindRegAlloc(taicpu(next).oper[0]^.reg,tai(hp1.Next));
268               if assigned(alloc) then
269                 begin
270                   asml.Remove(alloc);
271                   asml.InsertBefore(alloc,p);
272                   { adjust used regs }
273                   IncludeRegInUsedRegs(taicpu(next).oper[0]^.reg,UsedRegs);
274                 end;
275 
276               { finally get rid of the mov }
277               taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg);
278               DebugMsg('Peephole: Move removed 1',next);
279               asml.remove(next);
280               next.free;
281               result:=true;
282             end
283           else       // no dealloc found
284             begin
285               { try to optimize the typical call sequence
286                 lw  $reg, (whatever)
287                 <alloc volatile registers (including $reg!!)>
288                 move $t9,$reg
289                 jalr $t9
290 
291                 if $reg is nonvolatile, its value may be used after call
292                 and we cannot safely replace it with $t9 }
293               if (opcode=A_MOVE) and
294                  (taicpu(next).oper[0]^.reg=NR_R25) and
295                  GetNextInstruction(next,hp1) and
296                  MatchInstruction(hp1,A_JALR) and
297                  MatchOperand(taicpu(hp1).oper[0]^,NR_R25) and
298                  assigned(FindRegAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) then
299                 begin
300                   taicpu(p).loadreg(0,taicpu(next).oper[0]^.reg);
301                   DebugMsg('Peephole: Move removed 2',next);
302                   asml.remove(next);
303                   next.free;
304                   result:=true;
305                 end;
306             end;
307         end;
308     end;
309 
310 
TCpuAsmOptimizer.TryRemoveMovBeforeStorenull311   function TCpuAsmOptimizer.TryRemoveMovBeforeStore(var p: tai; next: taicpu; const storeops: TAsmOpSet): boolean;
312     begin
313       result:=(next.opcode in storeops) and
314         MatchOperand(next.oper[0]^,taicpu(p).oper[0]^.reg) and
315         { Ry cannot be modified between move and store }
316         (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and
317         Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next)));
318       if result then
319         begin
320           next.loadreg(0,taicpu(p).oper[1]^.reg);
321           DebugMsg('Peephole: Move removed 3',p);
322           asml.remove(p);
323           p.free;
324           p:=next;
325         end;
326     end;
327 
328 
TCpuAsmOptimizer.TryRemoveMovToRefIndexnull329   function TCpuAsmOptimizer.TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;
330     begin
331       result:=(next.ops>1) and
332         (next.oper[1]^.typ=top_ref) and
333         (next.oper[1]^.ref^.refaddr<>addr_full) and
334         (next.oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
335         (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and
336         Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next)));
337       if result then
338         begin
339           next.oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
340           DebugMsg('Peephole: Move removed 4',p);
341           asml.remove(p);
342           p.free;
343           p:=next;
344         end;
345     end;
346 
347 
TCpuAsmOptimizer.PeepHoleOptPass1Cpunull348   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
349     var
350       next,next2: tai;
351     begin
352       result:=false;
353       case p.typ of
354         ait_instruction:
355           begin
356             case taicpu(p).opcode of
357               A_BC:
358                 begin
359                   { BEQ/BNE with same register are bogus, but can be generated for code like
360                    "if lo(qwordvar)=cardinal(qwordvar) ...",
361                     optimizations below can also yield them, e.g. if one register was initially R0. }
362                   if (taicpu(p).condition in [C_EQ,C_NE]) and
363                     (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then
364                     begin
365                       if (taicpu(p).condition=C_NE) then
366                         begin
367                           if (taicpu(p).oper[2]^.typ = top_ref) and
368                             (taicpu(p).oper[2]^.ref^.symbol is TAsmLabel) then
369                             TAsmLabel(taicpu(p).oper[2]^.ref^.symbol).decrefs;
370                           RemoveDelaySlot(p);
371                           GetNextInstruction(p,next);
372                         end
373                       else
374                         begin
375                           next:=taicpu.op_sym(A_BA,taicpu(p).oper[2]^.ref^.symbol);
376                           taicpu(next).fileinfo:=taicpu(p).fileinfo;
377                           asml.insertbefore(next,p);
378                         end;
379                       asml.remove(p);
380                       p.Free;
381                       p:=next;
382                       result:=true;
383                     end;
384                 end;
385 
386               A_SEH:
387                 begin
388                   if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
389                     MatchInstruction(next,A_SH) and
390                     MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) and
391                     (not RegUsedBetween(taicpu(p).oper[1]^.reg,p,next)) and
392                     Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
393                     begin
394                       taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
395                       asml.remove(p);
396                       p.free;
397                       p:=next;
398                       result:=true;
399                     end
400                   else
401                     result:=TryRemoveMov(p,A_MOVE);
402                 end;
403               A_SEB:
404                 { TODO: can be handled similar to A_SEH, but it's almost never encountered }
405                 result:=TryRemoveMov(p,A_MOVE);
406 
407               A_SLL:
408                 begin
409                   { if this is a sign extension... }
410                   if (taicpu(p).oper[2]^.typ=top_const) and
411                     GetNextInstruction(p,next) and
412                     MatchInstruction(next,A_SRA) and
413                     IsSameReg(taicpu(p),taicpu(next)) and
414                     (taicpu(next).oper[2]^.typ=top_const) and
415                     (taicpu(next).oper[2]^.val=taicpu(p).oper[2]^.val) and
416                     (taicpu(next).oper[2]^.val=16) and
417                     { ...followed by 16-bit store (possibly with PIC simplification, etc. in between) }
418                     GetNextInstructionUsingReg(next,next2,taicpu(p).oper[0]^.reg) and
419                     MatchInstruction(next2,A_SH) and
420                     (taicpu(next2).oper[0]^.typ=top_reg) and
421                     (taicpu(next2).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
422                     { the initial register may not be reused }
423                     (not RegUsedBetween(taicpu(p).oper[1]^.reg,next,next2)) then
424                     begin
425                       if Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next2.next))) then
426                         begin
427                           taicpu(next2).loadreg(0,taicpu(p).oper[1]^.reg);
428                           asml.remove(p);
429                           asml.remove(next);
430                           p.free;
431                           next.free;
432                           p:=next2;
433                           result:=true;
434                         end;
435                     end
436                   else
437                     result:=TryRemoveMov(p,A_MOVE);
438                 end;
439 
440               A_SRL:
441                 begin
442                   { TODO: also kill sign-extensions that follow, both SLL+SRA and SEB/SEH versions }
443                   { Remove 'andi' in sequences
444                       srl   Rx,Ry,16
445                       andi  Rx,Rx,65535
446 
447                       srl   Rx,Ry,24
448                       andi  Rx,Rx,255
449                     since 'srl' clears all relevant upper bits }
450                   if (taicpu(p).oper[2]^.typ=top_const) and
451                     GetNextInstruction(p,next) and
452                     MatchInstruction(next,A_ANDI) and
453                     IsSameReg(taicpu(p),taicpu(next)) and
454                     (taicpu(next).oper[2]^.typ=top_const) and
455                     ((
456                       (taicpu(p).oper[2]^.val>=16) and
457                       (taicpu(next).oper[2]^.val=65535)
458                     ) or (
459                       (taicpu(p).oper[2]^.val>=24) and
460                       (taicpu(next).oper[2]^.val=255)
461                     )) then
462                     begin
463                       asml.remove(next);
464                       next.free;
465                       result:=true;
466                     end
467                   else
468                     result:=TryRemoveMov(p,A_MOVE);
469                 end;
470 
471               A_ANDI:
472                 begin
473                   { Remove sign extension after 'andi' if bit 7/15 of const operand is clear }
474                   if (taicpu(p).oper[2]^.typ=top_const) and
475                     GetNextInstruction(p,next) and
476                     MatchInstruction(next,A_SLL) and
477                     GetNextInstruction(next,next2) and
478                     MatchInstruction(next2,A_SRA) and
479                     IsSameReg(taicpu(p),taicpu(next)) and
480                     IsSameReg(taicpu(p),taicpu(next2)) and
481                     (taicpu(next).oper[2]^.typ=top_const) and
482                     (taicpu(next2).oper[2]^.typ=top_const) and
483                     (taicpu(next).oper[2]^.val=taicpu(next2).oper[2]^.val) and
484                     ((
485                       (taicpu(p).oper[2]^.val<=$7fff) and
486                       (taicpu(next).oper[2]^.val=16)
487                     ) or (
488                       (taicpu(p).oper[2]^.val<=$7f) and
489                       (taicpu(next).oper[2]^.val=24)
490                     )) then
491                     begin
492                       asml.remove(next);
493                       asml.remove(next2);
494                       next.free;
495                       next2.free;
496                       result:=true;
497                     end
498                   { Remove zero extension if register is used only for byte/word memory store }
499                   else if (taicpu(p).oper[2]^.typ=top_const) and
500                     GetNextInstruction(p,next) and
501                     ((taicpu(p).oper[2]^.val=255) and MatchInstruction(next,A_SB)) or
502                     ((taicpu(p).oper[2]^.val=65535) and MatchInstruction(next,A_SH)) and
503                     (taicpu(next).oper[0]^.typ=top_reg) and
504                     (taicpu(next).oper[0]^.reg=taicpu(p).oper[0]^.reg) and
505                     assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
506                     begin
507                       taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
508                       asml.remove(p);
509                       p.free;
510                       p:=next;
511                       result:=true;
512                     end
513                   else
514                     result:=TryRemoveMov(p,A_MOVE);
515                 end;
516 
517               A_MOV_S:
518                 begin
519                   if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
520                      (next.typ=ait_instruction) then
521                     begin
522                       if TryRemoveMovBeforeStore(p,taicpu(next),[A_SWC1]) then
523                         result:=true;
524                     end;
525                 end;
526 
527               A_MOV_D:
528                 begin
529                   if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
530                      (next.typ=ait_instruction) then
531                     begin
532                       if TryRemoveMovBeforeStore(p,taicpu(next),[A_SDC1]) then
533                         result:=true;
534                     end;
535                 end;
536 
537               A_MOVE:
538                 begin
539                   if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
540                     (next.typ=ait_instruction) and
541                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then
542                     begin
543                       { MOVE  Rx,Ry; store Rx,(ref); dealloc Rx   ==> store Ry,(ref) }
544                       if TryRemoveMovBeforeStore(p,taicpu(next),[A_SB,A_SH,A_SW]) then
545                         result:=true
546                       else if TryRemoveMovToRefIndex(p,taicpu(next)) then
547                         result:=true
548                       { MOVE  Rx,Ry; opcode  Rx,Rx,any              ==> opcode Rx,Ry,any
549                         MOVE  Rx,Ry; opcode  Rx,Rz,Rx               ==> opcode Rx,Rz,Ry   }
550                       else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_AND,A_ANDI,A_SLLV,A_SRLV,A_SRAV]) and
551                          MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
552                         begin
553                           if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
554                             begin
555                               taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
556                               asml.remove(p);
557                               p.free;
558                               p:=next;
559                               result:=true;
560                             end
561                           { TODO: if Ry=NR_R0, this effectively changes instruction into MOVE,
562                             providing further optimization possibilities }
563                           else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then
564                             begin
565                               taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg);
566                               asml.remove(p);
567                               p.free;
568                               p:=next;
569                               result:=true;
570                             end;
571                         end
572                       { MOVE  Rx,Ry; opcode Rz,Rx,any; dealloc Rx  ==> opcode Rz,Ry,any }
573                       else if (taicpu(next).opcode in [A_ADD,A_ADDU,A_ADDI,A_ADDIU,A_SUB,A_SUBU,A_SLT,A_SLTU,A_DIV,A_DIVU,
574                                                        A_SLL,A_SRL,A_SRA,A_SLLV,A_SRLV,A_SRAV,A_AND,A_ANDI,A_OR,A_ORI,A_XOR,A_XORI]) and
575                          Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
576                         begin
577                           if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
578                             begin
579                               taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
580                               asml.remove(p);
581                               p.free;
582                               p:=next;
583                               result:=true;
584                             end
585                           else if MatchOperand(taicpu(next).oper[2]^,taicpu(p).oper[0]^.reg) then
586                             begin
587                               taicpu(next).loadreg(2,taicpu(p).oper[1]^.reg);
588                               asml.remove(p);
589                               p.free;
590                               p:=next;
591                               result:=true;
592                             end;
593                         end
594                       { MULT[U] and cond.branches must be handled separately due to different operand numbers }
595                       else if (taicpu(next).opcode in [A_MULT,A_MULTU,A_BC]) and
596                          Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) then
597                         begin
598                           if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
599                             begin
600                               taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
601                               if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
602                                 taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
603                               asml.remove(p);
604                               p.free;
605                               p:=next;
606                               result:=true;
607                             end
608                           else if MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
609                             begin
610                               taicpu(next).loadreg(1,taicpu(p).oper[1]^.reg);
611                               if MatchOperand(taicpu(next).oper[0]^,taicpu(p).oper[0]^.reg) then
612                                 taicpu(next).loadreg(0,taicpu(p).oper[1]^.reg);
613                               asml.remove(p);
614                               p.free;
615                               p:=next;
616                               result:=true;
617                             end;
618                         end
619                       else if TryRemoveMov(p,A_MOVE) then
620                         begin
621                           { Ended up with move between same register? Suicide then. }
622                           if (taicpu(p).oper[0]^.reg=taicpu(p).oper[1]^.reg) then
623                             begin
624                               GetNextInstruction(p,next);
625                               asml.remove(p);
626                               p.free;
627                               p:=next;
628                               result:=true;
629                             end;
630                         end;
631                     end;
632                 end;
633 
634               A_ADDIU:
635                 begin
636                   { ADDIU  Rx,Ry,const;    load/store  Rz,(Rx); dealloc Rx  ==> load/store Rz,const(Ry)
637                     ADDIU  Rx,Ry,%lo(sym); load/store  Rz,(Rx); dealloc Rx  ==> load/store Rz,%lo(sym)(Ry)
638                     ADDIU  Rx,Ry,const;    load Rx,(Rx)                     ==> load Rx,const(Ry)
639                     ADDIU  Rx,Ry,%lo(sym); load Rx,(Rx)                     ==> load Rx,%lo(sym)(Ry)    }
640                   if GetNextInstructionUsingReg(p,next,taicpu(p).oper[0]^.reg) and
641                     (next.typ=ait_instruction) and
642                     (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW,A_SB,A_SH,A_SW]) and
643                     (taicpu(p).oper[0]^.reg=taicpu(next).oper[1]^.ref^.base) and
644                     (taicpu(next).oper[1]^.ref^.offset=0) and
645                     (taicpu(next).oper[1]^.ref^.symbol=nil) and
646                     (
647                       Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next))) or
648                       (
649                         (taicpu(p).oper[0]^.reg=taicpu(next).oper[0]^.reg) and
650                         (taicpu(next).opcode in [A_LB,A_LBU,A_LH,A_LHU,A_LW])
651                       )
652                     )  and
653                     (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) then
654                     begin
655                       case taicpu(p).oper[2]^.typ of
656                         top_const:
657                           taicpu(next).oper[1]^.ref^.offset:=taicpu(p).oper[2]^.val;
658 
659                         top_ref:
660                           taicpu(next).oper[1]^.ref^:=taicpu(p).oper[2]^.ref^;
661                       else
662                         InternalError(2014100401);
663                       end;
664                       taicpu(next).oper[1]^.ref^.base:=taicpu(p).oper[1]^.reg;
665                       asml.remove(p);
666                       p.free;
667                       p:=next;
668                       result:=true;
669                     end
670                   else
671                     result:=TryRemoveMov(p,A_MOVE);
672                 end;
673 
674               A_ADD,A_ADDU,A_OR:
675                 begin
676                   if MatchOperand(taicpu(p).oper[1]^,NR_R0) then
677                     begin
678                       taicpu(p).freeop(1);
679                       taicpu(p).oper[1]:=taicpu(p).oper[2];
680                       taicpu(p).oper[2]:=nil;
681                       taicpu(p).ops:=2;
682                       taicpu(p).opercnt:=2;
683                       taicpu(p).opcode:=A_MOVE;
684                       result:=true;
685                     end
686                   else if MatchOperand(taicpu(p).oper[2]^,NR_R0) then
687                     begin
688                       taicpu(p).freeop(2);
689                       taicpu(p).ops:=2;
690                       taicpu(p).opercnt:=2;
691                       taicpu(p).opcode:=A_MOVE;
692                       result:=true;
693                     end
694                   else
695                     result:=TryRemoveMov(p,A_MOVE);
696                 end;
697 
698               A_LB,A_LBU,A_LH,A_LHU,A_LW,
699               A_ADDI,
700               A_SUB,A_SUBU,
701               A_SRA,A_SRAV,
702               A_SRLV,
703               A_SLLV,
704               A_MFLO,A_MFHI,
705               A_AND,A_XOR,A_ORI,A_XORI:
706                 result:=TryRemoveMov(p,A_MOVE);
707 
708               A_LWC1,
709               A_ADD_s, A_SUB_s, A_MUL_s, A_DIV_s,
710               A_ABS_s, A_NEG_s, A_SQRT_s,
711               A_CVT_s_w, A_CVT_s_l, A_CVT_s_d:
712                 result:=TryRemoveMov(p,A_MOV_s);
713 
714               A_LDC1,
715               A_ADD_d, A_SUB_d, A_MUL_d, A_DIV_d,
716               A_ABS_d, A_NEG_d, A_SQRT_d,
717               A_CVT_d_w, A_CVT_d_l, A_CVT_d_s:
718                 result:=TryRemoveMov(p,A_MOV_d);
719             end;
720           end;
721       end;
722     end;
723 
724 
725   procedure TCpuAsmOptimizer.PeepHoleOptPass2;
726     var
727       p: tai;
728       l: longint;
729       hp1,hp2,hp3: tai;
730       condition: tasmcond;
731       condreg: tregister;
732     begin
733       { Currently, everything below is mips4+ }
734       if (current_settings.cputype<cpu_mips4) then
735         exit;
736       p:=BlockStart;
737       ClearUsedRegs;
738       while (p<>BlockEnd) Do
739         begin
740           UpdateUsedRegs(tai(p.next));
741           case p.typ of
742             ait_instruction:
743               begin
744                 case taicpu(p).opcode of
745                   A_BC:
746                     begin
747                       condreg:=NR_NO;
748                       if (taicpu(p).condition in [C_COP1TRUE,C_COP1FALSE]) then
749                         { TODO: must be taken from "p" if/when codegen makes use of multiple %fcc }
750                         condreg:=NR_FCC0
751                       else if (taicpu(p).condition in [C_EQ,C_NE]) then
752                         begin
753                           if (taicpu(p).oper[0]^.reg=NR_R0) then
754                             condreg:=taicpu(p).oper[1]^.reg
755                           else if (taicpu(p).oper[1]^.reg=NR_R0) then
756                             condreg:=taicpu(p).oper[0]^.reg
757                         end;
758 
759                       if (condreg<>NR_NO) then
760                         begin
761                           { check for
762                               bCC   xxx
763                               <several movs>
764                           xxx:
765                           }
766                           l:=0;
767                           GetNextInstruction(p, hp1);
768                           while CanBeCMOV(hp1,condreg) do       // CanBeCMOV returns False for nil or labels
769                             begin
770                               inc(l);
771                               GetNextInstruction(hp1,hp1);
772                             end;
773                           if assigned(hp1) then
774                             begin
775                               if FindLabel(tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol),hp1) then
776                                 begin
777                                   if (l<=4) and (l>0) then
778                                     begin
779                                       condition:=inverse_cond(taicpu(p).condition);
780                                       hp2:=p;
781                                       GetNextInstruction(p,hp1);
782                                       p:=hp1;
783                                       repeat
784                                         ChangeToCMOV(taicpu(hp1),condition,condreg);
785                                         GetNextInstruction(hp1,hp1);
786                                       until not CanBeCMOV(hp1,condreg);
787                                       { wait with removing else GetNextInstruction could
788                                         ignore the label if it was the only usage in the
789                                         jump moved away }
790                                       tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol).decrefs;
791                                       RemoveDelaySlot(hp2);
792                                       asml.remove(hp2);
793                                       hp2.free;
794                                       continue;
795                                     end;
796                                 end
797                               else
798                                 begin
799                                   { check further for
800                                         bCC   xxx
801                                         <several movs 1>
802                                         b     yyy
803                                     xxx:
804                                         <several movs 2>
805                                     yyy:
806                                   }
807                                   { hp2 points to jmp yyy }
808                                   hp2:=hp1;
809                                   { skip hp1 to xxx }
810                                   GetNextInstruction(hp1, hp1);
811                                   if assigned(hp2) and
812                                     assigned(hp1) and
813                                     (l<=3) and
814                                     (hp2.typ=ait_instruction) and
815                                     (taicpu(hp2).opcode=A_BA) and
816                                     { real label and jump, no further references to the
817                                       label are allowed }
818                                     (tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol).getrefs<=2) and
819                                     FindLabel(tasmlabel(taicpu(p).oper[taicpu(p).ops-1]^.ref^.symbol),hp1) then
820                                     begin
821                                       l:=0;
822                                       { skip hp1 to <several moves 2> }
823                                       GetNextInstruction(hp1, hp1);
824                                       while CanBeCMOV(hp1,condreg) do
825                                         begin
826                                           inc(l);
827                                           GetNextInstruction(hp1, hp1);
828                                         end;
829                                       { hp1 points to yyy: }
830                                       if assigned(hp1) and (l<=3) and
831                                         FindLabel(tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol),hp1) then
832                                         begin
833                                           condition:=inverse_cond(taicpu(p).condition);
834                                           GetNextInstruction(p,hp1);
835                                           hp3:=p;
836                                           p:=hp1;
837                                           while CanBeCMOV(hp1,condreg) do
838                                             begin
839                                               ChangeToCMOV(taicpu(hp1),condition,condreg);
840                                               GetNextInstruction(hp1,hp1);
841                                             end;
842                                           { hp2 is still at b yyy }
843                                           GetNextInstruction(hp2,hp1);
844                                           { hp2 is now at xxx: }
845                                           condition:=inverse_cond(condition);
846                                           GetNextInstruction(hp1,hp1);
847                                           { hp1 is now at <several movs 2> }
848                                           while CanBeCMOV(hp1,condreg) do
849                                             begin
850                                               ChangeToCMOV(taicpu(hp1),condition,condreg);
851                                               GetNextInstruction(hp1,hp1);
852                                             end;
853                                           { remove bCC }
854                                           tasmlabel(taicpu(hp3).oper[taicpu(hp3).ops-1]^.ref^.symbol).decrefs;
855                                           RemoveDelaySlot(hp3);
856                                           asml.remove(hp3);
857                                           hp3.free;
858                                           { remove jmp }
859                                           tasmlabel(taicpu(hp2).oper[taicpu(hp2).ops-1]^.ref^.symbol).decrefs;
860                                           RemoveDelaySlot(hp2);
861                                           asml.remove(hp2);
862                                           hp2.free;
863                                           continue;
864                                         end;
865                                     end;
866                                 end;
867                             end;
868                         end;
869                     end;
870                 end;
871               end;
872           end;
873           UpdateUsedRegs(p);
874           p:=tai(p.next);
875         end;
876     end;
877 
878 begin
879   casmoptimizer:=TCpuAsmOptimizer;
880 end.
881