1 {
2     Copyright (c) 1998-2002 by Florian Klaempfl
3 
4     This unit implements the x86 specific class for the register
5     allocator
6 
7     This program is free software; you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation; either version 2 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program; if not, write to the Free Software
19     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 
21  ****************************************************************************
22 }
23 
24 unit rgx86;
25 
26 {$i fpcdefs.inc}
27 
28   interface
29 
30     uses
31       cpubase,cgbase,cgutils,
32       aasmtai,aasmdata,aasmsym,aasmcpu,
33       rgobj;
34 
35     type
36        trgx86 = class(trgobj)
get_spill_subregnull37          function  get_spill_subreg(r : tregister) : tsubregister;override;
do_spill_replacenull38          function  do_spill_replace(list:TAsmList;instr:tai_cpu_abstract_sym;orgreg:tsuperregister;const spilltemp:treference):boolean;override;
39        end;
40 
41        tpushedsavedloc = record
42          case byte of
43            0: (pushed: boolean);
44            1: (ofs: longint);
45        end;
46 
47        tpushedsavedfpu = array[tsuperregister] of tpushedsavedloc;
48 
49        trgx86fpu = class
50           { these counters contain the number of elements in the }
51           { unusedregsxxx/usableregsxxx sets                     }
52           countunusedregsfpu : byte;
53 
54           { Contains the registers which are really used by the proc itself.
55             It doesn't take care of registers used by called procedures
56           }
57           used_in_proc : tcpuregisterset;
58 
59           {reg_pushes_other : regvarother_longintarray;
60           is_reg_var_other : regvarother_booleanarray;
61           regvar_loaded_other : regvarother_booleanarray;}
62 
63           fpuvaroffset : byte;
64 
65           constructor create;
66 
getregisterfpunull67           function getregisterfpu(list: TAsmList) : tregister;
68           procedure ungetregisterfpu(list: TAsmList; r : tregister);
69 
70           { pushes and restores registers }
71           procedure saveusedfpuregisters(list:TAsmList;
72                                          var saved:Tpushedsavedfpu;
73                                          const s:Tcpuregisterset);
74           procedure restoreusedfpuregisters(list:TAsmList;
75                                             const saved:Tpushedsavedfpu);
76 
77           { corrects the fpu stack register by ofs }
correct_fpuregisternull78           function correct_fpuregister(r : tregister;ofs : byte) : tregister;
79        end;
80 
81 
82 implementation
83 
84     uses
85        verbose;
86 
87     const
88        { This value is used in tsaved. If the array value is equal
89          to this, then this means that this register is not used.}
90        reg_not_saved = $7fffffff;
91 
92 
93 {******************************************************************************
94                                     Trgcpu
95 ******************************************************************************}
96 
trgx86.get_spill_subregnull97     function trgx86.get_spill_subreg(r : tregister) : tsubregister;
98       begin
99         result:=getsubreg(r);
100       end;
101 
102 
103     { Decide wether a "replace" spill is possible, i.e. wether we can replace a register
104       in an instruction by a memory reference. For example, in "mov ireg26d,0", the imaginary
105       register ireg26d can be replaced by a memory reference.}
trgx86.do_spill_replacenull106     function trgx86.do_spill_replace(list:TAsmList;instr:tai_cpu_abstract_sym;orgreg:tsuperregister;const spilltemp:treference):boolean;
107 
108        { returns true if opcde is an avx opcode which allows only the first (zero) operand might be a memory reference }
avx_opcode_only_op0_may_be_memrefnull109        function avx_opcode_only_op0_may_be_memref(opcode : TAsmOp) : boolean;
110          begin
111            case opcode of
112              A_VMULSS,
113              A_VMULSD,
114              A_VSUBSS,
115              A_VSUBSD,
116              A_VADDSD,
117              A_VADDSS,
118              A_VDIVSD,
119              A_VDIVSS,
120              A_VSQRTSD,
121              A_VSQRTSS,
122              A_VCVTDQ2PD,
123              A_VCVTDQ2PS,
124              A_VCVTPD2DQ,
125              A_VCVTPD2PS,
126              A_VCVTPS2DQ,
127              A_VCVTPS2PD,
128              A_VCVTSD2SI,
129              A_VCVTSD2SS,
130              A_VCVTSI2SD,
131              A_VCVTSS2SD,
132              A_VCVTTPD2DQ,
133              A_VCVTTPS2DQ,
134              A_VCVTTSD2SI,
135              A_VCVTSI2SS,
136              A_VCVTSS2SI,
137              A_VCVTTSS2SI,
138              A_VXORPD,
139              A_VXORPS,
140              A_VORPD,
141              A_VORPS,
142              A_VANDPD,
143              A_VANDPS,
144              A_VUNPCKLPS,
145              A_VUNPCKHPS,
146              A_VSHUFPD:
147                result:=true;
148              else
149                result:=false;
150            end;
151          end;
152 
153 
154       var
155         n,replaceoper : longint;
156         is_subh: Boolean;
157       begin
158         result:=false;
159         with taicpu(instr) do
160           begin
161             replaceoper:=-1;
162             case ops of
163               1 :
164                 begin
165                   if (oper[0]^.typ=top_reg) and
166                      (getregtype(oper[0]^.reg)=regtype) then
167                     begin
168                       if get_alias(getsupreg(oper[0]^.reg))<>orgreg then
169                         internalerror(200410101);
170                       replaceoper:=0;
171                     end;
172                 end;
173               2,3 :
174                 begin
175                   { avx instruction?
176                     currently this rule is sufficient but it might be extended }
177                   if (ops=3) and (opcode<>A_SHRD) and (opcode<>A_SHLD) and (opcode<>A_IMUL) then
178                     begin
179                       { BMI shifting/rotating instructions have special requirements regarding spilling, only
180                         the middle operand can be replaced }
181                       if ((opcode=A_RORX) or (opcode=A_SHRX) or (opcode=A_SARX) or (opcode=A_SHLX)) then
182                         begin
183                           if (oper[1]^.typ=top_reg) and (getregtype(oper[1]^.reg)=regtype) and (get_alias(getsupreg(oper[1]^.reg))=orgreg) then
184                             replaceoper:=1;
185                         end
186                       { avx instructions allow only the first operand (at&t counting) to be a register operand
187                         all operands must be registers ... }
188                       else if (oper[0]^.typ=top_reg) and
189                          (oper[1]^.typ=top_reg) and
190                          (oper[2]^.typ=top_reg) and
191                          { but they must be different }
192                          ((getregtype(oper[1]^.reg)<>regtype) or
193                           (get_alias(getsupreg(oper[0]^.reg))<>get_alias(getsupreg(oper[1]^.reg)))
194                          ) and
195                          ((getregtype(oper[2]^.reg)<>regtype) or
196                           (get_alias(getsupreg(oper[0]^.reg))<>get_alias(getsupreg(oper[2]^.reg)))
197                          ) and
198                          (get_alias(getsupreg(oper[0]^.reg))=orgreg) then
199                         replaceoper:=0;
200                     end
201                   else
202                     begin
203                       { We can handle opcodes with 2 and 3-op imul/shrd/shld the same way, where the 3rd operand is const or CL,
204                         that doesn't need spilling.
205                         However, due to AT&T order inside the compiler, the 3rd operand is
206                         numbered 0, so look at operand no. 1 and 2 if we have 3 operands by
207                         adding a "n". }
208                       n:=0;
209                       if ops=3 then
210                         n:=1;
211                       { lea is tricky: part of operand 0 can be spilled and the instruction can converted into an
212                         add, if base or index shall be spilled and the other one is equal the destination }
213                       if (opcode=A_LEA) then
214                         begin
215                           if (oper[0]^.ref^.offset=0) and
216                              (oper[0]^.ref^.scalefactor in [0,1]) and
217                              (((getregtype(oper[0]^.ref^.base)=regtype) and
218                                (get_alias(getsupreg(oper[0]^.ref^.base))=orgreg) and
219                                (getregtype(oper[0]^.ref^.index)=getregtype(oper[1]^.reg)) and
220                                (get_alias(getsupreg(oper[0]^.ref^.index))=get_alias(getsupreg(oper[1]^.reg)))) or
221                               ((getregtype(oper[0]^.ref^.index)=regtype) and
222                                (get_alias(getsupreg(oper[0]^.ref^.index))=orgreg) and
223                                (getregtype(oper[0]^.ref^.base)=getregtype(oper[1]^.reg)) and
224                                (get_alias(getsupreg(oper[0]^.ref^.base))=get_alias(getsupreg(oper[1]^.reg))))
225                              ) then
226                              replaceoper:=0;
227                         end
228                       else if (oper[n+0]^.typ=top_reg) and
229                          (oper[n+1]^.typ=top_reg) and
230                          ((getregtype(oper[n+0]^.reg)<>regtype) or
231                           (getregtype(oper[n+1]^.reg)<>regtype) or
232                           (get_alias(getsupreg(oper[n+0]^.reg))<>get_alias(getsupreg(oper[n+1]^.reg)))) then
233                         begin
234                           if (getregtype(oper[n+0]^.reg)=regtype) and
235                              (get_alias(getsupreg(oper[n+0]^.reg))=orgreg) then
236                             replaceoper:=0+n
237                           else if (getregtype(oper[n+1]^.reg)=regtype) and
238                              (get_alias(getsupreg(oper[n+1]^.reg))=orgreg) then
239                             replaceoper:=1+n;
240                         end
241                       else if (oper[n+0]^.typ=top_reg) and
242                          (oper[n+1]^.typ=top_const) then
243                         begin
244                           if (getregtype(oper[0+n]^.reg)=regtype) and
245                              (get_alias(getsupreg(oper[0+n]^.reg))=orgreg) then
246                             replaceoper:=0+n
247                           else
248                             internalerror(200704282);
249                         end
250                       else if (oper[n+0]^.typ=top_const) and
251                          (oper[n+1]^.typ=top_reg) then
252                         begin
253                           if (getregtype(oper[1+n]^.reg)=regtype) and
254                              (get_alias(getsupreg(oper[1+n]^.reg))=orgreg) then
255                             replaceoper:=1+n
256                           else
257                             internalerror(200704283);
258                         end;
259                       case replaceoper of
260                         0 :
261                           begin
262                             { Some instructions don't allow memory references
263                               for source }
264                             case opcode of
265                               A_BT,
266                               A_BTS,
267                               A_BTC,
268                               A_BTR,
269 
270                               { shufp*/unpcklp* would require 16 byte alignment for memory locations so we force the source
271                                 operand into a register }
272                               A_SHUFPD,
273                               A_SHUFPS,
274                               A_UNPCKLPD,
275                               A_UNPCKLPS :
276                                 replaceoper:=-1;
277                             end;
278                           end;
279                         1 :
280                           begin
281                             { Some instructions don't allow memory references
282                               for destination }
283                             case opcode of
284                               A_CMOVcc,
285                               A_MOVZX,
286                               A_MOVSX,
287 {$ifdef x86_64}
288                               A_MOVSXD,
289 {$endif x86_64}
290                               A_MULSS,
291                               A_MULSD,
292                               A_SUBSS,
293                               A_SUBSD,
294                               A_ADDSD,
295                               A_ADDSS,
296                               A_DIVSD,
297                               A_DIVSS,
298                               A_SQRTSD,
299                               A_SQRTSS,
300                               A_SHLD,
301                               A_SHRD,
302                               A_COMISD,
303                               A_COMISS,
304                               A_CVTDQ2PD,
305                               A_CVTDQ2PS,
306                               A_CVTPD2DQ,
307                               A_CVTPD2PI,
308                               A_CVTPD2PS,
309                               A_CVTPI2PD,
310                               A_CVTPS2DQ,
311                               A_CVTPS2PD,
312                               A_CVTSD2SI,
313                               A_CVTSD2SS,
314                               A_CVTSI2SD,
315                               A_CVTSS2SD,
316                               A_CVTTPD2PI,
317                               A_CVTTPD2DQ,
318                               A_CVTTPS2DQ,
319                               A_CVTTSD2SI,
320                               A_CVTPI2PS,
321                               A_CVTPS2PI,
322                               A_CVTSI2SS,
323                               A_CVTSS2SI,
324                               A_CVTTPS2PI,
325                               A_CVTTSS2SI,
326                               A_XORPD,
327                               A_XORPS,
328                               A_ORPD,
329                               A_ORPS,
330                               A_ANDPD,
331                               A_ANDPS,
332                               A_UNPCKLPS,
333                               A_UNPCKHPS,
334                               A_SHUFPD,
335                               A_SHUFPS,
336                               A_VCOMISD,
337                               A_VCOMISS:
338                                 replaceoper:=-1;
339 
340                               A_IMUL:
341                                 if ops<>3 then
342                                   replaceoper:=-1;
343 {$ifdef x86_64}
344                               A_MOV:
345                                  { 64 bit constants can only be moved into registers }
346                                  if (oper[0]^.typ=top_const) and
347                                     (oper[1]^.typ=top_reg) and
348                                     ((oper[0]^.val<low(longint)) or
349                                      (oper[0]^.val>high(longint))) then
350                                    replaceoper:=-1;
351 {$endif x86_64}
352                               else
353                                 if avx_opcode_only_op0_may_be_memref(opcode) then
354                                   replaceoper:=-1;
355                             end;
356                           end;
357                         2 :
358                           begin
359                             { Some 3-op instructions don't allow memory references
360                               for destination }
361                             case instr.opcode of
362                               A_IMUL:
363                                 replaceoper:=-1;
364                               else
365                                 if avx_opcode_only_op0_may_be_memref(opcode) then
366                                   replaceoper:=-1;
367                             end;
368                           end;
369                       end;
370                     end;
371                 end;
372              end;
373 
374 {$ifdef x86_64}
375             { 32 bit operations on 32 bit registers on x86_64 can result in
376               zeroing the upper 32 bits of the register. This does not happen
377               with memory operations, so we have to perform these calculations
378               in registers.  }
379             if (opsize=S_L) then
380               replaceoper:=-1;
381 {$endif x86_64}
382 
383             { Replace register with spill reference }
384             if replaceoper<>-1 then
385               begin
386                 if opcode=A_LEA then
387                   begin
388                     opcode:=A_ADD;
389                     oper[0]^.ref^:=spilltemp;
390                   end
391                 else
392                   begin
393                     is_subh:=getsubreg(oper[replaceoper]^.reg)=R_SUBH;
394                     oper[replaceoper]^.typ:=top_ref;
395                     new(oper[replaceoper]^.ref);
396                     oper[replaceoper]^.ref^:=spilltemp;
397                     if is_subh then
398                       inc(oper[replaceoper]^.ref^.offset);
399                     { memory locations aren't guaranteed to be aligned }
400                     case opcode of
401                       A_MOVAPS:
402                         opcode:=A_MOVSS;
403                       A_MOVAPD:
404                         opcode:=A_MOVSD;
405                       A_VMOVAPS:
406                         opcode:=A_VMOVSS;
407                       A_VMOVAPD:
408                         opcode:=A_VMOVSD;
409                     end;
410                   end;
411                 result:=true;
412               end;
413           end;
414       end;
415 
416 
417 {******************************************************************************
418                                   Trgx86fpu
419 ******************************************************************************}
420 
421     constructor Trgx86fpu.create;
422       begin
423         used_in_proc:=[];
424       end;
425 
426 
trgx86fpu.getregisterfpunull427     function trgx86fpu.getregisterfpu(list: TAsmList) : tregister;
428       begin
429         { note: don't return R_ST0, see comments above implementation of }
430         { a_loadfpu_* methods in cgcpu (JM)                              }
431         result:=NR_ST;
432       end;
433 
434 
435     procedure trgx86fpu.ungetregisterfpu(list : TAsmList; r : tregister);
436       begin
437         { nothing to do, fpu stack management is handled by the load/ }
438         { store operations in cgcpu (JM)                              }
439       end;
440 
441 
442 
trgx86fpu.correct_fpuregisternull443     function trgx86fpu.correct_fpuregister(r : tregister;ofs : byte) : tregister;
444       begin
445         correct_fpuregister:=r;
446         setsupreg(correct_fpuregister,ofs);
447       end;
448 
449 
450     procedure trgx86fpu.saveusedfpuregisters(list: TAsmList;
451                                              var saved : tpushedsavedfpu;
452                                              const s: tcpuregisterset);
453      { var
454          r : tregister;
455          hr : treference; }
456       begin
457         used_in_proc:=used_in_proc+s;
458 
459 { TODO: firstsavefpureg}
460 (*
461         { don't try to save the fpu registers if not desired (e.g. for }
462         { the 80x86)                                                   }
463         if firstsavefpureg <> R_NO then
464           for r.enum:=firstsavefpureg to lastsavefpureg do
465             begin
466               saved[r.enum].ofs:=reg_not_saved;
467               { if the register is used by the calling subroutine and if }
468               { it's not a regvar (those are handled separately)         }
469               if not is_reg_var_other[r.enum] and
470                  (r.enum in s) and
471                  { and is present in use }
472                  not(r.enum in unusedregsfpu) then
473                 begin
474                   { then save it }
475                   tg.GetTemp(list,extended_size,tt_persistent,hr);
476                   saved[r.enum].ofs:=hr.offset;
477                   cg.a_loadfpu_reg_ref(list,OS_FLOAT,OS_FLOAT,r,hr);
478                   cg.a_reg_dealloc(list,r);
479                   include(unusedregsfpu,r.enum);
480                   inc(countunusedregsfpu);
481                 end;
482             end;
483 *)
484       end;
485 
486 
487     procedure trgx86fpu.restoreusedfpuregisters(list : TAsmList;
488                                                 const saved : tpushedsavedfpu);
489 {
490       var
491          r,r2 : tregister;
492          hr : treference;
493 }
494       begin
495 { TODO: firstsavefpureg}
496 (*
497         if firstsavefpureg <> R_NO then
498           for r.enum:=lastsavefpureg downto firstsavefpureg do
499             begin
500               if saved[r.enum].ofs <> reg_not_saved then
501                 begin
502                   r2.enum:=R_INTREGISTER;
503                   r2.number:=NR_FRAME_POINTER_REG;
504                   reference_reset_base(hr,r2,saved[r.enum].ofs);
505                   cg.a_reg_alloc(list,r);
506                   cg.a_loadfpu_ref_reg(list,OS_FLOAT,OS_FLOAT,hr,r);
507                   if not (r.enum in unusedregsfpu) then
508                     { internalerror(10)
509                       in n386cal we always save/restore the reg *state*
510                       using save/restoreunusedstate -> the current state
511                       may not be real (JM) }
512                   else
513                     begin
514                       dec(countunusedregsfpu);
515                       exclude(unusedregsfpu,r.enum);
516                     end;
517                   tg.UnGetTemp(list,hr);
518                 end;
519             end;
520 *)
521       end;
522 
523 (*
524     procedure Trgx86fpu.saveotherregvars(list: TAsmList; const s: totherregisterset);
525       var
526         r: Tregister;
527       begin
528         if not(cs_opt_regvar in current_settings.optimizerswitches) then
529           exit;
530         if firstsavefpureg <> NR_NO then
531           for r.enum := firstsavefpureg to lastsavefpureg do
532             if is_reg_var_other[r.enum] and
533                (r.enum in s) then
534               store_regvar(list,r);
535       end;
536 *)
537 
538 end.
539