1 {
2     Copyright (c) 1998-2002 by Florian Klaempfl
3 
4     Generate SPARC assembler for math nodes
5 
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10 
11     This program is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15 
16     You should have received a copy of the GNU General Public License
17     along with this program; if not, write to the Free Software
18     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 
20  ****************************************************************************
21 }
22 unit ncpumat;
23 
24 {$i fpcdefs.inc}
25 
26 interface
27 
28     uses
29       node,nmat,ncgmat;
30 
31     type
32       tSparcmoddivnode = class(tmoddivnode)
33         procedure pass_generate_code;override;
34 {$ifdef SPARC64}
use_moddiv64bitint_helpernull35         function use_moddiv64bitint_helper : boolean; override;
36 {$endif SPARC64}
37       end;
38 
39       tSparcshlshrnode = class(tcgshlshrnode)
40 {$ifndef SPARC64}
41          procedure second_64bit;override;
42          { everything will be handled in pass_2 }
first_shlshr64bitintnull43          function first_shlshr64bitint: tnode; override;
44 {$endif SPARC64}
45       end;
46 
      { SPARC code generator node for the not operator; only second_boolean
        is overridden, everything else comes from tcgnotnode }
      tSparcnotnode = class(tcgnotnode)
         { delivers the negated operand as a condition-flags location }
         procedure second_boolean;override;
      end;
50 
      { SPARC code generator node for unary minus; only the floating point
        case is overridden, everything else comes from tcgunaryminusnode }
      tsparcunaryminusnode = class(tcgunaryminusnode)
         { negates a floating point operand with an FNEGs/FNEGd/FNEGq
           instruction chosen by the result size }
         procedure second_float; override;
      end;
54 
55 implementation
56 
57     uses
58       globtype,systems,constexp,
59       cutils,verbose,globals,
60       symconst,symdef,
61       aasmbase,aasmcpu,aasmtai,aasmdata,
62       defutil,
63       cgbase,cgobj,hlcgobj,pass_2,procinfo,
64       ncon,
65       cpubase,
66       ncgutil,cgcpu,cgutils;
67 
68 {*****************************************************************************
69                              TSparcMODDIVNODE
70 *****************************************************************************}
71 
72 {$ifdef sparc64}
tSparcmoddivnode.use_moddiv64bitint_helpernull73     function tSparcmoddivnode.use_moddiv64bitint_helper: boolean;
74       begin
75         { sparc64 has no overflow checked 64 bit div }
76         result:=(is_64bitint(left.resultdef) or is_64bitint(right.resultdef)) and
77           (cs_check_overflow in current_settings.localswitches);
78       end;
79 
80 
81     procedure tSparcmoddivnode.pass_generate_code;
82       const
83         { 64 bit   signed  overflow }
84         divops: array[boolean, boolean, boolean] of tasmop =
85           (((A_UDIV,A_UDIVcc),(A_SDIV,A_SDIVcc)),
86            ((A_UDIVX,A_NOP),(A_SDIVX,A_NOP))
87           );
88       var
89          power      : longint;
90          op         : tasmop;
91          tmpreg,
92          numerator,
93          divider,
94          resultreg  : tregister;
95          overflowlabel : tasmlabel;
96          ai : taicpu;
97          no_overflow : boolean;
98       begin
99          secondpass(left);
100          secondpass(right);
101          location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
102          location.register:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
103 
104          { put numerator in register }
105          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
106          numerator := left.location.register;
107          resultreg := location.register;
108 
109          if is_64bit(resultdef) then
110            begin
111              if (nodetype = divn) and
112                 (right.nodetype = ordconstn) and
113                 ispowerof2(tordconstnode(right).value.svalue,power) and
114                 (not (cs_check_overflow in current_settings.localswitches)) then
115                begin
116                  if is_signed(left.resultdef) Then
117                    begin
118                      tmpreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
119                      cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,63,numerator,tmpreg);
120                      { if signed, tmpreg=right value-1, otherwise 0 }
121                      cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_INT,tordconstnode(right).value.svalue-1,tmpreg);
122                      { add to the left value }
123                      cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_INT,numerator,tmpreg);
124                      cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,aword(power),tmpreg,resultreg);
125                    end
126                  else
127                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,aword(power),numerator,resultreg);
128                end
129              else
130                begin
131                  { load divider in a register if necessary }
132                  divider:=NR_NO;
133                  if (right.location.loc<>LOC_CONSTANT) or
134                     (right.location.value<simm13lo) or
135                     (right.location.value>simm13hi) then
136                    begin
137                      hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,
138                        right.resultdef,right.resultdef,true);
139                      divider:=right.location.register;
140                    end;
141 
142                  op := divops[true, is_signed(right.resultdef),
143                               cs_check_overflow in current_settings.localswitches];
144                  if op=A_NOP then
145                    { current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('Wrong code generated here'))); }
146                    begin
147                      no_overflow:=true;
148                      op:=divops[true,is_signed(right.resultdef),false];
149                    end
150                  else
151                    no_overflow:=false;
152                  if (divider<>NR_NO) then
153                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,numerator,divider,resultreg))
154                  else
155                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(op,numerator,right.location.value,resultreg));
156 
157                  if (nodetype = modn) then
158                    begin
159                      if not no_overflow then
160                        begin
161                          current_asmdata.getjumplabel(overflowlabel);
162                          ai:=taicpu.op_cond_sym(A_Bxx,C_VS,overflowlabel);
163                          ai.delayslot_annulled:=true;
164                          current_asmdata.CurrAsmList.concat(ai);
165                        end;
166                      current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_NOT,resultreg));
167                      if not no_overflow then
168                        cg.a_label(current_asmdata.CurrAsmList,overflowlabel);
169                      if (divider<>NR_NO) then
170                        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_MULX,resultreg,divider,resultreg))
171                      else
172                        current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(A_MULX,resultreg,right.location.value,resultreg));
173                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SUB,numerator,resultreg,resultreg));
174                    end;
175                end;
176            end
177          else
178            begin
179              if (nodetype = divn) and
180                 (right.nodetype = ordconstn) and
181                 ispowerof2(tordconstnode(right).value.svalue,power) and
182                 (not (cs_check_overflow in current_settings.localswitches)) then
183                begin
184                  if is_signed(left.resultdef) Then
185                    begin
186                      tmpreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
187                      cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,31,numerator,tmpreg);
188                      { if signed, tmpreg=right value-1, otherwise 0 }
189                      cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_INT,tordconstnode(right).value.svalue-1,tmpreg);
190                      { add to the left value }
191                      cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_INT,numerator,tmpreg);
192                      cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,aword(power),tmpreg,resultreg);
193                    end
194                  else
195                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,aword(power),numerator,resultreg);
196                end
197              else
198                begin
199                  { load divider in a register if necessary }
200                  divider:=NR_NO;
201                  if (right.location.loc<>LOC_CONSTANT) or
202                     (right.location.value<simm13lo) or
203                     (right.location.value>simm13hi) then
204                    begin
205                      hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,
206                        right.resultdef,right.resultdef,true);
207                      divider:=right.location.register;
208                    end;
209 
210                  { needs overflow checking, (-maxlongint-1) div (-1) overflows! }
211                  { And on Sparc, the only way to catch a div-by-0 is by checking  }
212                  { the overflow flag (JM)                                       }
213 
214                  { Fill %y with the -1 or 0 depending on the highest bit }
215                  if is_signed(left.resultdef) then
216                    begin
217                      tmpreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
218                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(A_SRA,numerator,31,tmpreg));
219                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,tmpreg,NR_Y));
220                    end
221                  else
222                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,NR_G0,NR_Y));
223                  { wait 3 instructions slots before we can read %y }
224                  current_asmdata.CurrAsmList.concat(taicpu.op_none(A_NOP));
225                  current_asmdata.CurrAsmList.concat(taicpu.op_none(A_NOP));
226                  current_asmdata.CurrAsmList.concat(taicpu.op_none(A_NOP));
227 
228                  op := divops[false, is_signed(right.resultdef),
229                               cs_check_overflow in current_settings.localswitches];
230                  if (divider<>NR_NO) then
231                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,numerator,divider,resultreg))
232                  else
233                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(op,numerator,right.location.value,resultreg));
234 
235                  if (nodetype = modn) then
236                    begin
237                      current_asmdata.getjumplabel(overflowlabel);
238                      ai:=taicpu.op_cond_sym(A_Bxx,C_VS,overflowlabel);
239                      ai.delayslot_annulled:=true;
240                      current_asmdata.CurrAsmList.concat(ai);
241                      current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_NOT,resultreg));
242                      cg.a_label(current_asmdata.CurrAsmList,overflowlabel);
243                      if (divider<>NR_NO) then
244                        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SMUL,resultreg,divider,resultreg))
245                      else
246                        current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(A_SMUL,resultreg,right.location.value,resultreg));
247                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SUB,numerator,resultreg,resultreg));
248                    end;
249                end;
250            end;
251         { set result location }
252         location.loc:=LOC_REGISTER;
253         location.register:=resultreg;
254         cg.g_overflowcheck(current_asmdata.CurrAsmList,Location,resultdef);
255       end;
256 {$else sparc64}
257     procedure tSparcmoddivnode.pass_generate_code;
258       const
259                     { signed   overflow }
260         divops: array[boolean, boolean] of tasmop =
261           ((A_UDIV,A_UDIVcc),(A_SDIV,A_SDIVcc));
262       var
263          power      : longint;
264          op         : tasmop;
265          tmpreg,
266          numerator,
267          divider,
268          resultreg  : tregister;
269          overflowlabel : tasmlabel;
270          ai : taicpu;
271       begin
272          secondpass(left);
273          secondpass(right);
274          location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
275          location.register:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
276 
277          { put numerator in register }
278          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
279          numerator := left.location.register;
280          resultreg := location.register;
281 
282          if (nodetype = divn) and
283             (right.nodetype = ordconstn) and
284             ispowerof2(tordconstnode(right).value.svalue,power) and
285             (not (cs_check_overflow in current_settings.localswitches)) then
286            begin
287              if is_signed(left.resultdef) Then
288                begin
289                  tmpreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
290                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,31,numerator,tmpreg);
291                  { if signed, tmpreg=right value-1, otherwise 0 }
292                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_INT,tordconstnode(right).value.svalue-1,tmpreg);
293                  { add to the left value }
294                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_INT,numerator,tmpreg);
295                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,aword(power),tmpreg,resultreg);
296                end
297              else
298                cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,aword(power),numerator,resultreg);
299            end
300          else
301            begin
302              { load divider in a register if necessary }
303              divider:=NR_NO;
304              if (right.location.loc<>LOC_CONSTANT) or
305                 (right.location.value<simm13lo) or
306                 (right.location.value>simm13hi) then
307                begin
308                  hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,
309                    right.resultdef,right.resultdef,true);
310                  divider:=right.location.register;
311                end;
312 
313              { needs overflow checking, (-maxlongint-1) div (-1) overflows! }
314              { And on Sparc, the only way to catch a div-by-0 is by checking  }
315              { the overflow flag (JM)                                       }
316 
317              { Fill %y with the -1 or 0 depending on the highest bit }
318              if is_signed(left.resultdef) then
319                begin
320                  tmpreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
321                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(A_SRA,numerator,31,tmpreg));
322                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,tmpreg,NR_Y));
323                end
324              else
325                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,NR_G0,NR_Y));
326              { wait 3 instructions slots before we can read %y }
327              current_asmdata.CurrAsmList.concat(taicpu.op_none(A_NOP));
328              current_asmdata.CurrAsmList.concat(taicpu.op_none(A_NOP));
329              current_asmdata.CurrAsmList.concat(taicpu.op_none(A_NOP));
330 
331              op := divops[is_signed(right.resultdef),
332                           cs_check_overflow in current_settings.localswitches];
333              if (divider<>NR_NO) then
334                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,numerator,divider,resultreg))
335              else
336                current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(op,numerator,right.location.value,resultreg));
337 
338              if (nodetype = modn) then
339                begin
340                  current_asmdata.getjumplabel(overflowlabel);
341                  ai:=taicpu.op_cond_sym(A_Bxx,C_VS,overflowlabel);
342                  ai.delayslot_annulled:=true;
343                  current_asmdata.CurrAsmList.concat(ai);
344                  current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_NOT,resultreg));
345                  cg.a_label(current_asmdata.CurrAsmList,overflowlabel);
346                  if (divider<>NR_NO) then
347                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SMUL,resultreg,divider,resultreg))
348                  else
349                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(A_SMUL,resultreg,right.location.value,resultreg));
350                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SUB,numerator,resultreg,resultreg));
351                end;
352            end;
353         { set result location }
354         location.loc:=LOC_REGISTER;
355         location.register:=resultreg;
356         cg.g_overflowcheck(current_asmdata.CurrAsmList,Location,resultdef);
357       end;
358 {$endif sparc64}
359 
{*****************************************************************************
                             TSparcSHLSHRNODE
*****************************************************************************}
363 
364 {$ifndef SPARC64}
TSparcShlShrNode.first_shlshr64bitintnull365     function TSparcShlShrNode.first_shlshr64bitint:TNode;
366       begin
367         { 64bit without constants need a helper }
368         if is_64bit(left.resultdef) and
369            (right.nodetype<>ordconstn) then
370           begin
371             result:=inherited first_shlshr64bitint;
372             exit;
373           end;
374 
375         result := nil;
376       end;
377 
378 
379     procedure tSparcshlshrnode.second_64bit;
380       var
381         hregister,hreg64hi,hreg64lo : tregister;
382         op : topcg;
383         shiftval: aword;
384       const
385         ops: array [boolean] of topcg = (OP_SHR,OP_SHL);
386       begin
387         { 64bit without constants need a helper, and is
388           already replaced in pass1 }
389         if (right.nodetype<>ordconstn) then
390           internalerror(200405301);
391 
392         location_reset(location, LOC_REGISTER, def_cgsize(resultdef));
393 
394         { load left operator in a register }
395         hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true);
396         hreg64hi:=left.location.register64.reghi;
397         hreg64lo:=left.location.register64.reglo;
398 
399         shiftval := tordconstnode(right).value.svalue and 63;
400         op := ops[nodetype=shln];
401         location.register64.reglo:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_32);
402         location.register64.reghi:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_32);
403 
404         { Emitting "left shl 1" as "left+left" is twice shorter }
405         if (nodetype=shln) and (shiftval=1) then
406           cg64.a_op64_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_64,left.location.register64,left.location.register64,location.register64)
407         else if shiftval > 31 then
408           begin
409             if nodetype = shln then
410               begin
411                 cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_32,0,location.register64.reglo);
412                 { if shiftval and 31 = 0, it will optimize to MOVE }
413                 cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHL, OS_32, shiftval and 31, hreg64lo, location.register64.reghi);
414               end
415             else
416               begin
417                 cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_32,0,location.register64.reghi);
418                 cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_32, shiftval and 31, hreg64hi, location.register64.reglo);
419               end;
420           end
421         else
422           begin
423             hregister := cg.getintregister(current_asmdata.CurrAsmList, OS_32);
424 
425             cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, op, OS_32, shiftval, hreg64hi, location.register64.reghi);
426             cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, op, OS_32, shiftval, hreg64lo, location.register64.reglo);
427             if shiftval <> 0 then
428               begin
429                 if nodetype = shln then
430                   begin
431                     cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_32, 32-shiftval, hreg64lo, hregister);
432                     cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_OR, OS_32, hregister, location.register64.reghi, location.register64.reghi);
433                   end
434                 else
435                   begin
436                     cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHL, OS_32, 32-shiftval, hreg64hi, hregister);
437                     cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_OR, OS_32, hregister, location.register64.reglo, location.register64.reglo);
438                   end;
439               end;
440           end;
441       end;
442 {$endif SPARC64}
443 
444 
445 {*****************************************************************************
446                                TSPARCNOTNODE
447 *****************************************************************************}
448 
449     procedure tsparcnotnode.second_boolean;
450       begin
451         if not handle_locjump then
452           begin
453             secondpass(left);
454             case left.location.loc of
455               LOC_FLAGS :
456                 begin
457                   location_copy(location,left.location);
458                   inverse_flags(location.resflags);
459                 end;
460               LOC_REGISTER, LOC_CREGISTER,
461               LOC_REFERENCE, LOC_CREFERENCE,
462               LOC_SUBSETREG, LOC_CSUBSETREG,
463               LOC_SUBSETREF, LOC_CSUBSETREF:
464                 begin
465                   hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
466 {$ifndef SPARC64}
467                   if is_64bit(left.resultdef) then
468                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_ORcc,
469                       left.location.register64.reglo,left.location.register64.reghi,NR_G0))
470                   else
471 {$endif SPARC64}
472                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(A_SUBcc,left.location.register,0,NR_G0));
473                   location_reset(location,LOC_FLAGS,OS_NO);
474                   location.resflags.Init(NR_ICC,F_E);
475                end;
476               else
477                 internalerror(2003042401);
478             end;
479           end;
480       end;
481 
482 
483 {*****************************************************************************
484                                    TSPARCUNARYMINUSNODE
485 *****************************************************************************}
486 
487     procedure tsparcunaryminusnode.second_float;
488       begin
489         secondpass(left);
490         hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
491         location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
492         location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
493         case location.size of
494           OS_F32:
495             current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEGs,left.location.register,location.register));
496           OS_F64:
497             current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEGd,left.location.register,location.register));
498           OS_F128:
499             current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEGq,left.location.register,location.register));
500         else
501           internalerror(2013030501);
502         end;
503       end;
504 
{ unit initialization: install the SPARC specific node classes in the
  class reference variables cmoddivnode, cshlshrnode, cnotnode and
  cunaryminusnode }
begin
   cunaryminusnode:=tsparcunaryminusnode;
   cnotnode:=tSparcnotnode;
   cshlshrnode:=tSparcshlshrnode;
   cmoddivnode:=tSparcmoddivnode;
end.
511