1 {
2     Copyright (c) 1998-2002 by Florian Klaempfl
3 
4     Generate i8086 assembler for math nodes
5 
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10 
11     This program is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15 
16     You should have received a copy of the GNU General Public License
17     along with this program; if not, write to the Free Software
18     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 
20  ****************************************************************************
21 }
22 unit n8086mat;
23 
24 {$i fpcdefs.inc}
25 
26 interface
27 
28     uses
29       node,nmat,ncgmat,nx86mat;
30 
31     type
32       ti8086moddivnode = class(tmoddivnode)
use_moddiv32bit_helpernull33          function use_moddiv32bit_helper: boolean;
first_moddivintnull34          function first_moddivint: tnode; override;
35          procedure pass_generate_code;override;
36       end;
37 
38       ti8086shlshrnode = class(tx86shlshrnode)
39          procedure second_64bit;override;
first_shlshr64bitintnull40          function first_shlshr64bitint: tnode; override;
41       end;
42 
43       ti8086unaryminusnode = class(tx86unaryminusnode)
44       end;
45 
46       ti8086notnode = class(tx86notnode)
47       end;
48 
49 
50 implementation
51 
52     uses
53       globtype,systems,constexp,
54       cutils,verbose,globals,
55       symconst,symdef,aasmbase,aasmtai,aasmdata,aasmcpu,defutil,
56       cgbase,pass_2,
57       ncon,
58       cpubase,cpuinfo,
59       cga,ncgutil,cgobj,cgutils,
60       hlcgobj;
61 
62 {*****************************************************************************
63                              ti8086moddivnode
64 *****************************************************************************}
65 
66 
ti8086moddivnode.use_moddiv32bit_helpernull67     function ti8086moddivnode.use_moddiv32bit_helper: boolean;
68       begin
69         result:=is_32bit(left.resultdef) or
70                 is_64bit(left.resultdef) or
71                 is_32bit(right.resultdef) or
72                 is_64bit(right.resultdef);
73       end;
74 
75 
ti8086moddivnode.first_moddivintnull76     function ti8086moddivnode.first_moddivint: tnode;
77       begin
78         if use_moddiv32bit_helper then
79           result:=inherited first_moddivint
80         else
81           result:=nil;
82       end;
83 
84 
log2null85     function log2(i : word) : word;
86       begin
87         result:=0;
88         i:=i shr 1;
89         while i<>0 do
90           begin
91             i:=i shr 1;
92             inc(result);
93           end;
94       end;
95 
96 
97    procedure ti8086moddivnode.pass_generate_code;
98       var
99         hreg1,hreg2:Tregister;
100         power:longint;
101         hl:Tasmlabel;
102         op:Tasmop;
103         e : smallint;
104         d,l,r,s,m,a,n,t : word;
105         m_low,m_high,j,k : dword;
106         invertsign: Boolean;
107       begin
108         secondpass(left);
109         if codegenerror then
110           exit;
111         secondpass(right);
112         if codegenerror then
113           exit;
114 
115         if is_64bitint(resultdef) or is_32bitint(resultdef) then
116           { should be handled in pass_1 (JM) }
117           internalerror(200109052);
118         { put numerator in register }
119         location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
120         hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
121         hreg1:=left.location.register;
122 
123         if (nodetype=divn) and (right.nodetype=ordconstn) then
124           begin
125             if isabspowerof2(tordconstnode(right).value,power) then
126               begin
127                 { for signed numbers, the numerator must be adjusted before the
128                   shift instruction, but not wih unsigned numbers! Otherwise,
129                   "Cardinal($ffffffff) div 16" overflows! (JM) }
130                 if is_signed(left.resultdef) Then
131                   begin
132                     invertsign:=tordconstnode(right).value<0;
133                     if (current_settings.optimizecputype > cpu_386) and
134                        not(cs_opt_size in current_settings.optimizerswitches) then
135                       { use a sequence without jumps, saw this in
136                         comp.compilers (JM) }
137                       begin
138                         { no jumps, but more operations }
139                         hreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
140                         emit_reg_reg(A_MOV,S_W,hreg1,hreg2);
141                         if power=1 then
142                           begin
143                             {If the left value is negative, hreg2=(1 shl power)-1=1, otherwise 0.}
144                             cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_16,15,hreg2);
145                           end
146                         else
147                           begin
148                             {If the left value is negative, hreg2=$ffff, otherwise 0.}
149                             cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,OS_16,15,hreg2);
150                             {If negative, hreg2=(1 shl power)-1, otherwise 0.}
151                             emit_const_reg(A_AND,S_W,(aint(1) shl power)-1,hreg2);
152                           end;
153                         { add to the left value }
154                         emit_reg_reg(A_ADD,S_W,hreg2,hreg1);
155                         { do the shift }
156                         cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,OS_16,power,hreg1);
157                       end
158                     else
159                       begin
160                         { a jump, but less operations }
161                         emit_reg_reg(A_TEST,S_W,hreg1,hreg1);
162                         current_asmdata.getjumplabel(hl);
163                         cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NS,hl);
164                         if power=1 then
165                           emit_reg(A_INC,S_W,hreg1)
166                         else
167                           emit_const_reg(A_ADD,S_W,(aint(1) shl power)-1,hreg1);
168                         cg.a_label(current_asmdata.CurrAsmList,hl);
169                         cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,OS_16,power,hreg1);
170                       end;
171                     if invertsign then
172                       emit_reg(A_NEG,S_W,hreg1);
173                   end
174                 else
175                   cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_16,power,hreg1);
176                 location.register:=hreg1;
177               end
178             else
179               begin
180                 if is_signed(left.resultdef) then
181                   begin
182                     e:=tordconstnode(right).value.svalue;
183                     d:=abs(e);
184                     { Determine algorithm (a), multiplier (m), and shift factor (s) for 16-bit
185                       signed integer division. Based on: Granlund, T.; Montgomery, P.L.:
186                       "Division by Invariant Integers using Multiplication". SIGPLAN Notices,
187                       Vol. 29, June 1994, page 61.
188                     }
189 
190                     l:=log2(d);
191                     j:=dword($8000) mod dword(d);
192                     k:=(dword(1) shl (16+l)) div (dword($8000-j));
193                     m_low:=((dword(1)) shl (16+l)) div d;
194                     m_high:=(((dword(1)) shl (16+l)) + k) div d;
195                     while ((m_low shr 1) < (m_high shr 1)) and (l > 0) do
196                       begin
197                         m_low:=m_low shr 1;
198                         m_high:=m_high shr 1;
199                         dec(l);
200                       end;
201                     m:=word(m_high);
202                     s:=l;
203                     if (m_high shr 15)<>0 then
204                       a:=1
205                     else
206                       a:=0;
207                     cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
208                     emit_const_reg(A_MOV,S_W,aint(m),NR_AX);
209                     cg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
210                     emit_reg(A_IMUL,S_W,hreg1);
211                     emit_reg_reg(A_MOV,S_W,hreg1,NR_AX);
212                     if a<>0 then
213                       begin
214                         emit_reg_reg(A_ADD,S_W,NR_AX,NR_DX);
215                         {
216                           printf ("; dividend: memory location or register other than AX or DX\n");
217                           printf ("\n");
218                           printf ("MOV AX, 0%08LXh\n", m);
219                           printf ("IMUL dividend\n");
220                           printf ("MOV AX, dividend\n");
221                           printf ("ADD DX, AX\n");
222                           if (s) printf ("SAR DX, %d\n", s);
223                           printf ("SHR AX, 15\n");
224                           printf ("ADD DX, AX\n");
225                           if (e < 0) printf ("NEG DX\n");
226                           printf ("\n");
227                           printf ("; quotient now in DX\n");
228                         }
229                       end;
230                       {
231                         printf ("; dividend: memory location of register other than AX or DX\n");
232                         printf ("\n");
233                         printf ("MOV AX, 0%08LXh\n", m);
234                         printf ("IMUL dividend\n");
235                         printf ("MOV AX, dividend\n");
236                         if (s) printf ("SAR DX, %d\n", s);
237                         printf ("SHR AX, 15\n");
238                         printf ("ADD DX, AX\n");
239                         if (e < 0) printf ("NEG DX\n");
240                         printf ("\n");
241                         printf ("; quotient now in DX\n");
242                       }
243                     if s<>0 then
244                       cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,OS_16,s,NR_DX);
245                     cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_16,15,NR_AX);
246                     emit_reg_reg(A_ADD,S_W,NR_AX,NR_DX);
247                     if e<0 then
248                       emit_reg(A_NEG,S_W,NR_DX);
249                     cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
250                     cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
251                     location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
252                     cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_DX,location.register)
253                   end
254                 else
255                   begin
256                     d:=tordconstnode(right).value.svalue;
257                     if d>=$8000 then
258                       begin
259                         emit_const_reg(A_CMP,S_W,aint(d),hreg1);
260                         location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
261                         emit_const_reg(A_MOV,S_W,0,location.register);
262                         emit_const_reg(A_SBB,S_W,-1,location.register);
263                       end
264                     else
265                       begin
266                         { Reduce divisor until it becomes odd }
267                         n:=0;
268                         t:=d;
269                         while (t and 1)=0 do
270                           begin
271                             t:=t shr 1;
272                             inc(n);
273                           end;
274                         { Generate m, s for algorithm 0. Based on: Granlund, T.; Montgomery,
275                         P.L.: "Division by Invariant Integers using Multiplication".
276                         SIGPLAN Notices, Vol. 29, June 1994, page 61.
277                         }
278                         l:=log2(t)+1;
279                         j:=dword($ffff) mod dword(t);
280                         k:=(dword(1) shl (16+l)) div (dword($ffff-j));
281                         m_low:=((dword(1)) shl (16+l)) div t;
282                         m_high:=(((dword(1)) shl (16+l)) + k) div t;
283                         while ((m_low shr 1) < (m_high shr 1)) and (l>0) do
284                           begin
285                             m_low:=m_low shr 1;
286                             m_high:=m_high shr 1;
287                             l:=l-1;
288                           end;
289                         if (m_high shr 16)=0 then
290                           begin
291                             m:=word(m_high);
292                             s:=l;
293                             a:=0;
294                           end
295 
296                         { Generate m, s for algorithm 1. Based on: Magenheimer, D.J.; et al:
297                         "Integer Multiplication and Division on the HP Precision Architecture".
298                         IEEE Transactions on Computers, Vol 37, No. 8, August 1988, page 980.
299                         }
300                         else
301                           begin
302                             s:=log2(t);
303                             m_low:=(dword(1) shl (16+s)) div dword(t);
304                             r:=word(((dword(1)) shl (16+s)) mod dword(t));
305                             if (r < ((t>>1)+1)) then
306                               m:=word(m_low)
307                             else
308                               m:=word(m_low)+1;
309                             a:=1;
310                           end;
311                         { Reduce multiplier for either algorithm to smallest possible }
312                         while (m and 1)=0 do
313                           begin
314                             m:=m shr 1;
315                             dec(s);
316                           end;
317                         { Adjust multiplier for reduction of even divisors }
318                         inc(s,n);
319                         cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
320                         emit_const_reg(A_MOV,S_W,aint(m),NR_AX);
321                         cg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
322                         emit_reg(A_MUL,S_W,hreg1);
323                         if a<>0 then
324                           begin
325                             {
326                             printf ("; dividend: register other than AX or memory location\n");
327                             printf ("\n");
328                             printf ("MOV AX, 0%08lXh\n", m);
329                             printf ("MUL dividend\n");
330                             printf ("ADD AX, 0%08lXh\n", m);
331                             printf ("ADC DX, 0\n");
332                             if (s) printf ("SHR DX, %d\n", s);
333                             printf ("\n");
334                             printf ("; quotient now in DX\n");
335                             }
336                             emit_const_reg(A_ADD,S_W,aint(m),NR_AX);
337                             emit_const_reg(A_ADC,S_W,0,NR_DX);
338                           end;
339                         if s<>0 then
340                           cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_16,aint(s),NR_DX);
341                         cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
342                         cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
343                         location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
344                         cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_DX,location.register)
345                       end;
346                   end
347               end
348           end
349         else
350           begin
351             cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
352             emit_reg_reg(A_MOV,S_W,hreg1,NR_AX);
353             cg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
354             {Sign extension depends on the left type.}
355             if torddef(left.resultdef).ordtype=u16bit then
356               emit_reg_reg(A_XOR,S_W,NR_DX,NR_DX)
357             else
358               emit_none(A_CWD,S_NO);
359 
360             {Division depends on the right type.}
361             if Torddef(right.resultdef).ordtype=u16bit then
362               op:=A_DIV
363             else
364               op:=A_IDIV;
365 
366             if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
367               emit_ref(op,S_W,right.location.reference)
368             else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
369               emit_reg(op,S_W,right.location.register)
370             else
371               begin
372                 hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size);
373                 hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,u16inttype,right.location,hreg1);
374                 emit_reg(op,S_W,hreg1);
375               end;
376 
377             {Copy the result into a new register. Release AX & DX.}
378             cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
379             cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
380             location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
381             if nodetype=divn then
382               cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_AX,location.register)
383             else
384               cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_DX,location.register);
385           end;
386       end;
387 
388 
389 {*****************************************************************************
390                              TI8086SHLRSHRNODE
391 *****************************************************************************}
392 
393 
ti8086shlshrnode.first_shlshr64bitintnull394     function ti8086shlshrnode.first_shlshr64bitint: tnode;
395       begin
396         result := nil;
397       end;
398 
399     procedure ti8086shlshrnode.second_64bit;
400       var
401         hreg64hi,hreg64lo:Tregister;
402         v : TConstExprInt;
403         tmpreg64: tregister64;
404       begin
405         location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
406 
407         { load left operator in a register }
408         hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
409         hreg64hi:=left.location.register64.reghi;
410         hreg64lo:=left.location.register64.reglo;
411         location.register64.reglo:=hreg64lo;
412         location.register64.reghi:=hreg64hi;
413 
414         if right.nodetype=ordconstn then
415           begin
416             v:=Tordconstnode(right).value and 63;
417             location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
418             location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
419             if nodetype=shln then
420               cg64.a_op64_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_64,v,left.location.register64,location.register64)
421             else
422               cg64.a_op64_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_64,v,left.location.register64,location.register64);
423           end
424         else
425           begin
426             { load right operators in a register }
427             tmpreg64.reghi:=NR_NO;
428             tmpreg64.reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_16);
429             hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,u16inttype,right.location,tmpreg64.reglo);
430             if nodetype=shln then
431               cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_64,tmpreg64,location.register64)
432             else
433               cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_64,tmpreg64,location.register64);
434           end;
435       end;
436 
437 
438 begin
439    cunaryminusnode:=ti8086unaryminusnode;
440    cmoddivnode:=ti8086moddivnode;
441    cshlshrnode:=ti8086shlshrnode;
442    cnotnode:=ti8086notnode;
443 end.
444