1 { 2 Copyright (c) 1998-2002 by Florian Klaempfl 3 4 Generate i8086 assembler for math nodes 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software 18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 19 20 **************************************************************************** 21 } 22 unit n8086mat; 23 24 {$i fpcdefs.inc} 25 26 interface 27 28 uses 29 node,nmat,ncgmat,nx86mat; 30 31 type 32 ti8086moddivnode = class(tmoddivnode) use_moddiv32bit_helpernull33 function use_moddiv32bit_helper: boolean; first_moddivintnull34 function first_moddivint: tnode; override; 35 procedure pass_generate_code;override; 36 end; 37 38 ti8086shlshrnode = class(tx86shlshrnode) 39 procedure second_64bit;override; first_shlshr64bitintnull40 function first_shlshr64bitint: tnode; override; 41 end; 42 43 ti8086unaryminusnode = class(tx86unaryminusnode) 44 end; 45 46 ti8086notnode = class(tx86notnode) 47 end; 48 49 50 implementation 51 52 uses 53 globtype,systems,constexp, 54 cutils,verbose,globals, 55 symconst,symdef,aasmbase,aasmtai,aasmdata,aasmcpu,defutil, 56 cgbase,pass_2, 57 ncon, 58 cpubase,cpuinfo, 59 cga,ncgutil,cgobj,cgutils, 60 hlcgobj; 61 62 {***************************************************************************** 63 ti8086moddivnode 64 *****************************************************************************} 65 66 ti8086moddivnode.use_moddiv32bit_helpernull67 function ti8086moddivnode.use_moddiv32bit_helper: boolean; 68 begin 69 result:=is_32bit(left.resultdef) or 70 is_64bit(left.resultdef) or 71 is_32bit(right.resultdef) or 72 is_64bit(right.resultdef); 73 end; 74 75 ti8086moddivnode.first_moddivintnull76 function ti8086moddivnode.first_moddivint: tnode; 77 begin 78 if use_moddiv32bit_helper then 79 result:=inherited first_moddivint 80 else 81 result:=nil; 82 end; 83 84 log2null85 function log2(i : word) : word; 86 begin 87 result:=0; 88 i:=i shr 1; 89 while i<>0 do 90 begin 91 i:=i shr 1; 92 inc(result); 93 end; 94 end; 95 96 97 procedure ti8086moddivnode.pass_generate_code; 98 var 99 hreg1,hreg2:Tregister; 100 power:longint; 101 hl:Tasmlabel; 102 op:Tasmop; 103 e : smallint; 104 d,l,r,s,m,a,n,t : word; 105 m_low,m_high,j,k : dword; 106 invertsign: Boolean; 107 begin 108 secondpass(left); 109 if codegenerror then 110 exit; 111 secondpass(right); 112 if codegenerror then 113 exit; 114 115 if is_64bitint(resultdef) or is_32bitint(resultdef) then 116 { should be handled in pass_1 (JM) } 117 internalerror(200109052); 118 { put numerator in register } 119 location_reset(location,LOC_REGISTER,def_cgsize(resultdef)); 120 hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false); 121 hreg1:=left.location.register; 122 123 if (nodetype=divn) and (right.nodetype=ordconstn) then 124 begin 125 if isabspowerof2(tordconstnode(right).value,power) then 126 begin 127 { for signed numbers, the numerator must be adjusted before the 128 shift instruction, but not wih unsigned numbers! Otherwise, 129 "Cardinal($ffffffff) div 16" overflows! (JM) } 130 if is_signed(left.resultdef) Then 131 begin 132 invertsign:=tordconstnode(right).value<0; 133 if (current_settings.optimizecputype > cpu_386) and 134 not(cs_opt_size in current_settings.optimizerswitches) then 135 { use a sequence without jumps, saw this in 136 comp.compilers (JM) } 137 begin 138 { no jumps, but more operations } 139 hreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT); 140 emit_reg_reg(A_MOV,S_W,hreg1,hreg2); 141 if power=1 then 142 begin 143 {If the left value is negative, hreg2=(1 shl power)-1=1, otherwise 0.} 144 cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_16,15,hreg2); 145 end 146 else 147 begin 148 {If the left value is negative, hreg2=$ffff, otherwise 0.} 149 cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,OS_16,15,hreg2); 150 {If negative, hreg2=(1 shl power)-1, otherwise 0.} 151 emit_const_reg(A_AND,S_W,(aint(1) shl power)-1,hreg2); 152 end; 153 { add to the left value } 154 emit_reg_reg(A_ADD,S_W,hreg2,hreg1); 155 { do the shift } 156 cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,OS_16,power,hreg1); 157 end 158 else 159 begin 160 { a jump, but less operations } 161 emit_reg_reg(A_TEST,S_W,hreg1,hreg1); 162 current_asmdata.getjumplabel(hl); 163 cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NS,hl); 164 if power=1 then 165 emit_reg(A_INC,S_W,hreg1) 166 else 167 emit_const_reg(A_ADD,S_W,(aint(1) shl power)-1,hreg1); 168 cg.a_label(current_asmdata.CurrAsmList,hl); 169 cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,OS_16,power,hreg1); 170 end; 171 if invertsign then 172 emit_reg(A_NEG,S_W,hreg1); 173 end 174 else 175 cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_16,power,hreg1); 176 location.register:=hreg1; 177 end 178 else 179 begin 180 if is_signed(left.resultdef) then 181 begin 182 e:=tordconstnode(right).value.svalue; 183 d:=abs(e); 184 { Determine algorithm (a), multiplier (m), and shift factor (s) for 16-bit 185 signed integer division. Based on: Granlund, T.; Montgomery, P.L.: 186 "Division by Invariant Integers using Multiplication". SIGPLAN Notices, 187 Vol. 29, June 1994, page 61. 188 } 189 190 l:=log2(d); 191 j:=dword($8000) mod dword(d); 192 k:=(dword(1) shl (16+l)) div (dword($8000-j)); 193 m_low:=((dword(1)) shl (16+l)) div d; 194 m_high:=(((dword(1)) shl (16+l)) + k) div d; 195 while ((m_low shr 1) < (m_high shr 1)) and (l > 0) do 196 begin 197 m_low:=m_low shr 1; 198 m_high:=m_high shr 1; 199 dec(l); 200 end; 201 m:=word(m_high); 202 s:=l; 203 if (m_high shr 15)<>0 then 204 a:=1 205 else 206 a:=0; 207 cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX); 208 emit_const_reg(A_MOV,S_W,aint(m),NR_AX); 209 cg.getcpuregister(current_asmdata.CurrAsmList,NR_DX); 210 emit_reg(A_IMUL,S_W,hreg1); 211 emit_reg_reg(A_MOV,S_W,hreg1,NR_AX); 212 if a<>0 then 213 begin 214 emit_reg_reg(A_ADD,S_W,NR_AX,NR_DX); 215 { 216 printf ("; dividend: memory location or register other than AX or DX\n"); 217 printf ("\n"); 218 printf ("MOV AX, 0%08LXh\n", m); 219 printf ("IMUL dividend\n"); 220 printf ("MOV AX, dividend\n"); 221 printf ("ADD DX, AX\n"); 222 if (s) printf ("SAR DX, %d\n", s); 223 printf ("SHR AX, 15\n"); 224 printf ("ADD DX, AX\n"); 225 if (e < 0) printf ("NEG DX\n"); 226 printf ("\n"); 227 printf ("; quotient now in DX\n"); 228 } 229 end; 230 { 231 printf ("; dividend: memory location of register other than AX or DX\n"); 232 printf ("\n"); 233 printf ("MOV AX, 0%08LXh\n", m); 234 printf ("IMUL dividend\n"); 235 printf ("MOV AX, dividend\n"); 236 if (s) printf ("SAR DX, %d\n", s); 237 printf ("SHR AX, 15\n"); 238 printf ("ADD DX, AX\n"); 239 if (e < 0) printf ("NEG DX\n"); 240 printf ("\n"); 241 printf ("; quotient now in DX\n"); 242 } 243 if s<>0 then 244 cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,OS_16,s,NR_DX); 245 cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_16,15,NR_AX); 246 emit_reg_reg(A_ADD,S_W,NR_AX,NR_DX); 247 if e<0 then 248 emit_reg(A_NEG,S_W,NR_DX); 249 cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX); 250 cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX); 251 location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT); 252 cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_DX,location.register) 253 end 254 else 255 begin 256 d:=tordconstnode(right).value.svalue; 257 if d>=$8000 then 258 begin 259 emit_const_reg(A_CMP,S_W,aint(d),hreg1); 260 location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT); 261 emit_const_reg(A_MOV,S_W,0,location.register); 262 emit_const_reg(A_SBB,S_W,-1,location.register); 263 end 264 else 265 begin 266 { Reduce divisor until it becomes odd } 267 n:=0; 268 t:=d; 269 while (t and 1)=0 do 270 begin 271 t:=t shr 1; 272 inc(n); 273 end; 274 { Generate m, s for algorithm 0. Based on: Granlund, T.; Montgomery, 275 P.L.: "Division by Invariant Integers using Multiplication". 276 SIGPLAN Notices, Vol. 29, June 1994, page 61. 277 } 278 l:=log2(t)+1; 279 j:=dword($ffff) mod dword(t); 280 k:=(dword(1) shl (16+l)) div (dword($ffff-j)); 281 m_low:=((dword(1)) shl (16+l)) div t; 282 m_high:=(((dword(1)) shl (16+l)) + k) div t; 283 while ((m_low shr 1) < (m_high shr 1)) and (l>0) do 284 begin 285 m_low:=m_low shr 1; 286 m_high:=m_high shr 1; 287 l:=l-1; 288 end; 289 if (m_high shr 16)=0 then 290 begin 291 m:=word(m_high); 292 s:=l; 293 a:=0; 294 end 295 296 { Generate m, s for algorithm 1. Based on: Magenheimer, D.J.; et al: 297 "Integer Multiplication and Division on the HP Precision Architecture". 298 IEEE Transactions on Computers, Vol 37, No. 8, August 1988, page 980. 299 } 300 else 301 begin 302 s:=log2(t); 303 m_low:=(dword(1) shl (16+s)) div dword(t); 304 r:=word(((dword(1)) shl (16+s)) mod dword(t)); 305 if (r < ((t>>1)+1)) then 306 m:=word(m_low) 307 else 308 m:=word(m_low)+1; 309 a:=1; 310 end; 311 { Reduce multiplier for either algorithm to smallest possible } 312 while (m and 1)=0 do 313 begin 314 m:=m shr 1; 315 dec(s); 316 end; 317 { Adjust multiplier for reduction of even divisors } 318 inc(s,n); 319 cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX); 320 emit_const_reg(A_MOV,S_W,aint(m),NR_AX); 321 cg.getcpuregister(current_asmdata.CurrAsmList,NR_DX); 322 emit_reg(A_MUL,S_W,hreg1); 323 if a<>0 then 324 begin 325 { 326 printf ("; dividend: register other than AX or memory location\n"); 327 printf ("\n"); 328 printf ("MOV AX, 0%08lXh\n", m); 329 printf ("MUL dividend\n"); 330 printf ("ADD AX, 0%08lXh\n", m); 331 printf ("ADC DX, 0\n"); 332 if (s) printf ("SHR DX, %d\n", s); 333 printf ("\n"); 334 printf ("; quotient now in DX\n"); 335 } 336 emit_const_reg(A_ADD,S_W,aint(m),NR_AX); 337 emit_const_reg(A_ADC,S_W,0,NR_DX); 338 end; 339 if s<>0 then 340 cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_16,aint(s),NR_DX); 341 cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX); 342 cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX); 343 location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT); 344 cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_DX,location.register) 345 end; 346 end 347 end 348 end 349 else 350 begin 351 cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX); 352 emit_reg_reg(A_MOV,S_W,hreg1,NR_AX); 353 cg.getcpuregister(current_asmdata.CurrAsmList,NR_DX); 354 {Sign extension depends on the left type.} 355 if torddef(left.resultdef).ordtype=u16bit then 356 emit_reg_reg(A_XOR,S_W,NR_DX,NR_DX) 357 else 358 emit_none(A_CWD,S_NO); 359 360 {Division depends on the right type.} 361 if Torddef(right.resultdef).ordtype=u16bit then 362 op:=A_DIV 363 else 364 op:=A_IDIV; 365 366 if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then 367 emit_ref(op,S_W,right.location.reference) 368 else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then 369 emit_reg(op,S_W,right.location.register) 370 else 371 begin 372 hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size); 373 hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,u16inttype,right.location,hreg1); 374 emit_reg(op,S_W,hreg1); 375 end; 376 377 {Copy the result into a new register. Release AX & DX.} 378 cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX); 379 cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX); 380 location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT); 381 if nodetype=divn then 382 cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_AX,location.register) 383 else 384 cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_DX,location.register); 385 end; 386 end; 387 388 389 {***************************************************************************** 390 TI8086SHLRSHRNODE 391 *****************************************************************************} 392 393 ti8086shlshrnode.first_shlshr64bitintnull394 function ti8086shlshrnode.first_shlshr64bitint: tnode; 395 begin 396 result := nil; 397 end; 398 399 procedure ti8086shlshrnode.second_64bit; 400 var 401 hreg64hi,hreg64lo:Tregister; 402 v : TConstExprInt; 403 tmpreg64: tregister64; 404 begin 405 location_reset(location,LOC_REGISTER,def_cgsize(resultdef)); 406 407 { load left operator in a register } 408 hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false); 409 hreg64hi:=left.location.register64.reghi; 410 hreg64lo:=left.location.register64.reglo; 411 location.register64.reglo:=hreg64lo; 412 location.register64.reghi:=hreg64hi; 413 414 if right.nodetype=ordconstn then 415 begin 416 v:=Tordconstnode(right).value and 63; 417 location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_32); 418 location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_32); 419 if nodetype=shln then 420 cg64.a_op64_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_64,v,left.location.register64,location.register64) 421 else 422 cg64.a_op64_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_64,v,left.location.register64,location.register64); 423 end 424 else 425 begin 426 { load right operators in a register } 427 tmpreg64.reghi:=NR_NO; 428 tmpreg64.reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_16); 429 hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,u16inttype,right.location,tmpreg64.reglo); 430 if nodetype=shln then 431 cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_64,tmpreg64,location.register64) 432 else 433 cg64.a_op64_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_64,tmpreg64,location.register64); 434 end; 435 end; 436 437 438 begin 439 cunaryminusnode:=ti8086unaryminusnode; 440 cmoddivnode:=ti8086moddivnode; 441 cshlshrnode:=ti8086shlshrnode; 442 cnotnode:=ti8086notnode; 443 end. 444