{
    Copyright (c) 1998-2002 by Florian Klaempfl

    Generate SPARC assembler for math nodes

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
unit ncpumat;

{$i fpcdefs.inc}

interface

    uses
      node,nmat,ncgmat;

    type
      { div/mod code generation for SPARC/SPARC64 }
      tSparcmoddivnode = class(tmoddivnode)
        procedure pass_generate_code;override;
{$ifdef SPARC64}
        { sparc64 only needs the helper for overflow-checked 64 bit div/mod }
        function use_moddiv64bitint_helper : boolean; override;
{$endif SPARC64}
      end;

      { shl/shr code generation; on 32 bit SPARC, 64 bit shifts by a
        constant are emitted inline, everything else uses a helper }
      tSparcshlshrnode = class(tcgshlshrnode)
{$ifndef SPARC64}
        procedure second_64bit;override;
        { everything will be handled in pass_2 }
        function first_shlshr64bitint: tnode; override;
{$endif SPARC64}
      end;

      { boolean "not" code generation }
      tSparcnotnode = class(tcgnotnode)
        procedure second_boolean;override;
      end;

      { floating point negation code generation }
      tsparcunaryminusnode = class(tcgunaryminusnode)
        procedure second_float; override;
      end;

implementation

    uses
      globtype,systems,constexp,
      cutils,verbose,globals,
      symconst,symdef,
      aasmbase,aasmcpu,aasmtai,aasmdata,
      defutil,
      cgbase,cgobj,hlcgobj,pass_2,procinfo,
      ncon,
      cpubase,
      ncgutil,cgcpu,cgutils;

{*****************************************************************************
                             TSparcMODDIVNODE
*****************************************************************************}

{$ifdef sparc64}
    { Returns true when a helper routine must be used instead of inline code:
      sparc64 has no overflow checked 64 bit div }
    function tSparcmoddivnode.use_moddiv64bitint_helper: boolean;
      begin
        { sparc64 has no overflow checked 64 bit div }
        result:=(is_64bitint(left.resultdef) or is_64bitint(right.resultdef)) and
          (cs_check_overflow in current_settings.localswitches);
      end;


    { Emits sparc64 code for div/mod: division by a power-of-2 constant is
      turned into shifts (when overflow checking is off), everything else
      uses the hardware divide instructions selected from divops below. }
    procedure tSparcmoddivnode.pass_generate_code;
      const
        { opcode table indexed by [64 bit, signed, overflow checking];
          A_NOP marks the combinations for which no cc-setting 64 bit
          divide instruction exists ("64 bit signed overflow") }
        divops: array[boolean, boolean, boolean] of tasmop =
          (((A_UDIV,A_UDIVcc),(A_SDIV,A_SDIVcc)),
           ((A_UDIVX,A_NOP),(A_SDIVX,A_NOP))
          );
      var
        power : longint;
        op : tasmop;
        tmpreg,
        numerator,
        divider,
        resultreg : tregister;
        overflowlabel : tasmlabel;
        ai : taicpu;
        { set when overflow checking was requested but the opcode table has
          no cc-setting divide (A_NOP entry), so no overflow branch is used }
        no_overflow : boolean;
      begin
        secondpass(left);
        secondpass(right);
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);

        { put numerator in register }
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
        numerator := left.location.register;
        resultreg := location.register;

        if is_64bit(resultdef) then
          begin
            { strength-reduce div by a power-of-2 constant to shifts }
            if (nodetype = divn) and
              (right.nodetype = ordconstn) and
              ispowerof2(tordconstnode(right).value.svalue,power) and
              (not (cs_check_overflow in current_settings.localswitches)) then
              begin
                if is_signed(left.resultdef) Then
                  begin
                    { signed division needs a rounding-towards-zero bias:
                      replicate the sign bit over the whole register first }
                    tmpreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,63,numerator,tmpreg);
                    { if signed, tmpreg=right value-1, otherwise 0 }
                    cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_INT,tordconstnode(right).value.svalue-1,tmpreg);
                    { add to the left value }
                    cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_INT,numerator,tmpreg);
                    { arithmetic shift of the biased value gives the quotient }
                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,aword(power),tmpreg,resultreg);
                  end
                else
                  { unsigned: a plain logical shift suffices }
                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,aword(power),numerator,resultreg);
              end
            else
              begin
                { load divider in a register if necessary }
                divider:=NR_NO;
                if (right.location.loc<>LOC_CONSTANT) or
                   (right.location.value<simm13lo) or
                   (right.location.value>simm13hi) then
                  begin
                    hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,
                      right.resultdef,right.resultdef,true);
                    divider:=right.location.register;
                  end;

                op := divops[true, is_signed(right.resultdef),
                             cs_check_overflow in current_settings.localswitches];
                if op=A_NOP then
                  { current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('Wrong code generated here'))); }
                  begin
                    { no cc-setting 64 bit divide available: fall back to the
                      non-checking opcode and omit the overflow branch below }
                    no_overflow:=true;
                    op:=divops[true,is_signed(right.resultdef),false];
                  end
                else
                  no_overflow:=false;
                if (divider<>NR_NO) then
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,numerator,divider,resultreg))
                else
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(op,numerator,right.location.value,resultreg));

                if (nodetype = modn) then
                  begin
                    { mod = numerator - (numerator div divider)*divider;
                      resultreg holds the quotient at this point }
                    if not no_overflow then
                      begin
                        current_asmdata.getjumplabel(overflowlabel);
                        ai:=taicpu.op_cond_sym(A_Bxx,C_VS,overflowlabel);
                        ai.delayslot_annulled:=true;
                        current_asmdata.CurrAsmList.concat(ai);
                      end;
                    { NOTE(review): this NOT sits in the annulled delay slot of
                      the branch-on-overflow above, so it presumably only
                      executes on the overflow path - confirm against the
                      SPARC annul semantics before touching this sequence }
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_NOT,resultreg));
                    if not no_overflow then
                      cg.a_label(current_asmdata.CurrAsmList,overflowlabel);
                    if (divider<>NR_NO) then
                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_MULX,resultreg,divider,resultreg))
                    else
                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(A_MULX,resultreg,right.location.value,resultreg));
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SUB,numerator,resultreg,resultreg));
                  end;
              end;
          end
        else
          begin
            { 32 bit operands on sparc64: same structure as the 64 bit path,
              but using the 32 bit divide instructions which read the upper
              word of the dividend from the %y register }
            if (nodetype = divn) and
              (right.nodetype = ordconstn) and
              ispowerof2(tordconstnode(right).value.svalue,power) and
              (not (cs_check_overflow in current_settings.localswitches)) then
              begin
                if is_signed(left.resultdef) Then
                  begin
                    { bias negative dividends so the shift rounds towards zero }
                    tmpreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,31,numerator,tmpreg);
                    { if signed, tmpreg=right value-1, otherwise 0 }
                    cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_INT,tordconstnode(right).value.svalue-1,tmpreg);
                    { add to the left value }
                    cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_INT,numerator,tmpreg);
                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,aword(power),tmpreg,resultreg);
                  end
                else
                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,aword(power),numerator,resultreg);
              end
            else
              begin
                { load divider in a register if necessary }
                divider:=NR_NO;
                if (right.location.loc<>LOC_CONSTANT) or
                   (right.location.value<simm13lo) or
                   (right.location.value>simm13hi) then
                  begin
                    hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,
                      right.resultdef,right.resultdef,true);
                    divider:=right.location.register;
                  end;

                { needs overflow checking, (-maxlongint-1) div (-1) overflows! }
                { And on Sparc, the only way to catch a div-by-0 is by checking }
                { the overflow flag (JM)                                        }

                { Fill %y with the -1 or 0 depending on the highest bit }
                if is_signed(left.resultdef) then
                  begin
                    tmpreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(A_SRA,numerator,31,tmpreg));
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,tmpreg,NR_Y));
                  end
                else
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,NR_G0,NR_Y));
                { wait 3 instructions slots before we can read %y }
                current_asmdata.CurrAsmList.concat(taicpu.op_none(A_NOP));
                current_asmdata.CurrAsmList.concat(taicpu.op_none(A_NOP));
                current_asmdata.CurrAsmList.concat(taicpu.op_none(A_NOP));

                op := divops[false, is_signed(right.resultdef),
                             cs_check_overflow in current_settings.localswitches];
                if (divider<>NR_NO) then
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,numerator,divider,resultreg))
                else
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(op,numerator,right.location.value,resultreg));

                if (nodetype = modn) then
                  begin
                    { mod = numerator - (numerator div divider)*divider }
                    current_asmdata.getjumplabel(overflowlabel);
                    ai:=taicpu.op_cond_sym(A_Bxx,C_VS,overflowlabel);
                    ai.delayslot_annulled:=true;
                    current_asmdata.CurrAsmList.concat(ai);
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_NOT,resultreg));
                    cg.a_label(current_asmdata.CurrAsmList,overflowlabel);
                    if (divider<>NR_NO) then
                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SMUL,resultreg,divider,resultreg))
                    else
                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(A_SMUL,resultreg,right.location.value,resultreg));
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SUB,numerator,resultreg,resultreg));
                  end;
              end;
          end;
        { set result location }
        location.loc:=LOC_REGISTER;
        location.register:=resultreg;
        cg.g_overflowcheck(current_asmdata.CurrAsmList,Location,resultdef);
      end;
{$else sparc64}
    { Emits 32 bit SPARC code for div/mod: division by a power-of-2 constant
      becomes shifts (when overflow checking is off); otherwise the hardware
      UDIV/SDIV instructions are used, with %y preloaded with the sign/zero
      extension of the dividend. }
    procedure tSparcmoddivnode.pass_generate_code;
      const
        { opcode table indexed by [signed, overflow checking]; the cc-setting
          variants are needed to detect "signed overflow" and div-by-0 }
        divops: array[boolean, boolean] of tasmop =
          ((A_UDIV,A_UDIVcc),(A_SDIV,A_SDIVcc));
      var
        power : longint;
        op : tasmop;
        tmpreg,
        numerator,
        divider,
        resultreg : tregister;
        overflowlabel : tasmlabel;
        ai : taicpu;
      begin
        secondpass(left);
        secondpass(right);
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);

        { put numerator in register }
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
        numerator := left.location.register;
        resultreg := location.register;

        { strength-reduce div by a power-of-2 constant to shifts }
        if (nodetype = divn) and
          (right.nodetype = ordconstn) and
          ispowerof2(tordconstnode(right).value.svalue,power) and
          (not (cs_check_overflow in current_settings.localswitches)) then
          begin
            if is_signed(left.resultdef) Then
              begin
                { signed division needs a rounding-towards-zero bias:
                  replicate the sign bit over the whole register first }
                tmpreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
                cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,31,numerator,tmpreg);
                { if signed, tmpreg=right value-1, otherwise 0 }
                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,OS_INT,tordconstnode(right).value.svalue-1,tmpreg);
                { add to the left value }
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_INT,numerator,tmpreg);
                { arithmetic shift of the biased value gives the quotient }
                cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,aword(power),tmpreg,resultreg);
              end
            else
              { unsigned: a plain logical shift suffices }
              cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,aword(power),numerator,resultreg);
          end
        else
          begin
            { load divider in a register if necessary }
            divider:=NR_NO;
            if (right.location.loc<>LOC_CONSTANT) or
               (right.location.value<simm13lo) or
               (right.location.value>simm13hi) then
              begin
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,
                  right.resultdef,right.resultdef,true);
                divider:=right.location.register;
              end;

            { needs overflow checking, (-maxlongint-1) div (-1) overflows! }
            { And on Sparc, the only way to catch a div-by-0 is by checking }
            { the overflow flag (JM)                                        }

            { Fill %y with the -1 or 0 depending on the highest bit }
            if is_signed(left.resultdef) then
              begin
                tmpreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(A_SRA,numerator,31,tmpreg));
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,tmpreg,NR_Y));
              end
            else
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,NR_G0,NR_Y));
            { wait 3 instructions slots before we can read %y }
            current_asmdata.CurrAsmList.concat(taicpu.op_none(A_NOP));
            current_asmdata.CurrAsmList.concat(taicpu.op_none(A_NOP));
            current_asmdata.CurrAsmList.concat(taicpu.op_none(A_NOP));

            op := divops[is_signed(right.resultdef),
                         cs_check_overflow in current_settings.localswitches];
            if (divider<>NR_NO) then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,numerator,divider,resultreg))
            else
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(op,numerator,right.location.value,resultreg));

            if (nodetype = modn) then
              begin
                { mod = numerator - (numerator div divider)*divider;
                  resultreg holds the quotient at this point }
                current_asmdata.getjumplabel(overflowlabel);
                ai:=taicpu.op_cond_sym(A_Bxx,C_VS,overflowlabel);
                ai.delayslot_annulled:=true;
                current_asmdata.CurrAsmList.concat(ai);
                { NOTE(review): this NOT sits in the annulled delay slot of
                  the branch-on-overflow above, so it presumably only runs on
                  the overflow path - confirm against SPARC annul semantics }
                current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_NOT,resultreg));
                cg.a_label(current_asmdata.CurrAsmList,overflowlabel);
                if (divider<>NR_NO) then
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SMUL,resultreg,divider,resultreg))
                else
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(A_SMUL,resultreg,right.location.value,resultreg));
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SUB,numerator,resultreg,resultreg));
              end;
          end;
        { set result location }
        location.loc:=LOC_REGISTER;
        location.register:=resultreg;
        cg.g_overflowcheck(current_asmdata.CurrAsmList,Location,resultdef);
      end;
{$endif sparc64}

{*****************************************************************************
                             TSparcSHLRSHRNODE
*****************************************************************************}

{$ifndef SPARC64}
    { Decides in pass 1 whether a 64 bit shift needs a helper call:
      only shifts by a non-constant amount do; constant shifts are
      emitted inline by second_64bit below. }
    function TSparcShlShrNode.first_shlshr64bitint:TNode;
      begin
        { 64bit without constants need a helper }
        if is_64bit(left.resultdef) and
           (right.nodetype<>ordconstn) then
          begin
            result:=inherited first_shlshr64bitint;
            exit;
          end;

        result := nil;
      end;


    { Emits inline code for a 64 bit shl/shr by a constant amount on
      32 bit SPARC, operating on the lo/hi register pair. }
    procedure tSparcshlshrnode.second_64bit;
      var
        hregister,hreg64hi,hreg64lo : tregister;
        op : topcg;
        { shift amount, already masked to 0..63 }
        shiftval: aword;
      const
        { primary shift opcode selected by direction (shln -> OP_SHL) }
        ops: array [boolean] of topcg = (OP_SHR,OP_SHL);
      begin
        { 64bit without constants need a helper, and is
          already replaced in pass1 }
        if (right.nodetype<>ordconstn) then
          internalerror(200405301);

        location_reset(location, LOC_REGISTER, def_cgsize(resultdef));

        { load left operator in a register }
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true);
        hreg64hi:=left.location.register64.reghi;
        hreg64lo:=left.location.register64.reglo;

        shiftval := tordconstnode(right).value.svalue and 63;
        op := ops[nodetype=shln];
        location.register64.reglo:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_32);
        location.register64.reghi:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_32);

        { Emitting "left shl 1" as "left+left" is twice shorter }
        if (nodetype=shln) and (shiftval=1) then
          cg64.a_op64_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_64,left.location.register64,left.location.register64,location.register64)
        else if shiftval > 31 then
          begin
            { shift crosses the word boundary completely: one half of the
              result is 0 and the other is a single 32 bit shift }
            if nodetype = shln then
              begin
                cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_32,0,location.register64.reglo);
                { if shiftval and 31 = 0, it will optimize to MOVE }
                cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHL, OS_32, shiftval and 31, hreg64lo, location.register64.reghi);
              end
            else
              begin
                cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_32,0,location.register64.reghi);
                cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_32, shiftval and 31, hreg64hi, location.register64.reglo);
              end;
          end
        else
          begin
            { shift < 32: shift both halves, then OR in the bits that
              cross from the other half (shifted by 32-shiftval) }
            hregister := cg.getintregister(current_asmdata.CurrAsmList, OS_32);

            cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, op, OS_32, shiftval, hreg64hi, location.register64.reghi);
            cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, op, OS_32, shiftval, hreg64lo, location.register64.reglo);
            { for shiftval = 0 no bits cross the word boundary }
            if shiftval <> 0 then
              begin
                if nodetype = shln then
                  begin
                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHR, OS_32, 32-shiftval, hreg64lo, hregister);
                    cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_OR, OS_32, hregister, location.register64.reghi, location.register64.reghi);
                  end
                else
                  begin
                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList, OP_SHL, OS_32, 32-shiftval, hreg64hi, hregister);
                    cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList, OP_OR, OS_32, hregister, location.register64.reglo, location.register64.reglo);
                  end;
              end;
          end;
      end;
{$endif SPARC64}


{*****************************************************************************
                               TSPARCNOTNODE
*****************************************************************************}

    { Emits code for boolean "not": a flags location is simply inverted;
      a register/memory operand is compared against zero and the result
      becomes an "equal" flags location. }
    procedure tsparcnotnode.second_boolean;
      begin
        if not handle_locjump then
          begin
            secondpass(left);
            case left.location.loc of
              LOC_FLAGS :
                { operand already lives in the condition flags: just invert }
                begin
                  location_copy(location,left.location);
                  inverse_flags(location.resflags);
                end;
              LOC_REGISTER, LOC_CREGISTER,
              LOC_REFERENCE, LOC_CREFERENCE,
              LOC_SUBSETREG, LOC_CSUBSETREG,
              LOC_SUBSETREF, LOC_CSUBSETREF:
                begin
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
{$ifndef SPARC64}
                  { 64 bit on 32 bit SPARC: OR the two halves together just
                    to set the integer condition codes (result to %g0) }
                  if is_64bit(left.resultdef) then
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_ORcc,
                      left.location.register64.reglo,left.location.register64.reghi,NR_G0))
                  else
{$endif SPARC64}
                    { compare against 0, discarding the subtraction result }
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_const_reg(A_SUBcc,left.location.register,0,NR_G0));
                  location_reset(location,LOC_FLAGS,OS_NO);
                  { "not x" is true exactly when x was equal to zero }
                  location.resflags.Init(NR_ICC,F_E);
                end;
              else
                internalerror(2003042401);
            end;
          end;
      end;


{*****************************************************************************
                             TSPARCUNARYMINUSNODE
*****************************************************************************}

    { Emits a floating point negation using the FNEG instruction matching
      the operand size (single/double/quad). }
    procedure tsparcunaryminusnode.second_float;
      begin
        secondpass(left);
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
        location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
        case location.size of
          OS_F32:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEGs,left.location.register,location.register));
          OS_F64:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEGd,left.location.register,location.register));
          OS_F128:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEGq,left.location.register,location.register));
          else
            internalerror(2013030501);
        end;
      end;

{ register the SPARC-specific node classes with the node factory }
begin
  cmoddivnode:=tSparcmoddivnode;
  cshlshrnode:=tSparcshlshrnode;
  cnotnode:=tSparcnotnode;
  cunaryminusnode:=tsparcunaryminusnode;
end.