1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2% 3% File: PXC:386-LAP.SL 4% Description: Intel i386/i486 PSL Assembler 5% Author: H. Melenk 6% Created: 1-August 1989 7% Modified: 8% Mode: Lisp 9% Status: Open Source: BSD License 10% Package: 11% 12% Redistribution and use in source and binary forms, with or without 13% modification, are permitted provided that the following conditions are met: 14% 15% * Redistributions of source code must retain the relevant copyright 16% notice, this list of conditions and the following disclaimer. 17% * Redistributions in binary form must reproduce the above copyright 18% notice, this list of conditions and the following disclaimer in the 19% documentation and/or other materials provided with the distribution. 20% 21% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23% THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24% PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR 25% CONTRIBUTORS 26% BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27% CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28% SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31% ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32% POSSIBILITY OF SUCH DAMAGE. 33%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 34% 35% Revisions 36% 28-Apr-92 (Herbert Melenk) 37% no relocation for quoted small ID's 38% 39% 3-Apr-90 (Winfried Neun) 40% added support for new car and cdr scheme in modr/m 41 42 43% ------------------------------------------------------------ 44% Fluid declarations: 45% ------------------------------------------------------------ 46 47(fluid '(LabelOffsets* % just the label entries from 48 % BranchAndLabelAList!* 49 % Has the form ( (Label.Offset) ... ) 50 BranchCodeList* % Used in Branch optimization 51 BranchAndLabelAList* % Used in Branch optimization 52 CurrentOffset* % the global value of the current 53 % byte displacement from the starting 54 % point of the code 55 CodeSize* % Current number of bytes generated 56 CodeBase* % Starting address of the module 57 Entries* % list of procedure entries of the 58 % form 59 % ((ProcedureName 60 % ProcedureType 61 % NumberOfArguments) . 62 % CurrentOffset!* ) 63 InstructionChanged* % Boolean - indicates if any 64 % instructions have changed due to 65 % branch optimization 66 InstructionSize* % Contains the size constant Byte 67 % Word or Long during length compute 68 % and assembly of individual instr. 69 ForwardInternalReferences* % a-list of offsets of references to 70 % internal functions, to be patched 71 % by SystemFaslFixup 72 LapReturnValue* % set by SaveEntry to the pointer 73 % to be returned by LAP 74 OperandRegisterNumber* % see EffectiveAddress 75 *WritingFaslFile % FLAG: if true, then we are writing 76 % the resulting code to a file, 77 % otherwise we are depositing it into 78 % memory directly 79 InitOffset* % The offset from the module to the 80 % Initialiization code which is to be 81 % run when the module is loaded 82 *PGWD % FLAG: if true, then mnemonics and 83 % assembled instructions are printed 84 *PWrds % FLAG: if true, then base address 85 % and size of each compiled 86 % procedure are printed as they are 87 % deposited into memory 88 *align16 % align lables to 16 byte 89 % boundaries 90 *lapopt 91 *trlapopt 92)) 93 94(setq *lapopt t) 95 96(fluid '(*immediatequote)) 97(setq *immediatequote nil) 98(fluid '(*testlap)) % diagnostic output from LAP MK 99 100(ds LabelP (X) (atom X)) 101 102(setq *PWrds t) % By default show where the code is 103 % put in memory 104 105% ------------------------------------------------------------ 106% Constant declarations: 107% ------------------------------------------------------------ 108 109(DefConst 110 RELOC_ID_NUMBER 1 111 RELOC_HALFWORD 2 112 RELOC_WORD 1 113 RELOC_INF 3) 114 115(DefConst MaximumShortBranch 127) 116 117% ------------------------------------------------------------ 118% Start of actual code 119% ------------------------------------------------------------ 120 121(de Lap (U) 122(prog (LabelOffsets* LapReturnValue* Entries* temp) 123 (cond ((not *WritingFaslFile) (setq CurrentOffset* 0))) 124 (setq U (&fillframeholes u)) 125 126%%% (setq u (lapopt1 u)) % optimize macros 127 128 (setq U (Pass1Lap U)) % Pass1lap 129 % expand all the LAP macros 130 % Note that this is defined in 131 % PC:PASS-1-LAP.SL 132 133 (setq U (LapoptFrame u)) % optimize frame-register transports 134 (setq U (LapoptPeep u)) % peephole optimizer for 486 code 135 136 (when *WritingFaslFile % round off to fullword address 137 (while (not (eq (wshift (wshift currentOffset* -3) 3) currentOffset*)) 138 (depositbyte 0) )) 139 140 (SETQ U (ReformBranches U)) % process conditional branches 141 (setq U (OptimizeBranches U)) % optimize branches and 142 % calculate offsets and total length 143 144 (when (not *WritingFaslFile) 145 (setq CodeBase* (GTBPS (Quotient (Plus2 CodeSize* 3) 4)))) 146 147 148% Print the machine specific assembly code 149% if the object is an atom then it is a LABEL 150% otherwise it is an instruction 151 152 (cond (*PGWD (foreach X in U do 153 (cond ((LabelP X) (Prin2 X)) (t (PrintF " %p%n" X)))))) 154 155 (foreach Instruction_or_Label in U do 156 (cond 157 ((LabelP Instruction_or_Label) (DepositLabel Instruction_or_Label)) 158 ((equal (first Instruction_or_Label) '*entry) 159 (SaveEntry Instruction_or_Label)) 160 (t (DepositInstruction Instruction_or_Label) ))) 161 162 (DefineEntries) % define entries to whom? 163 164% If you are depositing it into memory the tell the user how much space the 165% code took and where it was loaded. 166% ??? Why is this using the error channel ??? 167 168 (cond ((and (not *WritingFaslFile) *PWrds) 169 (ErrorPrintF "*** %p: base 16#%x, length 10#%d bytes" 170 (foreach X in Entries* collect (first (car X))) 171 CodeBase* CodeSize*))) 172 173 % Do not call MkCODE on LapReturnValue* if it is nil 174 % LapReturnValue* is a fluid variable that got set up to hold 175 % the address of the compiled function when the code was generated 176 (cond 177 (LapReturnValue* 178 (return 179 (MkCODE LapReturnValue*)))))) 180 181 182% CheckForInitCode will scan the Codelist for the first !*Entry 183% testing for !*!*FASL!*!*Initcode!*!*. 184 185(de CheckForInitCode (CodeList) 186 (foreach Instruction in CodeList do 187 (progn (cond ((PairP Instruction) 188 (cond ((equal (car Instruction) '*entry) 189 (cond ((equal (second Instruction) '**Fasl**InitCode**) 190 (return t)))))))))) 191 192% SaveEntry( '(!*entry ProcedureName ProcedureType NumberOfArguments) ) 193% Purpose: To associate with a procedure its location (so other routines can 194% access it 195 196(de saveentry (x) 197 (cond 198 % if X = ( _____ !*!*!*Code!*!*Pointer!*!*!* ... ) 199 ((equal (second x) '***code**pointer***) 200 (setq lapreturnvalue* 201 (if *writingfaslfile currentoffset* (wplus2 codebase* currentoffset*)))) 202 203 % If depositing into memory 204 ((not *writingfaslfile) 205 (setq entries* (cons (cons (rest x) currentoffset*) entries*)) 206 (unless lapreturnvalue* (setq lapreturnvalue* 207 (wplus2 codebase* currentoffset*)))) 208 209 % if X = ( _____ !*!*Fasl!*!*InitCode!*!* ... ) 210 ((equal (second x) '**fasl**initcode**) 211 (setq initoffset* currentoffset*)) 212 213 % if X is an InternalFunction 214 ((flagp (second x) 'internalfunction) 215 (put (second x) 'internalentryoffset currentoffset*)) 216 217 (t (progn 218 (put (second x) 'internalentryoffset currentoffset*) % MK 219 (findidnumber (second x)) 220 (dfprintfasl (list 'putentry (mkquote (second x)) 221 (mkquote (third x)) currentoffset*)))))) 222 223 224% DefineEntries() 225% Purpose: Defines each of the procedures named in the list Entries!* 226% by putting the code pointer into the function cells 227 228(de DefineEntries nil 229 (foreach X in Entries* do 230 (PutD (first (car X)) (second (car X)) 231 (MkCODE (wplus2 CodeBase* (cdr X)))))) 232 233%(de DepositInstruction (X) 234%% This actually dispatches to the procedures to assemble the instructions 235%(prog (Y) 236% (if (eqcar x 'movq) (progn (Depositbyte 16#48) % REX Prefix 237% (rplaca x 'mov)) 238% (when (reg64bitp x) (Depositbyte 16#48))) % REX Prefix 239% (cond ((setq Y (get (first X) 'InstructionDepositFunction)) 240% (Apply Y (list X))) 241% ((setq Y (get (first X) 'InstructionDepositMacro)) 242% (apply2safe y (cdr x))) 243% (t (StdError (BldMsg "Unknown x86_64 instruction %p" X)))))) 244 245 246(de DepositLabel (x) nil) 247 248(fluid '(*testlap REX-Prefix REX? allowextrarexprefix)) 249(de DepositInstruction (X) 250% This actually dispatches to the procedures to assemble the instrucitons 251% version with address calculation test 252(prog (Y offs allowextrarexprefix REX?) 253 254 (when *testlap (prin2 currentoffset*) (tab 10) (print x)) 255 (setq allowextrarexprefix 0) 256 (setq REX-Prefix 16#48) 257 (when *writingfaslfile (setq offs currentoffset*)) 258 (cond ((and (eqcar x 'movq) (not (xmmregp (cadr x))) (not (xmmregp (caddr x)))) 259 (Depositbyte 16#48) % REX Prefix 260 (SETQ REX? (plus codebase!* currentoffset* -1)) 261 (setq allowextrarexprefix 0) 262 (rplaca x 'mov)) 263 ((eqcar x 'addq) (Depositbyte 16#48) % REX Prefix 264 (SETQ REX? (plus codebase!* currentoffset* -1)) 265 (setq allowextrarexprefix 0) 266 (rplaca x 'add)) 267 ((eqcar x 'subq) (Depositbyte 16#48) % REX Prefix 268 (SETQ REX? (plus codebase!* currentoffset* -1)) 269 (setq allowextrarexprefix 0) 270 (rplaca x 'sub)) 271 ((eqcar x 'cmpq) (Depositbyte 16#48) % REX Prefix 272 (SETQ REX? (plus codebase!* currentoffset* -1)) 273 (setq allowextrarexprefix 0) 274 (rplaca x 'cmp)) 275 ((and (pairp x) (flagp (car x) 'norexprefix)) NIL) 276 ((and (pairp x) (flagp (car x) 'onlyupperregrexprefix)) 277 (cond ((upperreg64p x) 278 (setq REX-prefix 16#40) 279 (Depositbyte 16#40) 280 (SETQ REX? (plus codebase!* currentoffset* -1)) 281 (setq allowextrarexprefix 0)))) 282 ((reg64bitp x) (Depositbyte 16#48) % REX Prefix 283 (SETQ REX? (plus codebase!* currentoffset* -1)))) 284 (cond ((setq Y (get (first X) 'InstructionDepositFunction)) 285 (Apply Y (list X))) 286 ((setq Y (get (first X) 'InstructionDepositMacro)) 287 (apply2safe y (cdr x))) 288 (t (StdError (BldMsg "Unknown x86_64 instruction %p" X)))) 289 (when REX? (putbyte REX? 0 REX-Prefix)) %overwrite REX-Prefix 290 291 (when (and (not (eq 0 allowextrarexprefix)) 292 offs (not (equal currentoffset* 293 (plus allowextrarexprefix offs 294 (instructionlength x))))) 295 (StdError (BldMsg "length error with instruction %p: %p" 296 x (difference (difference currentoffset* offs) 297 (instructionlength x))))) 298)) 299 300(flag '(JMP CALL 301 movups movss movupd movsd 302 movhlps movlps movlpd movddup movsldup unpcklps unpcklpd 303 MOVLHPS MOVHPS MOVHPD MOVSHPDUP movaps movapd 304 CVTPI2PS CVTPI2PD CVTSI2SS CVTSI2SD CVTSI2SDQ 305 CVTTPS2PI CVTTPD2PI CVTTSS2SI CVTTSD2SI 306 CVTPS2PI CVTPD2PI CVTSS2SI CVTSD2SI 307 UCOMISS UCOMISD COMISS COMISD 308 MOVW MOVSX MOVZX 309 ADDSD MULsd subsd divsd sqrtsd 310 movd movq ldmxcsr stmxcsr 311 pand pandn por pxor andpd andnpd orpd xorpd 312 ) 'norexprefix) 313 314% Instructions that need a REX.B prefix only if using upper 8 registers 315(flag '(push pop movl movb) 'onlyupperregrexprefix) 316 317(de DepositLabel (x) 318 (when *testlap (prin2 currentoffset*) (tab 10) (print x)) 319 (when (and *writingfaslfile 320 (not (equal currentoffset* (LabelOffset x)))) 321 (StdError (BldMsg "wrong address for label %p: difference = %p" 322 x (difference currentoffset* (LabelOffset x)))))) 323 324 325(CompileTime (progn 326 327(dm DefOpcode (U) 328% 329% (DefOpcode name (parameters) pattern) 330% 331(prog (OpName vars pattern fname) 332 (setq U (rest U)) 333 (setq OpName (pop U)) 334 (setq fname (intern (bldmsg "%w.INSTR" OpName))) 335 (setq OpName (MkQuote OpName)) 336 (setq vars (pop u)) 337 (setq pattern 338 (append u 339 `((t (laperr ',OpName (list .,vars)))))) 340 (setq pattern (cons 'cond pattern)) 341 % (setq u `(lambda ,vars ,pattern)) 342 % (return `(put ,OpName 'InstructionDepositMacro ',u)) 343 (return 344 `(progn 345 (de ,fname ,vars ,pattern) 346 (put ,OpName 'InstructionDepositMacro ',fname))) 347 )) 348 349(dm DefOpLength (U) 350% 351% (DefOpLength name (parameters) pattern) 352% 353(prog (OpName vars pattern fname) 354 (setq U (rest U)) 355 (setq OpName (pop U)) % (quote name) 356 (setq fname (intern (bldmsg "%w.LTH" OpName))) 357 (setq OpName (MkQuote OpName)) % (quote name) 358 (setq vars (pop u)) 359 (setq pattern 360 (append u 361 `((t (laperr ',OpName (list .,vars)))))) 362 (setq pattern (cons 'cond pattern)) 363 % (setq u `(lambda ,vars ,pattern)) 364 % (return `(put ,OpName 'InstructionLengthFunction ',u)) 365 (return 366 `(progn 367 (de ,fname ,vars ,pattern) 368 (put ,OpName 'InstructionLengthFunction ',fname))) 369)) 370 371 372 373)) 374 375 376%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 377% 378% getting the instructions in 379 380 381%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 382% 383% additional test functions 384 385(fluid '(sregs xmmregs)) 386 387(setq sregs '(ES CS SS DS FS GS )) 388 389(setq xmmregs '(xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 390 xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15)) 391 392(de sregp(x) 393 % test for a segment register 394 (and (eqcar x 'reg) 395 (memq (cadr x) sregs))) 396 397(de xmmregp(x) 398 % test for an xmm register 399 (and (eqcar x 'reg) 400 (memq (cadr x) xmmregs))) 401 402(de eaxp(x) 403 (and (eqcar x 'reg) 404 (setq x (cadr x)) 405 (or (eq x 'EAX) (eq x 1)))) 406 407(de memoryp(x) 408 % supports reference to explicit addresses 409 (if (atom x) nil 410 (progn 411 (setq x (car x)) 412 (or 413 (eq x 'label) 414 (eq x '$FLUID) 415 (eq x '$GLOBAL) 416 )))) 417 418(de effap(x) 419 % supports most general memory and register references 420 (or (stringp x) (idp x) (regp x) (xmmregp x) 421 (and (pairp x) 422 (memq (car x) 423 '(indirect displacement indexed $fluid $global 424 fluid global extrareg) )))) 425 426(de stdimmediatep(x) 427 % full size immediate 428 (or (numberp x)(eqcar x 'immediate)(eqcar x 'idloc))) 429 430(de shortlabelp (x)(or (labelp x) (eqcar x 'IMMEDIATE))) 431 432(de adrp (x) (or (atom x) 433 (memq (car x)'(label entry internalentry foreignentry)) 434 (and (eqcar x 'IMMEDIATE) (adrp (cadr x))))) 435 436(de indirectadrp (x) (and (eqcar x 'INDIRECT) 437 (or (adrp (cadr x)) (effap (cadr x))(regp (cadr x))))) 438 439(de smallimmediatep (x) 440 (when (eqcar x 'IMMEDIATE) (setq x (unimmediate x))) 441 (bytep x)) 442 443%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 444% 445% Instruction deposit functions 446 447 448(de laperr(inst par) 449 (StdError (BldMsg "Illegal format: (%p %p)" inst par))) 450 451(de modR/M (op1 op2) 452% The modR/M byte is built from two operands. 453% op1 is always a register (or an absolute code), op2 454% a register or a memory reference 455(prog (OpFn mode base ireg n) 456 457 (when (or (regp op1) (xmmregp op1)) (setq op1 (lsh (reg2int op1 'REXR) 3))) 458 (when (pairp op2) (setq mode (car op2))) 459 460 % case: reg - reg 461 (when (or (regp op2) (xmmregp op2)) 462 (depositbyte (lor 2#11000000 (lor op1 (reg2int op2 'REXB)))) 463 (return nil)) 464 465 % case: reg - (indirect (reg EBP/R13) ) % no format without offset 466 (when (and (eq mode 'indirect) 467 (regp (cadr op2)) 468 (setq base (reg2int (cadr op2) 'REXB)) 469 (equal base 2#101) ) 470 (return (modR/M op1 (list 'displacement (cadr op2) 0)))) 471 472 % case: reg - (indirect (reg ESP/R12) ) 473 (when (and (eq mode 'indirect) 474 (regp (cadr op2)) 475 (setq base (reg2int (cadr op2) 'REXB)) 476 (equal base 2#100) ) 477 (depositbyte (lor 2#00000100 op1)) 478 (depositbyte 2#00100100) % s-i-b byte 479 (return nil)) 480 481 % case: reg - (indirect reg) non ESP/EBP 482 (when (and (eq mode 'indirect) 483 (regp (cadr op2))) 484 % no zero displacement for reg EBP: 485 (setq base (reg2int (cadr op2) 'REXB)) 486 (when (or (and (equal base 2#100) (not (upperreg64p (cadr op2)))) (equal base 2#101)) 487 (modR/Merror op2)) 488 (depositbyte (lor 2#00000000 (lor op1 base))) 489 (return nil)) 490 491 % case: reg - (displacement (reg ESP/R12) const) 492 (when (and (eq mode 'displacement) 493 (regp (cadr op2)) 494 (numberp (caddr op2)) 495 (setq base (reg2int (cadr op2) 'REXB)) 496 (equal base 2#100) ) 497 (return 498 (if (bytep (caddr op2)) % 8 bit displacement 499 (progn 500 (depositbyte (lor 2#01000100 op1)) 501 (depositbyte 2#00100100) % s-i-b byte 502 (depositbyte (land 255 (caddr op2)))) 503 (progn 504 (depositbyte (lor 2#10000100 op1 base)) 505 (depositbyte 2#00100100) % s-i-b byte 506 (deposit32bitword (caddr op2) ))))) 507 508 % case: reg - (displacement reg const), non ESP 509 (when (and (eq mode 'displacement) 510 (regp (cadr op2)) 511 (numberp (caddr op2))) 512 (setq base (reg2int (cadr op2) 'REXB)) 513 (return 514 (if (bytep (caddr op2)) % 8 bit displacement 515 (progn 516 (depositbyte (lor 2#01000000 (lor op1 base))) 517 (depositbyte (land 255 (caddr op2)))) 518 (progn 519 (depositbyte (lor 2#10000000 (lor op1 base))) 520 (deposit32bitword (int2sys (caddr op2) )))))) 521 522 % case: reg - (indexed ....) 523 (when (eq mode 'indexed) 524 (return (sibbyte-for-indexed (lor 2#00000100 op1) op2))) 525 526 527 % all other cases: reg - absolute 32 bit displacement 528 (depositbyte (lor 2#00000100 op1 )) 529 (depositbyte 2#00100101 ) % AMD64 no RIP relative addressing 530 (depositextension op2))) 531 532 533(de sibbyte-for-indexed(modr/m op2) 534 (prog(base index factor n) 535 (setq base (caddr op2) index (cadr op2)) 536 (setq factor 1) 537 (when (eqcar index 'times) 538 (setq factor (caddr index)) 539 (setq index (cadr index))) 540 (setq factor (atsoc factor 541 '((1 . 0)(2 . 2#01000000)(4 . 2#10000000)(8 . 2#11000000)))) 542 (when (null factor) (modR/Merror op2)) 543 (setq factor (cdr factor)) 544 (cond 545 ((eqcar base 'displacement) 546 (when (or (not (numberp (setq n (caddr base)))) 547 (not (regp (cadr base)))) (modR/Merror op2)) 548 (setq base (reg2int (cadr base) 'REXB)) 549 (when (or (not (equal n 0))(eq base 2#101)) 550 (prin2t "****** Fall noch nicht vorgesehen") 551 (modR/Merror op2)) 552 (depositbyte modr/m) 553 (depositbyte(lor factor (lor (lsh (reg2int index 'REXX) 3) base)))) 554 ((labelp base) 555 (depositbyte modr/m) 556 (depositbyte(lor factor (lor (lsh (reg2int index 'REXX) 3) 2#101 ))) 557 (depositextension base)) 558 (t (modR/Merror op2))))) 559 560(de modR/Merror(op2) 561 (stderror (bldmsg "illegal x86_64 addressing mode %w" op2))) 562 563(de depositextension(op2) 564 % generate a relocated fullword extension 565 (prog (OfFn) 566 (when (atom op2) (return (depositwordexpression op2))) 567 (when (setq OfFn (get (car op2) 'OperandDepositFunction)) 568 (return (apply OfFn (list op2)))) 569 (depositwordexpression op2))) 570 571(de lthmodR/M (op1 op2) 572 % calculate the length of the address part by modR/M 573 (prog (OpFn mode base ireg n) 574 575 % case: reg - reg 576 (when (regp op2) (return 1)) 577 (when (pairp op2) (setq mode (car op2))) 578 579 % case: reg - (indirect (reg ESP/R12) ) 580 (when (and (eq mode 'indirect) 581 (regp (cadr op2)) 582 (setq base (reg2int (cadr op2) 'REXB)) 583 (equal base 2#100) ) 584 (return 2)) 585 586 % case: reg - (indirect (reg EBP/R13) ) % no format without offset 587 (when (and (eq mode 'indirect) 588 (regp (cadr op2)) 589 (setq base (reg2int (cadr op2) 'REXB)) 590 (equal base 2#101) ) 591 (return (lthmodR/M op1 (list 'displacement (cadr op2) 0)))) 592 593 % case: reg - (indirect reg) non ESP/EBP 594 (when (and (eq mode 'indirect) 595 (regp (cadr op2))) 596 (return 1)) 597 598 % case: reg - (displacement (reg ESP/R12) const) 599 (when (and (eq mode 'displacement) 600 (regp (cadr op2)) 601 (numberp (caddr op2)) 602 (setq base (reg2int (cadr op2) 'REXB)) 603 (equal base 2#100) ) 604 (if (bytep (caddr op2) ) % 8 bit displacement 605 (return 3) 606 (return 6))) 607 608 % case: reg - (displacement reg const), non ESP 609 (when (and (eq mode 'displacement) 610 (regp (cadr op2)) 611 (numberp (caddr op2))) 612 (return (if (bytep (caddr op2)) 2 5))) 613 614 % case: (indexed reg (displacement reg 0)) 615 (when (eq mode 'indexed) 616 (return (add1 (lth-sibbyte-for-indexed op2)))) 617 618 % all other cases: reg - absolute 32 bit displacement 619 (return 6))) 620 621 622(de lth-sibbyte-for-indexed(op2) 623 (prog(base index factor offset) 624 (setq base (caddr op2) index (cadr op2)) 625 (cond 626 ((eqcar base 'displacement) 627 (setq offset (caddr base)) 628 (when (or (not (equal offset 0)) 629 (not (regp (cadr base)))) (modR/Merror op2)) 630 (setq base (reg2int (cadr base) 'REXB)) 631 (when (eq base 2#101) 632 (prin2t "****** Fall noch nicht vorgesehen") 633 (modR/Merror op2)) 634 (return 1)) 635 ((labelp base) (return 5)) 636 (t (modR/Merror op2))))) 637 638% Procedures to compute specific OperandRegisterNumber!* 639% Each of the cases returns the Addrssing MODE 640% and sets OperandRegisterNumber!* as a side effect 641 642(fluid '(numericRegisterNames REX-Prefix)) 643 644(setq numericRegisterNames [nil EAX EBX ECX EDX EBP]) 645 646(put 'REXR 'prefixcode 2#100) 647(put 'REXX 'prefixcode 2#010) 648(put 'REXB 'prefixcode 2#001) 649 650(de reg2int (u prefixbit) 651 % calculate binary number for register 652 (prog (r) (setq r u) 653 % strip off tag 'reg 654 (cond ((xmmregp r) (setq r (cadr r))) 655 (t (cond ((eqcar r 'reg)(setq r (cadr r)))) 656 %convert a LISP-register into an x86_64 register 657 (if (numberp r) (setq r (getv numericRegisterNames r))))) 658 (setq r (get r 'registercode)) 659 (when (and r (wgreaterp r 7)) (setq r (wand r 7)) 660 (setq REX-Prefix 661 (wor REX-Prefix (get prefixbit 'prefixcode)))) 662 (if r (return r) 663 (stderror (bldmsg "unknown register %w" u))))) 664 665(deflist '((EAX 0) (ECX 1) (EDX 2) (EBX 3) 666 (rAX 0) (rCX 1) (rDX 2) (rBX 3) 667 (ESP 4) (EBP 5) (ESI 6) (EDI 7) 668 (rSP 4) (rBP 5) (rSI 6) (rDI 7) 669 (st 4) % LISP stack register 670 (T1 7) % EDI 671 (T2 6) % ESI 672 (T3 8) 673 (T4 9) 674 (heaplast 10) (heaptrapbound 11) 675 (bndstkptr 12) (bndstkupperbound 13) 676 (nil 15) (bndstklowerbound 14) 677 678 (r8 8) (r9 9) (r10 10) (r11 11) 679 (r12 12) (r13 13) (r14 14) (r15 15) 680 (xmm0 0) (xmm1 1) (xmm2 2) (xmm3 3) 681 (xmm4 4) (xmm5 5) (xmm6 6) (xmm7 7) 682 (xmm8 8) (xmm9 9) (xmm10 10) (xmm11 11) 683 (xmm12 12) (xmm13 13) (xmm14 14) (xmm15 15) 684 685 % byte and word registers 686 (AL 0) (CL 1) 687 (AX 0) (CX 1) 688 % segment registers 689 (ES 0) (CS 1) (SS 2) (DS 3)(FS 4)(GS 5) 690 ) 'registercode) 691 692(put 'NIL 'RegisterCode 15) 693 694(de bytep(n) 695 (when (and (numberp n) (lessp n 128) (greaterp n -128)) 696 (land n 255))) 697 698(de halfwordp(n) 699 (when (and (numberp n) (lessp n 32768) (greaterp n -32768)) 700 (land n 65535))) 701 702(de unimmediate(u) 703 (if (eqcar u 'immediate) (cadr u) u)) 704 705%------------------------------------------------------------------------ 706% (displacement (reg 5) ...) has to be prefixed in order to address 707% the DS segment rther than the SS segment 708(de indexed-reg-5-p(op) 709 (and (pairp op) NIL % not useful in 64 bit mode 710 (or (eq (car op) 'indexed) 711 (eq (car op) 'displacement) 712 (eq (car op) 'indirect)) 713 (equal (cadr op) '(reg 5))) ) 714 715(de lth-reg-5-prefix(op) 716 (if (indexed-reg-5-p op) 1 0)) 717 718(de reg-5-prefix(op) 719 (when (indexed-reg-5-p op) 720 (depositbyte 16#3e) )) % DS segment override prefix 721 722%------------------------------------------------------------------------ 723% special format for EAX-instructions 724 725(de OP-mem-eax (code op1 op2) 726 (when (eqcar op1 'reg)(setq op1 op2)) 727 (depositbyte (car code)) 728 (depositextension (unimmediate op1))) 729 730(de LTH-mem-eax (code op1 op2) 5) 731 732%------------------------------------------------------------------------ 733% code is one byte, op1 is a register, op2 is an effective address 734(de OP-reg-effa (code op1 op2) 735 (reg-5-prefix op2) 736 (depositbyte (car code)) 737 (modR/M op1 op2)) 738 739(de LTH-reg-effa (code op1 op2) 740 (plus 1 (lth-reg-5-prefix op2) (lthmodR/M op1 op2))) 741 742%------------------------------------------------------------------------ 743% op1 is an immediate, op2 is an effective address which patches into 744% the second byte of the code 745(de OP-imm-effa (code op1 op2) 746 (reg-5-prefix op2) 747 (depositbyte (car code)) 748 (modR/M (cadr code) op2) 749 (depositextension (unimmediate op1))) 750 751(de lth-imm-effa (code op1 op2) 752 (plus 5 (lth-reg-5-prefix op2) (lthmodR/M (cadr code) op2))) 753 754(de OP-imm8-effa (code op1 op2) 755 (reg-5-prefix op2) 756 (depositbyte (car code)) 757 (modR/M (cadr code) op2) 758 (depositbyte (unimmediate op1))) 759(de lth-imm8-effa (code op1 op2) 760 (plus 2 (lth-reg-5-prefix op2) (lthmodR/M (cadr code) op2))) 761 762%------------------------------------------------------------------------ 763% code is two bytes (prefix+opcode) with an optional rex prefix in between, 764% op1 is a register, op2 is an effective address 765(de OP-reg-effa-2 (code op1 op2) 766 (prog (need_rex) 767 (setq need_rex (or (upperreg64p op1) (upperreg64p op2))) 768 (depositbyte (car code)) 769 (setq code (cdr code)) 770 % check for optional rex byte 771 (if (eqcar code 'rex) 772 (progn 773 (setq code (cdr code)) % skip symbolic "rex" byte in inst. def. 774 (if need_rex % deposit necessary rex byte 775 (progn 776 (setq REX-prefix 16#48) 777 (depositbyte 16#48) 778 (SETQ REX? (plus codebase!* currentoffset* -1)) 779 (setq allowextrarexprefix (if (eqcar code 'rex) 0 1) ))) 780 )) 781 (depositbyte (car code))) 782 (modR/M op1 op2)) 783 784(de LTH-reg-effa-2 (code op1 op2) 785 (prog (codelength) 786 (setq codelength (length code)) 787 (if (and (memq 'rex code) (not (upperreg64p op1)) (not (upperreg64p op2))) 788 % optional rex byte not needed 789 (setq codelength (sub1 codelength))) 790 (return (plus codelength (lthmodR/M op1 op2))))) 791 792%------------------------------------------------------------------------ 793% code is two bytes with up to three prefixes, op1 is a register, op2 is an effective address 794(de OP-xmmreg-effa (code op1 op2) 795 (prog (need_rex) 796 (setq need_rex (or (upperreg64p op1) (upperreg64p op2))) 797 % deposit prefix bytes 798 (while (and (cddr code) (not (eqcar code 'rex))) 799 (depositbyte (car code)) 800 (setq code (cdr code))) 801 (if (or need_rex (eqcar code 'rex)) 802 (progn 803 (setq REX-prefix (if (eqcar code 'rex) 16#48 16#40)) 804 (depositbyte REX-prefix) 805 (SETQ REX? (plus codebase!* currentoffset* -1)) 806 (setq allowextrarexprefix (if (eqcar code 'rex) 0 1) ) 807 (if (eqcar code 'rex) (setq code (cdr code))))) 808 (depositbyte (car code))) 809 (depositbyte (cadr code)) 810 (modR/M op1 op2)) 811 812(de LTH-xmmreg-effa (code op1 op2) 813 (prog (codelength) 814 (setq codelength (length code)) 815 (if (and (memq 'rex code) (not (upperreg64p op1)) (not (upperreg64p op2))) 816 % optional rex byte not needed 817 (setq codelength (sub1 codelength))) 818 (return (plus codelength (lthmodR/M op1 op2))))) 819 820%----------------------------------------------------------------------- 821% format: fixed modR/M byte 822(de OP-EFFA (code op1) (OP-reg-effa code (cadr code) op1)) 823(de lth-EFFA (code op1) (LTH-reg-effa code (cadr code) op1)) 824 825(de OP2-effa(code op1) 826 (depositbyte (car code)) 827 (op-EFFA (cdr code) op1)) 828 829(de lth2-EFFA(code op1) (add1 (lth-effa(cdr code) op1))) 830 831%----------------------------------------------------------------------- 832% immediate to EAX 833(de OP-imm-EAX (code op1 op2-is-alway-EAX) 834 (depositbyte (car code)) (depositextension (unimmediate op1))) 835 836(de LTH-imm-EAX (code op1 op2-is-alway-EAX) 5) 837 838%----------------------------------------------------------------------- 839% INT with parameter 840(de OP-INT (code op1) 841 (depositbyte (car code)) (depositbyte (unimmediate op1))) 842 843(de LTH-INT (code op1) 2) 844 845%--------------------------------------------------------------------- 846% immediate to reg 847% code is one byte + ModR?m byte, op1 the immediate, op2 the reg 848% sometimes there is no ModR/M byte; then the reg is placed in the opcode 849% (adc 17 (reg ABX)) 850(de OP-imm-reg (code op1 op2) 851 (prog(n c1 c2) 852 (when (cdr code) (depositbyte (car code))(setq code (cdr code))) 853 (depositbyte (lor (car code) (reg2int op2 'REXB))) 854 (depositextension (unimmediate op1)))) 855 856(de LTH-imm-reg (code op1 op2) (if (cdr code) 6 5)) 857 858(de OP-imm8-reg (code op1 op2) 859 (prog(n c1 c2) 860 (when (cdr code) (depositbyte (car code))(setq code (cdr code))) 861 (depositbyte (lor (car code) (reg2int op2 'REXB))) 862 (depositbyte (bytep op1)))) 863 864(de LTH-imm8-reg (code op1 op2) (if (cdr code) 3 2)) 865 866 867%--------------------------------------------------------------------- 868% absolute n-byte instruction 869(de OP-byte (code) 870 (foreach x in code do (depositbyte x))) 871(de lth-byte (code) (length code)) 872 873%--------------------------------------------------------------------- 874% push/pop with register: code is one byte modified with reg number 875(de OP-Push-Reg(code op1) (depositbyte (lor (car code) (reg2int op1 'REXB)))) 876(de LTH-Push-Reg(code op1) 1) 877 878%--------------------------------------------------------------------- 879% jump to absolute address 880% 386 has only relative jumps 881(de OP-Jump (code op1) 882 (prog(n) 883 (depositbyte (car code)) 884 (when (cdr code) (depositbyte (cadr code))) 885 (setq op1 (saniere-Sprungziel op1)) 886 (setq n(MakeExpressionrelative op1 4)) % offset wrt next instr 887 (depositliteralword n) 888 (when *testlap (tab 15)(prin2 "-> ") 889 (prin2 n) (prin2 " rel = ") 890 (prin2 (plus currentoffset* n))(prin2t " abs")))) 891 892(de lth-jump (code op1) (if (cdr code) 6 5)) 893 894%jump short (8-bit displacement) 895(de OP-JUMP-SHORT (code op1) 896 (prog(n a) 897 (depositbyte (car code)) 898 (setq op1 (saniere-Sprungziel op1)) 899 (setq n (MakeExpressionRelative op1 1)) % offset wrt next instr 900 (when (not (bytep n)) (stderror "distance too long for short jump")) 901 (depositbyte (bytep n)) 902 (when *testlap (tab 15)(prin2 "-> ") 903 (prin2 n) (prin2 " rel = ") 904 (prin2 (plus currentoffset* n))(prin2t " abs")))) 905 906(de lth-JUMP-SHORT (code op1) 2) 907 908% indirect jump to effective address 909(de OP-JUMP-EFFA (code op1) 910 % a tag "indirect" contained already in the operation if not 911 % explicit reg reference 912 (when (and (eqcar op1 'indirect) (not (regp (cadr op1)))) 913 (setq op1 (cadr op1))) 914 % need REX byte if upper 8 register 915 (if (upperreg64p op1) 916 (progn 917 (setq REX-prefix 16#40) 918 (depositbyte REX-prefix) 919 (SETQ REX? (plus codebase!* currentoffset* -1)) 920 (setq allowextrarexprefix 1))) 921 (op-reg-effa code (cadr code) op1)) 922 923(de LTH-JUMP-EFFA (code op1) 924 (when (and (eqcar op1 'indirect) (not (regp (cadr op1)))) 925 (setq op1 (cadr op1))) 926 (if (upperreg64p op1) (add1 (lth-reg-effa code (cadr code) op1)) 927 (lth-reg-effa code (cadr code) op1))) 928 929 930(commentoutcode 931%jump full size (32 bit displacement) 932(de OP-JUMP-LONG(code op1) 933 (depositbyte (car code)) 934 (setq op1 (saniere-Sprungziel op1)) 935 (when (cdr code) (depositbyte (cadr code))) %conditional jumps 936 (depositExtension op1)) 937(de lth-JUMP-LONG(code op1) (if (cdr code) 6 5)) 938) 939 940(de saniere-Sprungziel(l) 941 (cond ((atom l) l) 942 ((eqcar l 'IMMEDIATE) (saniere-Sprungziel (cadr l))) 943 ((eqcar l 'LABEL) (saniere-Sprungziel (cadr l))) 944 (T l))) 945% RET n 946(de OP-RET-n (code op1) 947 (depositbyte (car code)) 948 (deposithalfword (halfwordp (unimmediate op1)))) 949(de lth-RET-n (code op1) 3) 950 951%------------------------------------------------------------- 952%enter 953(de OP-enter (code op1) 954 (depositbyte (car code)) 955 (deposithalfword (unimmediate op1)) 956 (depositbyte 0)) % support for level 0 only 957(de lth-enter (code op1) 4) 958 959%------------------------------------------------------------- 960% PUSH imm32 961(de OP-imm (code op1) 962 (depositbyte (car code)) 963 (depositextension (unimmediate op1))) 964(de lth-imm (code op1) 5) 965 966 967%------------------------------------------------------------- 968% shift with one parameter 969(de op-shift (code dummy op1) 970 (depositbyte (car code)) 971 (modr/m (cadr code) op1)) 972(de lth-shift (code op1) (add1 (lthmodR/M (cadr code) op1))) 973 974%shift with immediate amount 975(de op-shiftimm(code op2 op1) 976 (depositbyte (car code)) 977 (depositbyte (lor 2#11000000 (lor (cadr code) (reg2int op1 'REXB)))) 978 (depositbyte (bytep (unimmediate op2)))) 979(de lth-shiftimm(code op1 op2) 3) 980 981% double shifts 982(de op-dshift (code dummy op1) 983 (depositbyte (cadr code)) 984 (modR/M op1 0)) 985(de lth-dshift (code op1) (plus 2 (lthmodR/M op1 0))) 986 987(de op-dshiftimm (code op2 op1) 988 (depositbyte (cadr code)) 989 (modR/M op1 0) 990 (depositbyte (bytep (unimmediate op2)))) 991(de lth-dshiftimm (code op1) (plus 3 (lthmodR/M op1 0))) 992 993%------------------------------------------------------------- 994% MUL and DIV 995(de OP-MUL (code op1) (op-reg-effa code (cadr code) op1)) 996(de lth-mul (code op1) (lth-reg-effa code (cadr code) op1)) 997 998% special: IMUL 999(de OP-IMUL (code op1 op2) 1000 (depositbyte (car code)) 1001 (depositbyte (cadr code)) 1002 (modR/M op1 op2)) 1003(de lth-imul (code op1 op2) 3) 1004 1005% ------------------------------------------------------------ 1006% standard operand tags 1007% ------------------------------------------------------------ 1008 1009 1010 1011(de DepositFluid (X) 1012 (DepositValueCellLocation (second X))) % Defined in System-Faslin.Red 1013 1014(de DepositExtraReg (X) 1015 (DepositExtraRegLocation (second X))) % Defined in System-Faslin.Red 1016 1017(de DepositEntry (X) 1018 (DepositFunctionCellLocation (second X))) % Defined in System-Faslin.Red 1019 1020(de depositforeignentry (x) 1021 (depositfunctioncelllocation (second x))) 1022 1023(put 'fluid 'OperandDepositFunction (function DepositFluid)) 1024(put '$fluid 'OperandDepositFunction (function DepositFluid)) 1025(put 'global 'OperandDepositFunction (function DepositFluid)) 1026(put '$global 'OperandDepositFunction (function DepositFluid)) 1027(put 'ExtraReg 'OperandDepositFunction (function DepositExtraReg)) 1028(put 'entry 'OperandDepositFunction (function DepositEntry)) 1029(put 'foreignentry 'operanddepositfunction (function depositforeignentry)) 1030 1031 1032 1033% ------------------------------------------------------------ 1034% Deposit instructions for Pseudo ops 1035% ------------------------------------------------------------ 1036 1037(de DepositWordBlock (X) % (FULLWORD xxx xxx ... xxx) 1038 (foreach Y in (cdr X) do (DepositQuadWordExpression Y))) 1039 1040(de DepositHalfWordBlock (X) % (HALFWORD xxx xxx ... xxx) 1041 (foreach Y in (cdr X) do (DepositHalfWordExpression Y))) 1042 1043(de DepositByteBlock (X) % (BYTE xxx xxx ... xxx) 1044(prog (Z) 1045 (setq Z 0) 1046 (foreach Y in (cdr X) do 1047 (progn (DepositByte Y) 1048 (setq Z (LXOR Z 1)))) % toggle Z 1049 (cond ((not (equal Z 0)) (DepositByte 0))))) % go to halfword boundary 1050 1051 1052% Deposit a string with a trailing ZERO byte 1053 1054(de DepositString (X) % (STRING "xxxxxx") 1055 (prog nil 1056 (setq X (second X)) 1057 (for (from I 0 (Size X) 1) (do (DepositByte (Indx X I)))) 1058 (DepositByte 0) 1059 (while (not (eq 0 (remainder CurrentOffset!* 8))) 1060 (depositbyte 0)))) 1061% align to word boundary 1062 1063(de DepositFloat (X) % this will not work in cross-assembly 1064(progn (setq X (FltInf (second X))) 1065 (DepositWord (FloatlowOrder X)) )) 1066 1067(put 'fullword 'InstructionDepositFunction 'DepositWordBlock) 1068(put 'halfword 'InstructionDepositFunction 'DepositHalfWordBlock) 1069(put 'byte 'InstructionDepositFunction 'DepositByteBlock) 1070(put 'string 'InstructionDepositFunction 'DepositString) 1071(put 'float 'InstructionDepositFunction 'DepositFloat) 1072 1073% Auxiliary functions for computing instruction bit patterns 1074 1075(de MakeExpressionRelative (Exp OffsetFromHere) 1076(prog (X Y Z) 1077 (cond ((EqCar Exp 'InternalEntry) 1078 (return (MakeInternalEntryRelative (second Exp) OffsetFromHere)))) 1079 1080 (cond ((not (LabelP Exp)) 1081 (return (StdError "Only labels can be relative")))) 1082 1083 (setq X (plus CurrentOffset* OffsetFromHere)) 1084 (setq Y (LabelOffset Exp)) 1085 (return (Difference Y X)))) 1086 1087 1088(de makeinternalentryrelative (nam offsetfromhere) 1089 (prog (offset) 1090 (setq offset (atsoc nam labeloffsets*)) 1091 (setq offset (if offset 1092 (cdr offset) 1093 (get nam 'internalentryoffset))) 1094 (return (if offset 1095 (progn 1096 (setq offset 1097 (difference offset 1098 (plus2 currentoffset* offsetfromhere)))) 1099 (progn 1100 (setq forwardinternalreferences* 1101 (cons (cons currentoffset* nam) 1102 forwardinternalreferences*)) 1103 0))))) 1104 % will be fixed in SystemFasl... 1105 1106 1107(de labeloffset (l) 1108 (let (offset) 1109 (cond 1110 ((codep l) (if *writingfaslfile 1111 (inf l) 1112 (wdifference (inf l) codebase*))) 1113 ((setq offset (atsoc l labeloffsets*)) (cdr offset)) 1114 (t (stderror (bldmsg "Unknown label %r" l))) 1115 ))) 1116 1117 1118 1119% ------------------------------------------------------------ 1120% Branch analysis (conditional jumps) 1121% ------------------------------------------------------------ 1122 1123(fluid '(ConditionalJumps*)) 1124(setq ConditionalJumps* 1125 '((jo . jno) (jno . jo) 1126 (jp . jnp) (jnp . jp) 1127 (jz . jnz) (jnz . jz) 1128 (je . jne) (jne . je) 1129 (jb . jae) (jae . jb) (jbe . ja) (ja . jbe) 1130 (jnb . jnae) (jnae . jnb) (jnbe . jna) (jna . jnbe) 1131 (jl . jge) (jge . jl) (jle . jg) (jg . jle) 1132 (jnl . jnge) (jnge . jnl) (jnle . jng) (jng . jnle) 1133)) 1134 1135(de reformBranches (code) 1136 (prog (rcode instr bottom x y z) 1137 (while code 1138 (setq instr (pop code)) 1139 % replace simple negation jumps 1140 % case: (jne lab)(j anywhere) lab 1141 % -> (je anywhere) 1142 (when (and 1143 (pairp instr) 1144 (setq x (atsoc (car instr) ConditionalJumps*)) 1145 (setq y (saniere-Sprungziel (cadr instr))) 1146 (eqcar (car code) 'JMP) 1147 (cdr code) 1148 (equal (cadr code) y)) 1149 (setq instr (cons (cdr x) (cdr (pop code))))) 1150 % replace indirect conditional jumps (not avail on 386) 1151 (when (and 1152 (pairp instr) 1153 (atsoc (car instr) ConditionalJumps*) 1154 (not (atom (saniere-Sprungziel (cadr instr))))) 1155 (setq x (gensym)) 1156 (push (cons 'JMP (cdr instr)) bottom) 1157 (push x bottom) 1158 (setq instr (list (car instr) x)) ) 1159 (push instr rcode)) 1160 (return (nconc (reversip rcode) bottom)))) 1161 1162% ------------------------------------------------------------ 1163% Branch optimization (in favour of short jumps) 1164% ------------------------------------------------------------ 1165 1166(deflist '( 1167 (JMP JMPL) (JO JOL) (JB JBL)(JNAE JNAEL)(JNB JNBL) 1168 (JAE JAEL) (JE JEL) (JZ JZL)(JNE JNEL)(JNZ JNZL) 1169 (JBE JBEL)(JNA JNAL)(JNBE JNBEL)(JA JAL)(JS JSL) 1170 (JNS JNSL)(JP JPL)(JPE JPEL)(JNP JNPL)(JPO JPOL) 1171 (JL JLL)(JNGE JNGEL)(JNL JNLL)(JGE JGEL)(JLE JLEL) 1172 (JNG JNGL)(JNLE JNLEL)(JG JGL) 1173) 'WordBranch) 1174 1175(de GeneralBranchInstructionP (i) (get i 'WordBranch)) 1176(de LocalLabelp (l) (atom (saniere-sprungziel l))) 1177 1178% ProcessInitCode CodeList 1179% Purpose: Take a code list which has already been expanded by Pass1Lap 1180% and replaces all generic branches with word mode branches. 1181% Returns: a new code list 1182 1183(de ProcessInitCode (CodeList) 1184(prog (BranchAndLabelAList*) % find branches, labels, and entries 1185 (setq CodeList (BuildInitCodeOffsetTable CodeList)) 1186 (setq LabelOffsets* (DeleteAllButLabels BranchAndLabelAList*)) 1187 (return CodeList))) 1188 1189 1190% OptimizeBranches BranchCodeList!*; 1191% Purpose: Take a code list which has already been expanded by Pass1Lap 1192% and try to optimize the branches 1193% Returns: a new code list 1194 1195(de OptimizeBranches (u) (OptimizeBranches0 u *WritingFaslFile)) 1196 1197(de OptimizeBranches0 (u m) 1198(prog (BranchAndLabelAList* InstructionChanged* q w) 1199 (setq BranchCodeList* (if m (alignCode u) u)) 1200 (BuildOffsetTable) % find branches, labels, and entries 1201 (setq InstructionChanged* nil) 1202 (FindLongBranches) 1203 (when (and m InstructionChanged*) 1204 % give up aligned code 1205 (return (OptimizeBranches0 u nil))) 1206 (while InstructionChanged* 1207 (setq InstructionChanged* nil) 1208 (FindLongBranches)) 1209 (setq LabelOffsets* (DeleteAllButLabels BranchAndLabelAList*)) 1210 (return BranchCodeList*))) 1211 1212(de &make-nop(n) 1213 % make n bytes of nop instructions 1214 (cond ((wleq n 0) nil) 1215 ((eq n 1)'((nop))) 1216 ((eq n 2)'((nop) (nop))) 1217 ((eq n 3)'((mov (reg t1)(reg t1)))) 1218 ((eq n 4)'((lea (displacement(reg t1)0) (reg t1)))) 1219 (t (append (&make-nop 3)(&make-nop (difference n 3)))) )) 1220 1221(de alignCode(u) 1222 (if (&smember 'fastapply u) u (alignCode1 u))) 1223 1224(de alignCode1(u) 1225 (let(rcode w (a currentoffset*) l x y z q s nops) 1226 (while u 1227 (setq w (pop u)) 1228 (setq nops 0) 1229 (cond 1230 1231 % initial start: sync. entry point 1232 ((null rcode) 1233 (setq x a) 1234 (setq y u q w) 1235 (setq s (eqcar w '*entry)) 1236 (while y 1237 (when (pairp q)(setq x (iplus2 x (instructionlength q)))) 1238 (if (eqcar q '*entry) (setq y nil) (setq q (pop y)))) 1239 (setq x (wand x 15)) 1240 (when (not (eq x 0)) (setq nops (idifference 16 x))) 1241 ) 1242 1243 % entry: executable code starts 1244 ((eqcar w '*entry)(setq s t)) 1245 1246 % fullword: executable code terminated 1247 ((eqcar w 'fullword)(setq s nil)) 1248 1249 % label under *align16 1250 ((and s (atom w) *align16) 1251 % next instruction should begin on cache line 1252 (setq x (wand a 15)) 1253 (when (not (eq x 0)) 1254 (setq nops(wdifference 16 x)))) 1255 1256 % label in standard mode 1257 ((and s (atom w) u (pairp (car u))) 1258 % next instruction should not split cache lines 1259 (setq x (iplus2 (wand a 15) (instructionlength (car u)))) 1260 (when (not (igreaterp x 16)) 1261 (setq nops (idifference 16 (wand a 15)))) 1262 ) 1263 1264 % call under *align16 1265 ((and *align16 (eqcar w 'call)) 1266 % put call exactly at the end of cache line 1267 (setq x (wand (iplus2 a (instructionlength w)) 15)) 1268 (when (not (eq x 0)) (setq nops (idifference 16 x))) 1269 ) 1270 1271 % call 1272 ((and (eqcar w 'call) u (pairp (car u))) 1273 % following instruction should not split over cache line 1274 (setq x (wand (iplus2 a (instructionlength w)) 15)) 1275 (when (igreaterp (iplus2 x (instructionlength (car u)))16) 1276 (setq nops (idifference 16 x))) 1277 ) 1278 ) 1279 (when (and (igreaterp nops 0) 1280 (ilessp nops 9)) % not too many 1281 (foreach q in (&make-nop nops) do (push q rcode)) 1282 (setq a (iplus2 a nops))) 1283 (when (pairp w)(setq a (iplus2 a (InstructionLength w)))) 1284 (push w rcode) 1285 ) 1286 (while rcode 1287 (when (not (eq (setq w (pop rcode)) '!%temp-label)) 1288 (push w u))) 1289 u 1290)) 1291 1292(de DeleteAllButLabels (X) 1293(prog (Y) 1294 (while (not (LabelP (car (first X)))) (setq X (cdr X))) 1295 1296 (cond ((null X) (return nil))) 1297 (setq Y X) 1298 (while (cdr Y) 1299 (cond ((LabelP (car (second Y))) 1300 (setq Y (cdr Y))) (t 1301 1302 (Rplacd Y (cddr Y))))) 1303 (return X))) 1304 1305 1306% BuildInitCodeOffsetTable CodeList; 1307% Purpose: generate a association list of labels, procedure entries 1308% Input is: 1309 % labels: label 1310 % instructions: ( opcode [operands]* ) 1311 % entry points: ( !*entry procedurename proceduretype numberofargs) 1312% The Alist has the form: 1313 % labels: ( label . CurrentOffset) 1314 % procedures: ( procedurename . CurrentOffset) 1315 % branch instrs ( (opcode label) . CurrentOffset) 1316% otherwise, the CurrentOffset is advanced by the length of the instruction 1317 1318(de BuildInitCodeOffsetTable (CodeList) 1319(prog (AList Instr) 1320 (setq CodeSize* CurrentOffset*) 1321 (foreach X in CodeList do 1322 (progn (cond ((LabelP X) 1323 (setq AList (cons (cons X CodeSize*) AList))) 1324 ((equal (setq Instr (first X)) '*entry) 1325 (setq AList (cons (cons (second X) CodeSize*) AList))) 1326 ((and (GeneralBranchInstructionP Instr)(locallabelp (second X))) 1327 (progn (Rplaca X (get Instr 'WordBranch)) 1328 1329 (setq CodeSize* (plus CodeSize* (InstructionLength X))) 1330 (setq AList (cons (cons X CodeSize*) AList)))) (t 1331 1332 (setq CodeSize* (plus CodeSize* (InstructionLength X))))))) 1333 1334 1335 (setq BranchAndLabelAList* (ReversIP AList)) 1336 (return CodeList))) 1337 1338 1339% BuildOffsetTable(); 1340% Purpose: generate a association list of labels, procedure entries 1341% The Alist has the form: 1342 % labels: ( label . CurrentOffset) 1343 % procedures: ( procedurename . CurrentOffset) 1344 % branch instrs ( (opcode label) . CurrentOffset) 1345 1346(de BuildOffsetTable nil 1347 (prog (AList Instr) 1348 (setq CodeSize* CurrentOffset*) 1349 (foreach X in BranchCodeList* do 1350 (progn 1351 (cond ((LabelP X) (setq AList (cons (cons X CodeSize*) AList))) 1352 ((equal (setq Instr (first X)) '*entry) 1353 (setq AList (cons (cons (second X) CodeSize*) AList))) 1354 % branch: enter the address of the following instruction 1355 ((and (GeneralBranchInstructionP Instr) (locallabelp (second X))) 1356 (setq CodeSize* (plus CodeSize* (InstructionLength X))) 1357 (setq AList (cons (cons X CodeSize*) AList))) 1358 (t (setq CodeSize* (plus CodeSize* (InstructionLength X))))))) 1359 (setq BranchAndLabelAList* (ReversIP AList)) 1360 (setq InstructionChanged* BranchAndLabelAList*) 1361 (return BranchAndLabelAList*) )) 1362 1363 1364(de FindLongBranches nil 1365 (prog (CurrentDisplacement) 1366 (foreach entry on BranchAndLabelAList* do 1367 (cond ((not (LabelP (car (first entry)))) 1368 (progn 1369 (setq CurrentDisplacement (FindDisplacement (first entry))) 1370 (cond 1371 ((or (GreaterP CurrentDisplacement (const MaximumShortBranch)) 1372 (ZeroP CurrentDisplacement)) % Must have long brahch. 1373 1374 (progn (setq InstructionChanged* t) 1375 (IncreaseAllOffsets entry (MakeLongBranch entry))))))))))) 1376 1377 1378 1379(de FindDisplacement (InstructionOffsetPair) 1380 (Abs (Difference (cdr InstructionOffsetPair) 1381 (FindLabelOffset (second (first InstructionOffsetPair)))))) 1382 1383% FindLabelOffset(Label) 1384% Purpose: looks up the location of Label in BranchAndLabelAList!* 1385% Returns: the offset of said Label 1386 1387(de FindLabelOffset (L) 1388(prog (Offset) 1389 (return 1390 (cond ((EqCar L 'InternalEntry) (FindEntryOffset (second L))) 1391 1392 ((setq Offset (Atsoc (saniere-sprungziel L) BranchAndLabelAList*)) (cdr Offset)) 1393 (t (StdError (BldMsg "Unknown label %r" L))))))) 1394 1395 1396% FindEntryOffset(ProcedureName); 1397% Purpose: looks up the location of ProcedureName in BranchAndLabelAList!* 1398% Returns: the offset of said ProcedureName 1399% if an entry point is not known for this procedure it returns a 1400% dummy value of -2000 1401 1402(de FindEntryOffset (L) 1403 (cond ((setq L (Atsoc L BranchAndLabelAList*)) (cdr L)) (t -2000))) 1404 1405(de MakeLongBranch (AList) 1406(prog (InstructionList Result OppositeBranch n) 1407 (setq InstructionList (car (first AList))) 1408 (setq n (instructionlength InstructionList)) 1409 (Rplaca InstructionList (get (first InstructionList) 'WordBranch)) 1410 (setq n (difference (instructionlength InstructionList) n)) 1411 (cond ((cdr AList) (Rplacw AList (cdr AList))) 1412 (t (Rplacw AList (list (cons '~DummyLabel~ 0))))) 1413 (return n))) % increased length of subsequent code 1414 1415 1416(de IncreaseAllOffsets (X N) 1417 (foreach Y in X do (Rplacd Y (plus (cdr Y) N))) 1418 (setq CodeSize* (plus CodeSize* N))) 1419 1420% ------------------------------------------------------------ 1421% Procedures to compute instruction lengths 1422% ------------------------------------------------------------ 1423 1424(de InstructionLength (X) 1425 (cond ((and (eqcar x 'movq) (not (xmmregp (cadr x))) (not (xmmregp (caddr x)))) 1426 (wplus2 1 (InstructionLength1 (cons 'mov (cdr x))))) 1427 ((eqcar x 'addq) (wplus2 1 (InstructionLength1 1428 (cons 'add (cdr x))))) 1429 ((eqcar x 'subq) (wplus2 1 (InstructionLength1 1430 (cons 'sub (cdr x))))) 1431 ((eqcar x 'cmpq) (wplus2 1 (InstructionLength1 1432 (cons 'cmp (cdr x))))) 1433 ((and (pairp x) (flagp (car x) 'norexprefix)) 1434 (InstructionLength1 x)) 1435 ((and (pairp x) (flagp (car x) 'onlyupperregrexprefix)) 1436 (if (upperreg64p x) 1437 (wplus2 1 (InstructionLength1 x)) 1438 (InstructionLength1 x))) 1439 ((reg64bitp x) (wplus2 1 (InstructionLength1 x))) 1440 (t (InstructionLength1 x)))) 1441 1442(de InstructionLength1 (X) 1443 (prog (Y) 1444 (when (setq Y (get (car x) 'InstructionLengthFunction)) 1445 (return (apply2safe y (cdr x)))) 1446 (when (setq Y (get (car x) 'INSTRUCTIONLENGTH)) 1447 (return (if (numberp y) y (apply y (list x))))) 1448 (stderror (bldmsg "*** Unknown x86_64 instruction:%w " x)))) 1449 1450(de apply2safe(y x) % ensure that plly has two parameters at least 1451 (cond ((null x) (apply y (list nil nil))) 1452 ((null (cdr x)) (apply y (list (car x) nil))) 1453 (t (apply y (list (car x)(cadr x)))))) 1454 1455(de InlineConstantLength (X) 1456% Purpose: returns the Size_Of_Unit_In_Bytes * Number_Of_Such_Units 1457% X has the form: 1458% (Unit value_1 value_2 value_3 .... ) 1459 (Times2 (cond ((equal (first X) 'fullword) 8) (t 2)) 1460 (length (rest X)))) 1461 1462(de ByteConstantLength (X) 1463 (Times2 (Quotient (Plus2 (length (rest X)) 1) 2) 2)) 1464 1465(de LapStringLength (X) % must fall on word boundary 1466% Purpose: Calculate the number of bytes occupied by a given string 1467% X has the form: (STRING "xxxxxx") 1468 (Times2 (Quotient (Plus2 (Size (second X)) 9) 8) 8)) 1469 1470(DefList '((fullword InlineConstantLength) 1471 (halfword InlineConstantLength) 1472 (byte ByteConstantLength) 1473 (float 8) 1474 (string LapStringLength)) 'InstructionLength) 1475 1476 1477(put '*entry 'InstructionLength 0) 1478 1479% ------------------------------------------------------------ 1480% Depositing Operations 1481% ------------------------------------------------------------ 1482 1483(de DepositByte (X) 1484(progn (putbyte (wPlus2 CodeBase* CurrentOffset*) 0 X) 1485 (UpdateBitTable 1 0) 1486 (setq CurrentOffset* (plus CurrentOffset* 1)))) 1487 1488(de DepositHalfWord (X) 1489(progn (puthalfword (wPlus2 CodeBase* CurrentOffset*) 0 X) 1490 (UpdateBitTable 2 0) 1491 (setq CurrentOffset* (plus CurrentOffset* 2)))) 1492 1493(compiletime (put 'put_a_halfword 'opencode '( 1494 (mov (reg ebx) (displacement (reg eax) 0))))) %% (reg 2) (displacement (reg eax) 0))))) 1495 1496(de deposit32bitword (x) %% cross 1497 (put_a_halfword (wplus2 codebase* currentoffset*) x) 1498 (updatebittable 4 0) 1499 (setq currentoffset* (plus currentoffset* 4))) 1500 1501(de depositword (x) 1502 (putword (wplus2 codebase* currentoffset*) 0 x) 1503 (updatebittable 8 0) 1504 (setq currentoffset* (plus currentoffset* 8))) 1505 1506(de depositliteralword (x) %% Cross 1507 (put_a_halfword (wplus2 codebase!* currentoffset!*) x) 1508 (updatebittable 4 0) 1509 (setq currentoffset* (plus currentoffset* 4))) 1510 1511(de deposit-relocated-word (offset) 1512 % Given an OFFSET from CODEBASE*, deposit a word containing the 1513 % absolute address of that offset. 1514 (put_a_halfword (wplus2 codebase* currentoffset*) 1515 (iplus2 offset (if *writingfaslfile 0 codebase*))) 1516 (updatebittable 4 (const reloc_word)) 1517 (setq currentoffset* (plus currentoffset* 4))) 1518 1519(de depositwordexpression (x) 1520 % Only limited expressions now handled 1521 (let (y) 1522 (cond 1523 ((fixp x) (deposit32bitword (int2sys x))) 1524 ((labelp x) (deposit-relocated-word (labeloffset x))) 1525 ((equal (first x) 'internalentry) 1526 (let ((offset (get (second x) 'internalentryoffset))) 1527 (if offset 1528 (deposit-relocated-word offset) 1529 (progn 1530 (setq forwardinternalreferences* 1531 (cons (cons currentoffset* (second x)) 1532 forwardinternalreferences*)) 1533 (deposit-relocated-word 0))))) 1534 ((equal (first x) 'idloc) (depositwordidnumber (second x))) 1535 ((equal (first x) 'entry) (depositentry x)) 1536 (t (stderror (bldmsg "Expression too complicated %r" x)))))) 1537 1538(de depositquadwordexpression (x) 1539 % Only limited expressions now handled 1540 (let (y) 1541 (cond 1542 ((fixp x) (depositword (int2sys x))) 1543 ((labelp x) (deposit-relocated-word (labeloffset x))) 1544 ((equal (first x) 'internalentry) 1545 (let ((offset (get (second x) 'internalentryoffset))) 1546 (if offset 1547 (deposit-relocated-word offset) 1548 (progn 1549 (setq forwardinternalreferences* 1550 (cons (cons currentoffset* (second x)) 1551 forwardinternalreferences*)) 1552 (deposit-relocated-word 0))))) 1553 ((and (eq (car x) 'mkitem) 1554 (eq (cadr x) id-tag) 1555 (eqcar (setq y (caddr x)) 'idloc) 1556 (wlessp (id2int(cadr y)) 129)) 1557 (depositword (cadr y))) 1558 ((equal (first x) 'idloc) (depositwordidnumber (second x))) 1559 ((equal (first x) 'mkitem) (deposititem (second x) (third x))) 1560 ((equal (first x) 'entry) (depositentry x)) 1561 ((setq y (wconstevaluable x)) (depositword (int2sys y))) 1562 (t (stderror (bldmsg "Expression too complicated %r" x)))))) 1563 1564(de depositwordidnumber (x) 1565 (cond 1566 ((or (not *writingfaslfile) (leq (idinf x) 128)) 1567 (deposit32bitword (idinf X))) 1568 (t 1569 (put_a_halfword (wplus2 codebase* currentoffset*) 1570 (makerelocword (const reloc_id_number) (findidnumber x))) 1571 (setq currentoffset* (plus currentoffset* 4)) 1572 (updatebittable 4 (const reloc_word))))) 1573 1574(de DepositHalfWordExpression (X) 1575(prog (Y) 1576 (return (cond ((FixP X) (DepositHalfWord X)) 1577 ((LabelP X) 1578 (progn (puthalfword (wPlus2 CodeBase* CurrentOffset*) 0 1579 (IPlus2 (LabelOffset X) 1580 (cond (*WritingFaslFile 0) (t CodeBase*)))) 1581 (UpdateBitTable 2 (const RELOC_HALFWORD)) 1582 (setq CurrentOffset* (plus CurrentOffset* 2)))) 1583 1584 ((and (equal (first X) 'Difference) (LabelP (second X)) 1585 (LabelP (third X))) 1586 (DepositHalfWord (Difference (LabelOffset (second X)) 1587 (LabelOffset (third X))))) 1588 ((equal (first X) 'IDLoc) 1589 (DepositHalfWordIDNumber (second X))) 1590 ((setq Y (WConstEvaluable X)) 1591 (DepositHalfWord Y)) (t 1592 (StdError (BldMsg "Halfword expression too complicated %r" X))))))) 1593 1594 1595(de DepositItem (TagPart InfPart) 1596 (cond ((not *WritingFaslFile) 1597 (DepositWord 1598 (MkItem TagPart 1599 (cond ((LabelP InfPart) 1600 (wPlus2 CodeBase* (LabelOffset InfPart))) 1601 ((equal (first InfPart) 'IDLoc) 1602 (IDInf (second InfPart))) 1603 (t 1604 (StdError 1605 (BldMsg "Unknown inf in MkItem %r" 1606 InfPart))))))) 1607 (t 1608 (progn (cond 1609 ((LabelP InfPart) % RELOC_CODE_OFFSET = 0 1610 (putword (wPlus2 CodeBase* CurrentOffset*) 0 1611 (mkitem tagpart (LabelOffset InfPart)))) 1612 ((equal (first InfPart) 'IDLoc) 1613 (putword (wPlus2 CodeBase* CurrentOffset*) 0 1614 (MkItem TagPart 1615 (MakeRelocInf (const RELOC_ID_NUMBER) 1616 (FindIDNumber (second InfPart)))))) 1617 (t 1618 (StdError (BldMsg "Unknown inf in MkItem %r" 1619 InfPart)))) 1620 (setq CurrentOffset* (plus CurrentOffset* 8)) 1621 (UpdateBitTable 8 (const RELOC_INF)))))) 1622 1623(de DepositHalfWordIDNumber (X) 1624 (cond ((or (not *WritingFaslFile) (LEQ (IDInf X) 128)) 1625 (DepositHalfWord (IDInf X))) (t 1626 1627 (progn (puthalfword (wplus2 codebase* currentoffset*) 0 1628 (makerelochalfword (const reloc_id_number) (findidnumber x))) 1629 (setq currentoffset* (plus currentoffset* 2)) 1630 (updatebittable 2 (const reloc_halfword)))))) 1631 1632% ------------------------------------------------------------ 1633% this procedure was "inserted" to eliminate the problem with the compiler 1634% not generating the code for: 1635% function lambda y;remprop(y, 'internalentryoffset); 1636% who knows why it didn't generate anything reasonable - it generated nil. 1637(de remove-ieo-property (y) 1638 (remprop y 'internalentryoffset)) 1639% ------------------------------------------------------------ 1640 1641(de systemfaslfixup () 1642 (prog (x) 1643 % THIS VERSION ASSUMES 32 bit RELATIVE ADDESSES, HM. 1644 (setq x (remainder currentoffset* 16)) 1645 (while (greaterp x 0) (depositbyte 0) (setq x (sub1 x))) 1646 (while forwardinternalreferences* 1647 (setq x (get (cdr (first forwardinternalreferences*)) 1648 'internalentryoffset)) 1649 (when (null x) 1650 (errorprintf "***** %r not defined in this module, call incorrect" 1651 (cdr (first forwardinternalreferences*)))) 1652 % calculate the offset 1653 (setq x (plus -4 % offset to next word 1654 (difference x (car (first forwardinternalreferences*))))) 1655 % insert the fixup 1656 (put_a_halfword 1657 (iplus2 codebase* (car (first forwardinternalreferences*))) x) 1658 (setq forwardinternalreferences* (cdr forwardinternalreferences*))) 1659 % Now remove the InternalEntry offsets from everyone 1660 (mapobl 'remove-ieo-property))) 1661 1662%----------------------------------------------------------------- 1663% 1664% Optimize: 1665% 1. (prog (a b c d e) : 1666% (mov (quote nil) (reg t1)) (mov (reg t1)(frame 1)) 1667% (mov (quote nil) (reg t1) (mov (reg t1)(frame 2)) 1668% ...... 1669% 1670% 2. (setq a b)(xxx ... b ) 1671% (mov (reg 1) (frame 1)) (mov (frame 1) (reg 2)) 1672 1673(de LapoptFrame(u) 1674 (prog (rcode instr op nextinstr src dest x) 1675 (while u 1676 (setq instr (pop u)) 1677 (when (and (pairp instr) u) 1678 (setq op (car instr)) 1679 (setq nextinstr (car u)) 1680 % pattern: 1681 % (mov (reg n) (frame m)) 1682 % (mov (frame m) (reg k)) 1683 (when (and (eq op 'MOV) 1684 (regp (setq src (cadr instr))) 1685 (not (sregp src)) % not for segment registers 1686 (setq dest (caddr instr)) 1687 (eqcar nextinstr 'MOV) 1688 (equal (cadr nextinstr)dest) 1689 (regp (setq x (caddr nextinstr)))) 1690 (pop u) 1691 (push (list 'mov src x) u)) 1692 % pattern: 1693 % (mov (quote nil) (frame 1)) 1694 % (mov (quote nil) (frame 2)) ... 1695 (when (and 1696 (eq op 'mov) 1697 (immediatep (setq src (cadr instr))) 1698 (not (regp (caddr instr))) 1699 (eqcar nextinstr 'mov) 1700 (equal (cadr nextinstr) src)) % at 2 of that type 1701 (setq u (LapoptFrame1 src (push instr u))) 1702 (setq instr (list 'mov src '(reg t1)))) 1703 % pattern: 1704 % (push (quote nil) ) 1705 % (push (quote nil) ) ... 1706 (when (and 1707 (eq op 'push) 1708 (immediatep (setq src (cadr instr))) 1709 (or (and 1710 (eqcar nextinstr 'mov) 1711 (equal (cadr nextinstr) src)) 1712 (and 1713 (eqcar nextinstr 'push) 1714 (equal (cadr nextinstr) src)))) 1715 (setq u (LapoptFrame1 src (push instr u))) 1716 (setq instr (list 'mov src '(reg t1)))) 1717 1718 ) 1719 (push instr rcode)) 1720 (return (reversip rcode)))) 1721 1722(de LapoptFrame1 (src u) 1723 % here subsequent instructions are modified for source T1 1724 (cond ((or (null u) (atom (car u))) u) 1725 ((and (eq (caar u) 'PUSH) (equal (cadr (car u)) src)) 1726 (cons '(PUSH (reg t1)) (LapoptFrame1 src (cdr u)))) 1727 ((not (and (eq(caar u) 'MOV) % nor more such instr. 1728 (equal (cadar u) src))) 1729 u) 1730 ((equal (caddr (car u)) '(reg t1)) 1731 (LapoptFrame1 src (cdr u))) 1732 (t (cons 1733 (list 'mov '(reg t1) (caddr (car u))) 1734 (LapoptFrame1 src (cdr u)))))) 1735 1736(fluid '(!*optimize-i486)) 1737 1738(setq *optimize-i486 t) 1739 1740(de LapoptPeep(code) 1741 (when *optimize-i486 (setq code (LapoptPeep486 code))) 1742 code) 1743 1744(de LapoptPeep486(code) 1745% peephole optimizer for 486 code 1746% interchanging instructions for dependencies. 1747 (let (rcode i1 i2 i3 r rb) 1748 (while code 1749 (setq i1 (pop code)) 1750 (when (and code (cdr code)) 1751 (setq i2(car code) i3(cadr code)) 1752 (cond 1753 % case 1754 % something 1755 % (add 16 (reg st)) 1756 % (ret) 1757 % move (add 16 (reg st)) one step up if possible. 1758 ((and (equal i3 '(ret)) 1759 (pairp i1) 1760 (pairp i2) 1761 (eq (car i2) 'add) 1762 (equal (caddr i2) '(reg st)) 1763 (not (&jumpcontrol i1)) 1764 (not (&smember '(reg st) i1)) 1765 ) 1766 1767 (pop code r) 1768 (push i1 code) 1769 (setq i1 i2)) 1770 1771 % case 1772 % something 1773 % (mov (reg x) ...) 1774 % (yyy ... (displacement (reg x)) 1775 % move (mov (reg x) ...) one step up if independent 1776 ((and 1777 (pairp i1) 1778 (eqcar i2 'mov) 1779 (eqcar (setq r (caddr i2)) 'reg) 1780 (equal r (&indirectbase i3)) 1781 (not (&jumpcontrol i1)) 1782 % test independence (target i2) and i1 1783 (not (&smember r i1)) 1784 % test independence (source i2) and (target i1) 1785 (setq rb (®base (cadr i2))) 1786 (or (and (eqcar i1 'mov) (not (&smember rb (caddr i1))) ) 1787 (not (&smember rb i1)) 1788 ) 1789 ) 1790 1791 (pop code) 1792 (push i1 code) 1793 (setq i1 i2) 1794 )) % cond 1795 )% when 1796 (push i1 rcode) 1797 )% while 1798 (reversip rcode) 1799)) % let,de 1800 1801(de &smember(a l) 1802 (cond ((equal a l) t) 1803 ((atom l) nil) 1804 ((&smember a (car l)) t) 1805 (t (&smember a (cdr l))) )) 1806 1807(de &indirectbase(u) 1808 (cond ((atom u) nil) 1809 ((atom (cdr u)) nil) 1810 ((eq (car u) 'displacement)(cadr u)) 1811 ((eq (car u) 'indirect) (cadr u)) 1812 (t (or (&indirectbase (car u))(&indirectbase (cdr u)))) )) 1813 1814(de ®base(u) 1815 % u is an operand of *MOVE. Extract the source base. 1816 (cond((or (atom u)(eq (car u) 'quote)) t) 1817 ((eq (car u) 'reg) u) 1818 ((or (eq (car u) 'indirect) (eq(car u)'displacement)) 1819 (cadr u)) 1820 (t t))) 1821 1822(de &jumpcontrol(u) 1823 (or (atom u) 1824 (LocalLabelp u) 1825 (GeneralBranchInstructionP (setq u (car u))) 1826 (eq u 'call) 1827 (eq u 'ret) 1828 (eq u '*entry) 1829 )) 1830 1831(de lapoptprint(l) 1832 (terpri) 1833 (prin2t " 486 tauschen:") 1834 (mapc l 'prin2t)) 1835 1836(fluid '(!64bitregs upper64bitregs)) 1837 1838(setq !64bitregs '((reg 1) (reg 2) (reg 3) (reg 4) (reg 5) (reg st) 1839 (reg rax) (reg rbx) (reg rcx) (reg rdx) 1840 (reg t3) (reg t4) (reg NIL) (reg heaplast) 1841 (reg bndstkptr) (reg bndstkupperbound) 1842 (reg bndstklowerbound) (reg heaptrapbound) 1843 (reg t1) (reg t2) (reg esp) (reg rdi) (reg rsi) 1844 (reg r8) (reg r9) (reg r10) (reg r11) 1845 (reg r12) (reg r13)(reg r14) (reg r15) 1846 )) 1847 1848(setq upper64bitregs '( 1849 (reg t3) (reg t4) (reg NIL) (reg heaplast) 1850 (reg bndstkptr) (reg bndstkupperbound) 1851 (reg bndstklowerbound) (reg heaptrapbound) 1852 (reg r8) (reg r9) (reg r10) (reg r11) 1853 (reg r12) (reg r13)(reg r14) (reg r15) 1854 (reg xmm8) (reg xmm9) (reg xmm10) (reg xmm11) 1855 (reg xmm12) (reg xmm13) (reg xmm15) (reg xmm15) 1856 )) 1857 1858 1859(de reg64bitP (i) (reg64bitp1 !64bitregs i)) 1860 1861(de upperreg64p (i) (reg64bitp1 upper64bitregs i)) 1862 1863(de reg64bitp1 (reglist inst) 1864 (if (null reglist) nil 1865 (if (&smember (car reglist) inst) t 1866 (reg64bitp1 (cdr reglist) inst)))) 1867 1868 1869%--------------------------------------------------------------------- 1870% cmacro optimizer 1871%--------------------------------------------------------------------- 1872 1873(de lapopt1 (u) 1874 (prog() 1875 (when (not *lapopt) (return u)) 1876 (setq u (lapopt-move-special-cases u)) 1877 (return u))) 1878 1879% Move bodies of if-then-elseif ... sequences such that at runtime as few 1880% jumps as possible are taken. 1881 1882(fluid '(&cond-cm&)) 1883 1884(setq &cond-cm& 1885 '((*jumpeq . *jumpnoteq) (*jumpnoteq . *jumpeq) 1886 (*jumpwgeq . *jumpwlessp) (*jumpwlessp . *jumpwgeq) 1887 (*jumpwleq . *jumpwgreaterp) (*jumpwgreaterp . *jumpwleq) 1888 (*jumptype . *jumpnottype) (*jumpnottype . *jumptype) 1889 (*jumpintype . *jumpnotintype) (*jumpnotintype . *jumpintype) 1890 (*jumpeqtag . *jumpnoteqtag) (*jumpnoteqtag . *jumpeqtag) 1891 (*jumpwgeqtag . *jumpwlessptag) (*jumpwlessptag . *jumpwgeqtag) 1892 % no inverse jumps for 1893 (*jumpwgreaterptag) 1894 (*jumpon) 1895 1896)) 1897 1898(de lapopt-move-special-cases(code) 1899 (let (rcode bcode inst u w lbl lab) 1900 (while (not (lapopt-bottom code)) 1901 (setq inst (pop code)) 1902 (when 1903 (and 1904 (pairp inst) 1905 (setq u (atsoc (car inst) &cond-cm&)) 1906 (setq u (cdr u)) 1907 (setq lbl (cadr inst)) 1908 (or (memq lbl code)(member (setq lbl (list '*lbl lbl)) code)) 1909 (setq w (lapopt-move-special-cases1 code lbl)) 1910 ) 1911 (setq lab (list 'label (gensym))) 1912 (setq bcode (nconc bcode (cons (list '*lbl lab) (car w)))) 1913 (push (cons u (cons lab (cddr inst))) rcode) 1914 (setq inst nil) 1915 (setq code (cdr w)) 1916 ) 1917 (when inst (push inst rcode)) 1918 ) 1919 (setq code (nconc (reversip rcode) (nconc bcode code))) 1920 (when *trlapopt 1921 (prin2t "=== move special cases:") 1922 (MAPCAR code 'PRINT)) 1923 code 1924)) 1925 1926(de lapopt-bottom(u) 1927 (or (null u) 1928 (eqcar (car u) 'fullword))) 1929 1930(de lapopt-move-special-cases1 (code lbl) 1931 % Find basic block until lbl which ends by an unconditional jump. 1932 % Return nil or pair of bblock and rest of code. 1933 (let (rcode fcode inst op) 1934 (while code 1935 (setq inst (pop code)) 1936 (push inst rcode) 1937 (cond ((or (atom inst) (equal inst lbl)) (setq code nil)) 1938 ((and (memq (setq op (car inst)) 1939 '(*jump *linke *exit)) 1940 (cdr code) 1941 (equal (car code) lbl) 1942 ) 1943 (setq fcode code) (setq code nil)) 1944 ((or (eq op '*lbl) (atsoc op &cond-cm&)) 1945 (setq code nil) 1946 ) 1947 )) 1948 (when fcode (cons (reversip rcode) fcode)) 1949)) 1950( dskin "AMD64-inst.dat") 1951