1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2%
3% File:         PXC:386-LAP.SL
4% Description:  Intel i386/i486 PSL Assembler
5% Author:       H. Melenk
6% Created:      1-August 1989
7% Modified:
8% Mode:         Lisp
9% Status:	Open Source: BSD License
10% Package:
11%
12% Redistribution and use in source and binary forms, with or without
13% modification, are permitted provided that the following conditions are met:
14%
15%    * Redistributions of source code must retain the relevant copyright
16%      notice, this list of conditions and the following disclaimer.
17%    * Redistributions in binary form must reproduce the above copyright
18%      notice, this list of conditions and the following disclaimer in the
19%      documentation and/or other materials provided with the distribution.
20%
21% THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22% AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
23% THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24% PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR
25% CONTRIBUTORS
26% BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27% CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28% SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29% INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30% CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31% ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32% POSSIBILITY OF SUCH DAMAGE.
33%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
34%
35% Revisions
36% 28-Apr-92 (Herbert Melenk)
37% no relocation for quoted small ID's
38%
39% 3-Apr-90 (Winfried Neun)
40% added support for new car and cdr scheme in modr/m
41
42
43% ------------------------------------------------------------
44% Fluid declarations:
45% ------------------------------------------------------------
46
47(fluid '(LabelOffsets*                  % just the label entries from
48					% BranchAndLabelAList!*
49					% Has the form ( (Label.Offset) ... )
50	BranchCodeList*                 % Used in Branch optimization
51	BranchAndLabelAList*            % Used in Branch optimization
52	CurrentOffset*                  % the global value of the current
53					% byte displacement from the starting
54					% point of the code
55	CodeSize*                       % Current number of bytes generated
56	CodeBase*                       % Starting address of the module
57	Entries*                        % list of procedure entries of the
58					% form
59					% ((ProcedureName
60					%   ProcedureType
61					%   NumberOfArguments) .
62					%   CurrentOffset!* )
63	InstructionChanged*             % Boolean - indicates if any
64					% instructions have changed due to
65					% branch optimization
66	InstructionSize*                % Contains the size constant Byte
67					% Word or Long during length compute
68					% and assembly of individual instr.
69	ForwardInternalReferences*      % a-list of offsets of references to
70					% internal functions, to be patched
71					% by SystemFaslFixup
72	LapReturnValue*                 % set by SaveEntry to the pointer
73					% to be returned by LAP
74	OperandRegisterNumber*          % see EffectiveAddress
75	*WritingFaslFile                % FLAG: if true, then we are writing
76					% the resulting code to a file,
77					% otherwise we are depositing it into
78					% memory directly
79	InitOffset*                     % The offset from the module to the
80					% Initialiization code which is to be
81					% run when the module is loaded
82	*PGWD                           % FLAG: if true, then mnemonics and
83					% assembled instructions are printed
84	*PWrds                          % FLAG: if true, then base address
85					% and size of each compiled
86					% procedure are printed as they are
87					% deposited into memory
88	*align16                        % align lables to 16 byte
89					% boundaries
90	*lapopt
91	*trlapopt
92))
93
94(setq *lapopt t)
95
96(fluid '(*immediatequote))
97(setq *immediatequote nil)
98(fluid '(*testlap))                     % diagnostic output from LAP  MK
99
100(ds LabelP (X) (atom X))
101
102(setq *PWrds t)                         % By default show where the code is
103					% put in memory
104
105% ------------------------------------------------------------
106% Constant declarations:
107% ------------------------------------------------------------
108
109(DefConst
110	 RELOC_ID_NUMBER 1
111	 RELOC_HALFWORD 2
112	 RELOC_WORD 1
113	 RELOC_INF 3)
114
115(DefConst MaximumShortBranch 127)
116
117% ------------------------------------------------------------
118% Start of actual code
119% ------------------------------------------------------------
120
121(de Lap (U)
122(prog (LabelOffsets* LapReturnValue* Entries* temp)
123    (cond ((not *WritingFaslFile) (setq CurrentOffset* 0)))
124    (setq U (&fillframeholes u))
125
126%%%    (setq u (lapopt1 u))                % optimize macros
127
128    (setq U (Pass1Lap U))               % Pass1lap
129					% expand all the LAP macros
130					% Note that this is defined in
131					% PC:PASS-1-LAP.SL
132
133    (setq U (LapoptFrame u))            % optimize frame-register transports
134    (setq U (LapoptPeep u))             % peephole optimizer for 486 code
135
136    (when *WritingFaslFile       % round off to fullword address
137	  (while (not (eq (wshift (wshift currentOffset* -3) 3) currentOffset*))
138		 (depositbyte 0) ))
139
140    (SETQ U (ReformBranches U))         % process conditional branches
141    (setq U (OptimizeBranches U))       % optimize branches and
142					% calculate offsets and total length
143
144    (when (not *WritingFaslFile)
145	  (setq CodeBase* (GTBPS (Quotient (Plus2 CodeSize* 3) 4))))
146
147
148% Print the machine specific assembly code
149% if the object is an atom then it is a LABEL
150% otherwise it is an instruction
151
152    (cond (*PGWD (foreach X in U do
153	(cond ((LabelP X) (Prin2 X)) (t (PrintF "          %p%n" X))))))
154
155    (foreach Instruction_or_Label in U do
156	(cond
157	    ((LabelP Instruction_or_Label) (DepositLabel Instruction_or_Label))
158	    ((equal (first Instruction_or_Label) '*entry)
159		      (SaveEntry Instruction_or_Label))
160	    (t (DepositInstruction Instruction_or_Label) )))
161
162    (DefineEntries)                     % define entries to whom?
163
164% If you are depositing it into memory the tell the user how much space the
165% code took and where it was loaded.
166% ??? Why is this using the error channel ???
167
168    (cond ((and (not *WritingFaslFile) *PWrds)
169	(ErrorPrintF "*** %p: base 16#%x, length 10#%d bytes"
170		(foreach X in Entries* collect (first (car X)))
171				CodeBase* CodeSize*)))
172
173    % Do not call MkCODE on LapReturnValue* if it is nil
174    % LapReturnValue* is a fluid variable that got set up to hold
175    % the address of the compiled function when the code was generated
176    (cond
177     (LapReturnValue*
178      (return
179       (MkCODE LapReturnValue*))))))
180
181
182% CheckForInitCode will scan the Codelist for the first !*Entry
183% testing for !*!*FASL!*!*Initcode!*!*.
184
185(de CheckForInitCode (CodeList)
186     (foreach Instruction in CodeList do
187       (progn (cond ((PairP Instruction)
188	   (cond ((equal (car Instruction) '*entry)
189	     (cond ((equal (second Instruction) '**Fasl**InitCode**)
190		(return t))))))))))
191
192% SaveEntry( '(!*entry ProcedureName ProcedureType NumberOfArguments) )
193% Purpose: To associate with a procedure its location (so other routines can
194%          access it
195
196(de saveentry (x)
197  (cond
198   % if X = ( _____ !*!*!*Code!*!*Pointer!*!*!* ... )
199   ((equal (second x) '***code**pointer***)
200    (setq lapreturnvalue*
201      (if *writingfaslfile currentoffset* (wplus2 codebase* currentoffset*))))
202
203   % If depositing into memory
204   ((not *writingfaslfile)
205    (setq entries* (cons (cons (rest x) currentoffset*) entries*))
206    (unless lapreturnvalue* (setq lapreturnvalue*
207		 (wplus2 codebase* currentoffset*))))
208
209   % if X = ( _____ !*!*Fasl!*!*InitCode!*!* ... )
210   ((equal (second x) '**fasl**initcode**)
211    (setq initoffset* currentoffset*))
212
213   % if X is an InternalFunction
214   ((flagp (second x) 'internalfunction)
215    (put (second x) 'internalentryoffset currentoffset*))
216
217   (t (progn
218       (put (second x) 'internalentryoffset currentoffset*) % MK
219       (findidnumber (second x))
220       (dfprintfasl (list 'putentry (mkquote (second x))
221			  (mkquote (third x)) currentoffset*))))))
222
223
224% DefineEntries()
225% Purpose: Defines each of the procedures named in the list Entries!*
226%          by putting the code pointer into the function cells
227
228(de DefineEntries nil
229    (foreach X in Entries* do
230	(PutD (first (car X)) (second (car X))
231		 (MkCODE (wplus2 CodeBase* (cdr X))))))
232
233%(de DepositInstruction (X)
234%% This actually dispatches to the procedures to assemble the instructions
235%(prog (Y)
236%    (if (eqcar x 'movq)  (progn (Depositbyte  16#48)  % REX Prefix
237%				(rplaca x 'mov))
238%    (when (reg64bitp x) (Depositbyte  16#48)))  % REX Prefix
239%    (cond ((setq Y (get (first X) 'InstructionDepositFunction))
240%	   (Apply Y (list X)))
241%	  ((setq Y (get (first X) 'InstructionDepositMacro))
242%	   (apply2safe y (cdr x)))
243%	  (t (StdError (BldMsg "Unknown x86_64 instruction %p" X))))))
244
245
246(de DepositLabel (x) nil)
247
248(fluid '(*testlap REX-Prefix REX? allowextrarexprefix))
249(de DepositInstruction (X)
250% This actually dispatches to the procedures to assemble the instrucitons
251% version with address calculation test
252(prog (Y offs allowextrarexprefix REX?)
253
254    (when *testlap (prin2 currentoffset*) (tab 10) (print x))
255    (setq allowextrarexprefix 0)
256    (setq REX-Prefix 16#48)
257    (when *writingfaslfile (setq offs currentoffset*))
258    (cond ((and (eqcar x 'movq) (not (xmmregp (cadr x))) (not (xmmregp (caddr x))))
259                          (Depositbyte  16#48)  % REX Prefix
260			  (SETQ REX? (plus codebase!* currentoffset* -1))
261			  (setq allowextrarexprefix 0)
262                          (rplaca x 'mov))
263          ((eqcar x 'addq) (Depositbyte  16#48)  % REX Prefix
264			  (SETQ REX? (plus codebase!* currentoffset* -1))
265			  (setq allowextrarexprefix 0)
266                          (rplaca x 'add))
267          ((eqcar x 'subq) (Depositbyte  16#48)  % REX Prefix
268			  (SETQ REX? (plus codebase!* currentoffset* -1))
269			  (setq allowextrarexprefix 0)
270                          (rplaca x 'sub))
271          ((eqcar x 'cmpq) (Depositbyte  16#48)  % REX Prefix
272			  (SETQ REX? (plus codebase!* currentoffset* -1))
273			  (setq allowextrarexprefix 0)
274                          (rplaca x 'cmp))
275	  ((and (pairp x) (flagp (car x) 'norexprefix)) NIL)
276	  ((and (pairp x) (flagp (car x) 'onlyupperregrexprefix))
277	   (cond ((upperreg64p x)
278		  (setq REX-prefix 16#40)
279		  (Depositbyte 16#40)
280		  (SETQ REX? (plus codebase!* currentoffset* -1))
281		  (setq allowextrarexprefix 0))))
282          ((reg64bitp x) (Depositbyte  16#48)    % REX Prefix
283			 (SETQ REX? (plus codebase!* currentoffset* -1))))
284    (cond ((setq Y (get (first X) 'InstructionDepositFunction))
285	   (Apply Y (list X)))
286	  ((setq Y (get (first X) 'InstructionDepositMacro))
287	   (apply2safe y (cdr x)))
288	  (t (StdError (BldMsg "Unknown x86_64 instruction %p" X))))
289    (when REX? (putbyte REX? 0 REX-Prefix)) %overwrite REX-Prefix
290
291    (when (and (not (eq 0 allowextrarexprefix))
292               offs (not (equal currentoffset*
293			 (plus allowextrarexprefix offs
294				 (instructionlength x)))))
295	  (StdError (BldMsg "length error with instruction %p: %p"
296		  x (difference (difference currentoffset* offs)
297				(instructionlength x)))))
298))
299
300(flag '(JMP CALL
301	movups movss movupd movsd
302	movhlps movlps movlpd movddup movsldup unpcklps unpcklpd
303	MOVLHPS MOVHPS MOVHPD MOVSHPDUP movaps movapd
304	CVTPI2PS CVTPI2PD CVTSI2SS CVTSI2SD CVTSI2SDQ
305	CVTTPS2PI CVTTPD2PI CVTTSS2SI CVTTSD2SI
306	CVTPS2PI CVTPD2PI CVTSS2SI CVTSD2SI
307	UCOMISS UCOMISD COMISS COMISD
308	MOVW MOVSX MOVZX
309	ADDSD MULsd subsd divsd sqrtsd
310	movd movq ldmxcsr stmxcsr
311	pand pandn por pxor andpd andnpd orpd xorpd
312	) 'norexprefix)
313
314% Instructions that need a REX.B prefix only if using upper 8 registers
315(flag '(push pop movl movb) 'onlyupperregrexprefix)
316
317(de DepositLabel (x)
318    (when *testlap (prin2 currentoffset*) (tab 10) (print x))
319    (when (and *writingfaslfile
320	       (not (equal currentoffset* (LabelOffset x))))
321      (StdError (BldMsg "wrong address for label %p: difference = %p"
322			x    (difference currentoffset* (LabelOffset x))))))
323
324
325(CompileTime (progn
326
327(dm DefOpcode (U)
328%
329% (DefOpcode name (parameters) pattern)
330%
331(prog (OpName vars pattern fname)
332    (setq U (rest U))
333    (setq OpName (pop U))
334    (setq fname (intern (bldmsg "%w.INSTR" OpName)))
335    (setq OpName (MkQuote OpName))
336    (setq vars (pop u))
337    (setq pattern
338      (append u
339	`((t (laperr ',OpName  (list .,vars))))))
340    (setq pattern (cons 'cond pattern))
341    % (setq u `(lambda ,vars ,pattern))
342    % (return `(put ,OpName 'InstructionDepositMacro ',u))
343    (return
344      `(progn
345	 (de ,fname ,vars ,pattern)
346	 (put ,OpName 'InstructionDepositMacro ',fname)))
347 ))
348
349(dm DefOpLength (U)
350%
351% (DefOpLength name (parameters) pattern)
352%
353(prog (OpName vars pattern fname)
354    (setq U (rest U))
355    (setq OpName (pop U))   % (quote name)
356    (setq fname (intern (bldmsg "%w.LTH" OpName)))
357    (setq OpName (MkQuote OpName))   % (quote name)
358    (setq vars (pop u))
359    (setq pattern
360      (append u
361	`((t (laperr ',OpName  (list .,vars))))))
362    (setq pattern (cons 'cond pattern))
363    % (setq u `(lambda ,vars ,pattern))
364    % (return `(put ,OpName 'InstructionLengthFunction ',u))
365    (return
366      `(progn
367	 (de ,fname ,vars ,pattern)
368	 (put ,OpName 'InstructionLengthFunction ',fname)))
369))
370
371
372
373))
374
375
376%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
377%
378%    getting the instructions in
379
380
381%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
382%
383%  additional test functions
384
385(fluid '(sregs xmmregs))
386
387(setq sregs '(ES CS SS DS FS GS ))
388
389(setq xmmregs '(xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7
390		xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15))
391
392(de sregp(x)
393  % test for a segment register
394  (and (eqcar x 'reg)
395       (memq (cadr x) sregs)))
396
397(de xmmregp(x)
398  % test for an xmm register
399  (and (eqcar x 'reg)
400       (memq (cadr x) xmmregs)))
401
402(de eaxp(x)
403  (and (eqcar x 'reg)
404       (setq x (cadr x))
405       (or (eq x 'EAX) (eq x 1))))
406
407(de memoryp(x)
408  % supports reference to explicit addresses
409   (if (atom x) nil
410       (progn
411	 (setq x (car x))
412	 (or
413	   (eq x 'label)
414	   (eq x '$FLUID)
415	   (eq x '$GLOBAL)
416   ))))
417
418(de effap(x)
419  % supports most general memory and register references
420   (or (stringp x) (idp x) (regp x) (xmmregp x)
421      (and (pairp x)
422       (memq (car x)
423	     '(indirect displacement indexed $fluid $global
424	       fluid global extrareg) ))))
425
426(de stdimmediatep(x)
427  % full size immediate
428  (or (numberp x)(eqcar x 'immediate)(eqcar x 'idloc)))
429
430(de shortlabelp (x)(or (labelp x) (eqcar x 'IMMEDIATE)))
431
432(de adrp (x) (or (atom x)
433		 (memq (car x)'(label entry internalentry foreignentry))
434		 (and (eqcar x 'IMMEDIATE) (adrp (cadr x)))))
435
436(de indirectadrp (x) (and (eqcar x 'INDIRECT)
437			  (or (adrp (cadr x)) (effap (cadr x))(regp (cadr x)))))
438
439(de smallimmediatep (x)
440     (when (eqcar x 'IMMEDIATE) (setq x (unimmediate x)))
441     (bytep x))
442
443%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
444%
445%  Instruction deposit functions
446
447
448(de laperr(inst par)
449   (StdError (BldMsg "Illegal format: (%p %p)" inst par)))
450
451(de modR/M (op1 op2)
452% The modR/M byte is built from two operands.
453% op1 is always a register (or an absolute code), op2
454% a register or a memory reference
455(prog (OpFn mode base ireg n)
456
457    (when (or (regp op1) (xmmregp op1)) (setq op1 (lsh (reg2int op1 'REXR) 3)))
458    (when (pairp op2) (setq mode (car op2)))
459
460    % case: reg - reg
461    (when (or (regp op2) (xmmregp op2))
462         (depositbyte (lor 2#11000000 (lor op1 (reg2int op2 'REXB))))
463         (return nil))
464
465    % case: reg - (indirect (reg EBP/R13) ) % no format without offset
466    (when (and (eq mode 'indirect)
467         (regp (cadr op2))
468         (setq base (reg2int (cadr op2) 'REXB))
469         (equal base 2#101) )
470	    (return (modR/M op1 (list 'displacement (cadr op2) 0))))
471
472    % case: reg - (indirect (reg ESP/R12) )
473    (when (and (eq mode 'indirect)
474	       (regp (cadr op2))
475	       (setq base (reg2int (cadr op2) 'REXB))
476	       (equal base 2#100) )
477	  (depositbyte (lor 2#00000100 op1))
478	  (depositbyte 2#00100100)  % s-i-b byte
479	  (return nil))
480
481    % case: reg - (indirect reg) non ESP/EBP
482    (when (and (eq mode   'indirect)
483	       (regp (cadr op2)))
484	  % no zero displacement for reg EBP:
485	  (setq base (reg2int (cadr op2) 'REXB))
486	  (when (or (and (equal base 2#100) (not (upperreg64p (cadr op2)))) (equal base 2#101))
487	    (modR/Merror op2))
488	  (depositbyte (lor 2#00000000 (lor op1 base)))
489	  (return nil))
490
491    % case: reg - (displacement (reg ESP/R12) const)
492    (when (and (eq mode   'displacement)
493	       (regp (cadr op2))
494	       (numberp (caddr op2))
495	       (setq base (reg2int (cadr op2) 'REXB))
496	       (equal base 2#100) )
497	  (return
498	     (if (bytep (caddr op2))  % 8 bit displacement
499		 (progn
500		   (depositbyte (lor 2#01000100 op1))
501		   (depositbyte 2#00100100)  % s-i-b byte
502		   (depositbyte (land 255 (caddr op2))))
503		 (progn
504		   (depositbyte (lor 2#10000100 op1 base))
505		   (depositbyte 2#00100100)  % s-i-b byte
506		   (deposit32bitword (caddr op2) )))))
507
508    % case: reg - (displacement reg const), non ESP
509    (when (and (eq mode   'displacement)
510	       (regp (cadr op2))
511	       (numberp (caddr op2)))
512	  (setq base (reg2int (cadr op2) 'REXB))
513	  (return
514	    (if (bytep (caddr op2))  % 8 bit displacement
515		(progn
516		  (depositbyte (lor 2#01000000 (lor op1 base)))
517		  (depositbyte (land 255 (caddr op2))))
518		(progn
519		  (depositbyte (lor 2#10000000 (lor op1 base)))
520		  (deposit32bitword (int2sys (caddr op2) ))))))
521
522    % case: reg - (indexed ....)
523    (when (eq mode   'indexed)
524	  (return (sibbyte-for-indexed (lor 2#00000100 op1) op2)))
525
526
527     % all other cases: reg - absolute 32 bit displacement
528    (depositbyte (lor 2#00000101 op1 ))
529 %%   (depositbyte 2#00100101 ) % AMD64 no RIP relative addressing
530    (depositextension op2)))
531
532
533(de sibbyte-for-indexed(modr/m op2)
534    (prog(base index factor n)
535	 (setq base (caddr op2) index (cadr op2))
536	 (setq factor 1)
537	 (when (eqcar index 'times)
538	   (setq factor (caddr index))
539	   (setq index (cadr index)))
540	 (setq factor (atsoc factor
541			     '((1 . 0)(2 . 2#01000000)(4 . 2#10000000)(8 . 2#11000000))))
542	 (when (null factor) (modR/Merror op2))
543	 (setq factor (cdr factor))
544	 (cond
545	  ((eqcar base 'displacement)
546	   (when (or (not (numberp (setq n (caddr base))))
547		     (not (regp (cadr base))))   (modR/Merror op2))
548	   (setq base (reg2int (cadr base) 'REXB))
549	   (when (or (not (equal n 0))(eq base 2#101))
550	     (prin2t "****** Fall noch nicht vorgesehen")
551	     (modR/Merror op2))
552	   (depositbyte modr/m)
553	   (depositbyte(lor factor (lor (lsh (reg2int index 'REXX) 3) base))))
554	  ((labelp base)
555	   (depositbyte modr/m)
556	   (depositbyte(lor factor (lor (lsh (reg2int index 'REXX) 3) 2#101 )))
557	   (depositextension base))
558	  (t (modR/Merror op2)))))
559
560(de modR/Merror(op2)
561    (stderror (bldmsg "illegal x86_64 addressing mode %w" op2)))
562
563(de depositextension(op2)
564    % generate a relocated fullword extension
565    (prog (OfFn)
566	  (when (atom op2) (return (depositwordexpression op2)))
567	  (when (setq OfFn (get (car op2) 'OperandDepositFunction))
568	    (return (apply OfFn (list op2))))
569	  (depositwordexpression op2)))
570
571(de lthmodR/M (op1 op2)
572    % calculate the length of the address part by modR/M
573    (prog (OpFn mode base ireg n)
574
575	  % case: reg - reg
576	  (when (regp op2) (return 1))
577	  (when (pairp op2) (setq mode (car op2)))
578
579	  % case: reg - (indirect (reg ESP/R12) )
580	  (when (and (eq mode   'indirect)
581		     (regp (cadr op2))
582		     (setq base (reg2int (cadr op2) 'REXB))
583		     (equal base 2#100) )
584	    (return 2))
585
586	  % case: reg - (indirect (reg EBP/R13) ) % no format without offset
587	  (when (and (eq mode   'indirect)
588		     (regp (cadr op2))
589		     (setq base (reg2int (cadr op2) 'REXB))
590		     (equal base 2#101) )
591	    (return (lthmodR/M op1 (list 'displacement (cadr op2) 0))))
592
593	  % case: reg - (indirect reg) non ESP/EBP
594	  (when (and (eq mode   'indirect)
595		     (regp (cadr op2)))
596	    (return 1))
597
598	  % case: reg - (displacement (reg ESP/R12) const)
599	  (when (and (eq mode   'displacement)
600		     (regp (cadr op2))
601		     (numberp (caddr op2))
602		     (setq base (reg2int (cadr op2) 'REXB))
603		     (equal base 2#100) )
604	    (if (bytep (caddr op2) )  % 8 bit displacement
605		(return 3)
606		(return 6)))
607
608	  % case: reg - (displacement reg const), non ESP
609	  (when (and (eq mode   'displacement)
610		     (regp (cadr op2))
611		     (numberp (caddr op2)))
612	    (return (if (bytep (caddr op2)) 2 5)))
613
614	  % case: (indexed reg (displacement reg 0))
615	  (when (eq mode   'indexed)
616	    (return (add1 (lth-sibbyte-for-indexed op2))))
617
618	  % all other cases: reg - relative 32 bit displacement
619	  (return 5)))
620
621
622(de lth-sibbyte-for-indexed(op2)
623    (prog(base index factor offset)
624	 (setq base (caddr op2) index (cadr op2))
625	 (cond
626	  ((eqcar base 'displacement)
627	   (setq offset (caddr base))
628	   (when (or (not (equal offset 0))
629		     (not (regp (cadr base))))   (modR/Merror op2))
630	   (setq base (reg2int (cadr base) 'REXB))
631	   (when (eq base 2#101)
632	     (prin2t "****** Fall noch nicht vorgesehen")
633	     (modR/Merror op2))
634	   (return 1))
635	  ((labelp base) (return 5))
636	  (t (modR/Merror op2)))))
637
638% Procedures to compute specific OperandRegisterNumber!*
639% Each of the cases returns the Addrssing MODE
640% and sets OperandRegisterNumber!* as a side effect
641
642(fluid '(numericRegisterNames REX-Prefix))
643
644(setq numericRegisterNames [nil EAX EBX ECX EDX EBP])
645
646(put 'REXR 'prefixcode 2#100)
647(put 'REXX 'prefixcode 2#010)
648(put 'REXB 'prefixcode 2#001)
649
650(de reg2int (u prefixbit)
651    % calculate binary number for register
652    (prog (r) (setq r u)
653	  % strip off tag 'reg
654	  (cond ((xmmregp r) (setq r (cadr r)))
655		(t (cond ((eqcar r 'reg)(setq r (cadr r))))
656		   %convert a LISP-register into an x86_64 register
657		   (if (numberp r) (setq r (getv numericRegisterNames r)))))
658	  (setq r (get r 'registercode))
659	  (when (and r (wgreaterp r 7)) (setq r (wand r 7))
660		(setq REX-Prefix
661		      (wor REX-Prefix (get prefixbit 'prefixcode))))
662	  (if r (return r)
663	    (stderror (bldmsg "unknown register %w" u)))))
664
665(deflist '((EAX   0) (ECX   1) (EDX   2) (EBX   3)
666           (rAX   0) (rCX   1) (rDX   2) (rBX   3)
667	   (ESP   4) (EBP   5) (ESI   6) (EDI   7)
668	   (rSP   4) (rBP   5) (rSI   6) (rDI   7)
669	   (st    4)        % LISP stack register
670	   (T1    7) % EDI
671	   (T2    6) % ESI
672	   (T3    8)
673	   (T4    9)
674	   (heaplast 10) (heaptrapbound 11)
675           (bndstkptr 12) (bndstkupperbound 13)
676           (nil  15) (bndstklowerbound 14)
677
678	   (r8 8) (r9 9) (r10 10) (r11 11)
679	   (r12 12) (r13 13) (r14 14) (r15 15)
680	   (xmm0 0) (xmm1 1) (xmm2 2) (xmm3 3)
681	   (xmm4 4) (xmm5 5) (xmm6 6) (xmm7 7)
682	   (xmm8 8) (xmm9 9) (xmm10 10) (xmm11 11)
683	   (xmm12 12) (xmm13 13) (xmm14 14) (xmm15 15)
684
685	   % byte and word registers
686	   (AL    0) (CL    1)
687	   (AX    0) (CX    1)
688	   % segment registers
689	   (ES   0) (CS    1) (SS    2) (DS   3)(FS   4)(GS   5)
690	   ) 'registercode)
691
692(put 'NIL 'RegisterCode 15)
693
694(de bytep(n)
695    (when (and (numberp n) (lessp n 128) (greaterp n -128))
696      (land n 255)))
697
698(de halfwordp(n)
699    (when (and (numberp n) (lessp n 32768) (greaterp n -32768))
700      (land n 65535)))
701
702(de unimmediate(u)
703    (if (eqcar u 'immediate) (cadr u) u))
704
705%------------------------------------------------------------------------
706% (displacement (reg 5) ...) has to be prefixed in order to address
707% the DS segment rther than the SS segment
708(de indexed-reg-5-p(op)
709    (and (pairp op) NIL % not useful in 64 bit mode
710	 (or (eq (car op) 'indexed)
711	     (eq (car op) 'displacement)
712	     (eq (car op) 'indirect))
713	 (equal (cadr op) '(reg 5))) )
714
715(de lth-reg-5-prefix(op)
716    (if (indexed-reg-5-p op) 1 0))
717
718(de reg-5-prefix(op)
719    (when (indexed-reg-5-p op)
720      (depositbyte 16#3e) ))  % DS segment override prefix
721
722%------------------------------------------------------------------------
723%  special format for EAX-instructions
724
725(de OP-mem-eax (code op1 op2)
726    (when (eqcar op1 'reg)(setq op1 op2))
727    (depositbyte (car code))
728    (depositextension (unimmediate op1)))
729
730(de LTH-mem-eax (code op1 op2) 5)
731
732%------------------------------------------------------------------------
733% code is one byte, op1 is a register, op2 is an effective address
734(de OP-reg-effa (code op1 op2)
735    (reg-5-prefix op2)
736    (depositbyte (car code))
737    (modR/M op1 op2))
738
739(de LTH-reg-effa (code op1 op2)
740    (plus 1 (lth-reg-5-prefix op2) (lthmodR/M op1 op2)))
741
742%------------------------------------------------------------------------
743% op1 is an immediate, op2 is an effective address which patches into
744% the second byte of the code
745(de OP-imm-effa (code op1 op2)
746    (reg-5-prefix op2)
747    (depositbyte (car code))
748    (modR/M (cadr code) op2)
749    (depositextension (unimmediate op1)))
750
751(de lth-imm-effa (code op1 op2)
752    (plus 5 (lth-reg-5-prefix op2) (lthmodR/M (cadr code) op2)))
753
754(de OP-imm8-effa (code op1 op2)
755    (reg-5-prefix op2)
756    (depositbyte (car code))
757    (modR/M (cadr code) op2)
758    (depositbyte (unimmediate op1)))
759(de lth-imm8-effa (code op1 op2)
760    (plus 2 (lth-reg-5-prefix op2) (lthmodR/M (cadr code) op2)))
761
762%------------------------------------------------------------------------
763% code is two bytes (prefix+opcode) with an optional rex prefix in between,
764% op1 is a register, op2 is an effective address
765(de OP-reg-effa-2 (code op1 op2)
766    (prog (need_rex)
767	  (setq need_rex (or (upperreg64p op1)  (upperreg64p op2)))
768	  (depositbyte (car code))
769	  (setq code (cdr code))
770	  % check for optional rex byte
771	  (if (eqcar code 'rex)
772	      (progn
773		(setq code (cdr code))	% skip symbolic "rex" byte in inst. def.
774		(if need_rex		% deposit necessary rex byte
775		 (progn
776		   (setq REX-prefix 16#48)
777		   (depositbyte 16#48)
778		   (SETQ REX? (plus codebase!* currentoffset* -1))
779		   (setq allowextrarexprefix (if (eqcar code 'rex) 0 1) )))
780		))
781	  (depositbyte (car code)))
782    (modR/M op1 op2))
783
784(de LTH-reg-effa-2 (code op1 op2)
785  (prog (codelength)
786    (setq codelength (length code))
787    (if (and (memq 'rex code) (not (upperreg64p op1)) (not (upperreg64p op2)))
788	% optional rex byte not needed
789	(setq codelength (sub1 codelength)))
790    (return (plus codelength (lthmodR/M op1 op2)))))
791
792%------------------------------------------------------------------------
793% code is two bytes with up to three prefixes, op1 is a register, op2 is an effective address
794(de OP-xmmreg-effa (code op1 op2)
795  (prog (need_rex)
796    (setq need_rex (or (upperreg64p op1)  (upperreg64p op2)))
797    % deposit prefix bytes
798    (while (and (cddr code) (not (eqcar code 'rex)))
799      (depositbyte (car code))
800      (setq code (cdr code)))
801    (if (or need_rex (eqcar code 'rex))
802	(progn
803	  (setq REX-prefix (if (eqcar code 'rex) 16#48 16#40))
804	  (depositbyte REX-prefix)
805	  (SETQ REX? (plus codebase!* currentoffset* -1))
806	  (setq allowextrarexprefix (if (eqcar code 'rex) 0 1) )
807	  (if (eqcar code 'rex) (setq code (cdr code)))))
808    (depositbyte (car code)))
809    (depositbyte (cadr code))
810    (modR/M op1 op2))
811
812(de LTH-xmmreg-effa (code op1 op2)
813  (prog (codelength)
814    (setq codelength (length code))
815    (if (and (memq 'rex code) (not (upperreg64p op1)) (not (upperreg64p op2)))
816	% optional rex byte not needed
817	(setq codelength (sub1 codelength)))
818    (return (plus codelength (lthmodR/M op1 op2)))))
819
820%-----------------------------------------------------------------------
821% format: fixed modR/M byte
822(de OP-EFFA (code op1) (OP-reg-effa code (cadr code) op1))
823(de lth-EFFA (code op1) (LTH-reg-effa code (cadr code) op1))
824
825(de OP2-effa(code op1)
826    (depositbyte (car code))
827    (op-EFFA (cdr code) op1))
828
829(de lth2-EFFA(code op1) (add1 (lth-effa(cdr code) op1)))
830
831%-----------------------------------------------------------------------
832% immediate to EAX
833(de OP-imm-EAX (code op1 op2-is-alway-EAX)
834    (depositbyte (car code)) (depositextension (unimmediate op1)))
835
836(de LTH-imm-EAX (code op1 op2-is-alway-EAX) 5)
837
838%-----------------------------------------------------------------------
839% INT with parameter
840(de OP-INT (code op1)
841    (depositbyte (car code)) (depositbyte (unimmediate op1)))
842
843(de LTH-INT (code op1) 2)
844
845%---------------------------------------------------------------------
846% immediate to reg
847% code is one byte + ModR?m byte, op1 the immediate, op2 the reg
848% sometimes there is no ModR/M byte; then the reg is placed in the opcode
849%  (adc 17 (reg ABX))
850(de OP-imm-reg (code op1 op2)
851    (prog(n c1 c2)
852      (when (cdr code) (depositbyte (car code))(setq code (cdr code)))
853      (depositbyte (lor (car code) (reg2int op2 'REXB)))
854      (depositextension (unimmediate op1))))
855
856(de LTH-imm-reg (code op1 op2) (if (cdr code) 6 5))
857
858(de OP-imm8-reg (code op1 op2)
859    (prog(n c1 c2)
860      (when (cdr code) (depositbyte (car code))(setq code (cdr code)))
861      (depositbyte (lor (car code) (reg2int op2 'REXB)))
862      (depositbyte (bytep op1))))
863
864(de LTH-imm8-reg (code op1 op2) (if (cdr code) 3 2))
865
866
867%---------------------------------------------------------------------
868% absolute n-byte instruction
869(de OP-byte (code)
870	(foreach x in code do (depositbyte x)))
871(de lth-byte (code) (length code))
872
873%---------------------------------------------------------------------
874% push/pop with register: code is one byte modified with reg number
875(de OP-Push-Reg(code op1) (depositbyte (lor (car code) (reg2int op1 'REXB))))
876(de LTH-Push-Reg(code op1) 1)
877
878%---------------------------------------------------------------------
879% jump to absolute address
880% 386 has only relative jumps
881(de OP-Jump (code op1)
882  (prog(n)
883   (depositbyte (car code))
884   (when (cdr code) (depositbyte (cadr code)))
885   (setq op1 (saniere-Sprungziel op1))
886   (setq n(MakeExpressionrelative op1 4)) % offset wrt next instr
887   (depositliteralword n)
888   (when *testlap (tab 15)(prin2 "-> ")
889	 (prin2 n) (prin2 " rel = ")
890	 (prin2 (plus currentoffset* n))(prin2t " abs"))))
891
892(de lth-jump (code op1) (if (cdr code) 6 5))
893
894%jump short (8-bit displacement)
895(de OP-JUMP-SHORT (code op1)
896  (prog(n a)
897   (depositbyte (car code))
898   (setq op1 (saniere-Sprungziel op1))
899   (setq n (MakeExpressionRelative op1 1)) % offset wrt next instr
900   (when (not (bytep n)) (stderror  "distance too long for short jump"))
901   (depositbyte (bytep n))
902   (when *testlap (tab 15)(prin2 "-> ")
903	 (prin2 n) (prin2 " rel = ")
904	 (prin2 (plus currentoffset* n))(prin2t " abs"))))
905
906(de lth-JUMP-SHORT (code op1) 2)
907
908% indirect jump to effective address
909(de OP-JUMP-EFFA (code op1)
910	      % a tag "indirect" contained already in the operation if not
911	      % explicit reg reference
912	   (when (and (eqcar op1 'indirect) (not (regp (cadr op1))))
913		 (setq op1 (cadr op1)))
914           % need REX byte if upper 8 register
915           (if (upperreg64p op1)
916               (progn
917                 (setq REX-prefix 16#40)
918                 (depositbyte REX-prefix)
919                 (SETQ REX? (plus codebase!* currentoffset* -1))
920                 (setq allowextrarexprefix 1)))
921	   (op-reg-effa code (cadr code) op1))
922
923(de LTH-JUMP-EFFA (code op1)
924	   (when (and (eqcar op1 'indirect) (not (regp (cadr op1))))
925		 (setq op1 (cadr op1)))
926           (if (upperreg64p op1) (add1 (lth-reg-effa code (cadr code) op1))
927	     (lth-reg-effa code (cadr code) op1)))
928
929
930(commentoutcode
931%jump full size (32 bit displacement)
932(de OP-JUMP-LONG(code op1)
933    (depositbyte (car code))
934    (setq op1 (saniere-Sprungziel op1))
935    (when (cdr code) (depositbyte (cadr code))) %conditional jumps
936    (depositExtension op1))
937(de lth-JUMP-LONG(code op1) (if (cdr code) 6 5))
938)
939
940(de saniere-Sprungziel(l)
941    (cond ((atom l) l)
942	  ((eqcar l 'IMMEDIATE) (saniere-Sprungziel (cadr l)))
943	  ((eqcar l 'LABEL) (saniere-Sprungziel (cadr l)))
944	  (T l)))
945% RET n
946(de OP-RET-n (code op1)
947   (depositbyte (car code))
948   (deposithalfword (halfwordp (unimmediate op1))))
949(de lth-RET-n (code op1) 3)
950
951%-------------------------------------------------------------
952%enter
953(de OP-enter (code op1)
954   (depositbyte (car code))
955   (deposithalfword (unimmediate op1))
956   (depositbyte 0))  % support for level 0 only
957(de lth-enter (code op1) 4)
958
959%-------------------------------------------------------------
960% PUSH imm32
961(de OP-imm   (code op1)
962   (depositbyte (car code))
963   (depositextension (unimmediate op1)))
964(de lth-imm   (code op1) 5)
965
966
967%-------------------------------------------------------------
968% shift with one parameter
969(de op-shift (code dummy op1)
970    (depositbyte (car code))
971    (modr/m (cadr code) op1))
972(de lth-shift (code op1) (add1 (lthmodR/M (cadr code) op1)))
973
974%shift with immediate amount
975(de op-shiftimm(code op2 op1)
976    (depositbyte (car code))
977    (depositbyte (lor 2#11000000 (lor (cadr code) (reg2int op1 'REXB))))
978    (depositbyte (bytep (unimmediate op2))))
979(de lth-shiftimm(code op1 op2) 3)
980
981% double shifts
982(de op-dshift (code dummy op1)
983    (depositbyte (cadr code))
984    (modR/M op1 0))
985(de lth-dshift (code op1) (plus 2 (lthmodR/M op1 0)))
986
987(de op-dshiftimm (code op2 op1)
988    (depositbyte (cadr code))
989    (modR/M op1 0)
990    (depositbyte (bytep (unimmediate op2))))
991(de lth-dshiftimm (code op1) (plus 3 (lthmodR/M op1 0)))
992
993%-------------------------------------------------------------
994% MUL and DIV
995(de OP-MUL (code op1) (op-reg-effa code (cadr code) op1))
996(de lth-mul (code op1) (lth-reg-effa code (cadr code) op1))
997
998% special: IMUL
999(de OP-IMUL (code op1 op2)
1000    (depositbyte (car code))
1001    (depositbyte (cadr code))
1002    (modR/M op1 op2))
1003(de lth-imul (code op1 op2) 3)
1004
1005% ------------------------------------------------------------
1006% standard operand tags
1007% ------------------------------------------------------------
1008
1009
1010
1011(de DepositFluid (X)
1012    (DepositValueCellLocation (second X)))      % Defined in System-Faslin.Red
1013
1014(de DepositExtraReg (X)
1015    (DepositExtraRegLocation (second X)))       % Defined in System-Faslin.Red
1016
1017(de DepositEntry (X)
1018    (DepositFunctionCellLocation (second X)))   % Defined in System-Faslin.Red
1019
1020(de depositforeignentry (x)
1021  (depositfunctioncelllocation (second x)))
1022
1023(put 'fluid 'OperandDepositFunction (function DepositFluid))
1024(put '$fluid 'OperandDepositFunction (function DepositFluid))
1025(put 'global 'OperandDepositFunction (function DepositFluid))
1026(put '$global 'OperandDepositFunction (function DepositFluid))
1027(put 'ExtraReg 'OperandDepositFunction (function DepositExtraReg))
1028(put 'entry 'OperandDepositFunction (function DepositEntry))
1029(put 'foreignentry 'operanddepositfunction (function depositforeignentry))
1030
1031
1032
1033% ------------------------------------------------------------
1034% Deposit instructions for Pseudo ops
1035% ------------------------------------------------------------
1036
1037(de DepositWordBlock (X)                % (FULLWORD xxx xxx ... xxx)
1038    (foreach Y in (cdr X) do (DepositQuadWordExpression Y)))
1039
1040(de DepositHalfWordBlock (X)            % (HALFWORD xxx xxx ... xxx)
1041    (foreach Y in (cdr X) do (DepositHalfWordExpression Y)))
1042
1043(de DepositByteBlock (X)                % (BYTE     xxx xxx ... xxx)
1044(prog (Z)
1045    (setq Z 0)
1046    (foreach Y in (cdr X) do
1047    (progn (DepositByte Y)
1048	(setq Z (LXOR Z 1))))           % toggle Z
1049    (cond ((not (equal Z 0)) (DepositByte 0)))))        % go to halfword boundary
1050
1051
1052% Deposit a string with a trailing ZERO byte
1053
1054(de DepositString (X)                   % (STRING "xxxxxx")
1055  (prog nil
1056    (setq X (second X))
1057    (for (from I 0 (Size X) 1) (do (DepositByte (Indx X I))))
1058    (DepositByte 0)
1059	(while (not (eq 0 (remainder CurrentOffset!* 8)))
1060	       (depositbyte 0))))
1061% align to word boundary
1062
1063(de DepositFloat (X)                    % this will not work in cross-assembly
1064(progn (setq X (FltInf (second X)))
1065    (DepositWord (FloatlowOrder X)) ))
1066
1067(put 'fullword 'InstructionDepositFunction 'DepositWordBlock)
1068(put 'halfword 'InstructionDepositFunction 'DepositHalfWordBlock)
1069(put 'byte 'InstructionDepositFunction 'DepositByteBlock)
1070(put 'string 'InstructionDepositFunction 'DepositString)
1071(put 'float 'InstructionDepositFunction 'DepositFloat)
1072
1073% Auxiliary functions for computing instruction bit patterns
1074
1075(de MakeExpressionRelative (Exp OffsetFromHere)
1076(prog (X Y Z)
1077    (cond ((EqCar Exp 'InternalEntry)
1078      (return (MakeInternalEntryRelative (second Exp) OffsetFromHere))))
1079
1080    (cond ((not (LabelP Exp))
1081	   (return (StdError "Only labels can be relative"))))
1082
1083    (setq X (plus CurrentOffset* OffsetFromHere))
1084    (setq Y (LabelOffset Exp))
1085    (return (Difference Y X))))
1086
1087
1088(de makeinternalentryrelative (nam offsetfromhere)
1089  (prog (offset)
1090	(setq offset (atsoc nam labeloffsets*))
1091	(setq offset (if offset
1092		(cdr offset)
1093		(get nam 'internalentryoffset)))
1094	(return (if offset
1095		  (progn
1096		    (setq offset
1097		     (difference offset
1098		      (plus2 currentoffset* offsetfromhere))))
1099		  (progn
1100		    (setq forwardinternalreferences*
1101		     (cons (cons currentoffset* nam)
1102		      forwardinternalreferences*))
1103		    0)))))
1104	% will be fixed in SystemFasl...
1105
1106
1107(de labeloffset (l)
1108  (let (offset)
1109    (cond
1110     ((codep l) (if *writingfaslfile
1111		  (inf l)
1112		  (wdifference (inf l) codebase*)))
1113     ((setq offset  (atsoc l labeloffsets*)) (cdr offset))
1114     (t (stderror (bldmsg "Unknown label %r" l)))
1115     )))
1116
1117
1118
1119% ------------------------------------------------------------
1120% Branch analysis (conditional jumps)
1121% ------------------------------------------------------------
1122
1123(fluid '(ConditionalJumps*))
1124(setq ConditionalJumps*
1125  '((jo  . jno)  (jno  . jo)
1126    (jp  . jnp)  (jnp  . jp)
1127    (jz  . jnz)  (jnz  . jz)
1128    (je  . jne)  (jne  . je)
1129    (jb  . jae)  (jae  . jb)  (jbe  . ja)  (ja  . jbe)
1130    (jnb . jnae) (jnae . jnb) (jnbe . jna) (jna . jnbe)
1131    (jl  . jge)  (jge  . jl)  (jle  . jg)  (jg  . jle)
1132    (jnl . jnge) (jnge . jnl) (jnle . jng) (jng . jnle)
1133))
1134
1135(de reformBranches (code)
1136  (prog (rcode instr bottom x y z)
1137    (while code
1138       (setq instr (pop code))
1139	   % replace simple negation jumps
1140	   % case: (jne lab)(j anywhere) lab
1141	   %  ->   (je anywhere)
1142       (when (and
1143		(pairp instr)
1144		(setq x (atsoc (car instr) ConditionalJumps*))
1145		(setq y (saniere-Sprungziel (cadr instr)))
1146		(eqcar (car code) 'JMP)
1147		(cdr code)
1148		(equal (cadr code) y))
1149	     (setq instr (cons (cdr x) (cdr (pop code)))))
1150	   % replace indirect conditional jumps (not avail on 386)
1151	(when (and
1152		(pairp instr)
1153		(atsoc (car instr) ConditionalJumps*)
1154		(not (atom (saniere-Sprungziel (cadr instr)))))
1155	     (setq x (gensym))
1156	     (push (cons 'JMP (cdr instr)) bottom)
1157	     (push x bottom)
1158	     (setq instr (list (car instr) x))   )
1159	(push instr rcode))
1160	(return (nconc (reversip rcode) bottom))))
1161
1162% ------------------------------------------------------------
1163% Branch optimization (in favour of short jumps)
1164% ------------------------------------------------------------
1165
1166(deflist '(
1167   (JMP JMPL) (JO JOL) (JB JBL)(JNAE JNAEL)(JNB JNBL)
1168   (JAE JAEL) (JE JEL) (JZ JZL)(JNE JNEL)(JNZ JNZL)
1169   (JBE JBEL)(JNA JNAL)(JNBE JNBEL)(JA JAL)(JS JSL)
1170   (JNS JNSL)(JP JPL)(JPE JPEL)(JNP JNPL)(JPO JPOL)
1171   (JL JLL)(JNGE JNGEL)(JNL JNLL)(JGE JGEL)(JLE JLEL)
1172   (JNG JNGL)(JNLE JNLEL)(JG JGL)
1173) 'WordBranch)
1174
1175(de GeneralBranchInstructionP (i) (get i 'WordBranch))
1176(de LocalLabelp (l) (atom (saniere-sprungziel l)))
1177
1178% ProcessInitCode CodeList
1179% Purpose: Take a code list which has already been expanded by Pass1Lap
1180%          and replaces all generic branches with word mode branches.
1181% Returns: a new code list
1182
1183(de ProcessInitCode (CodeList)
1184(prog (BranchAndLabelAList*)            % find branches, labels, and entries
1185    (setq CodeList (BuildInitCodeOffsetTable CodeList))
1186    (setq LabelOffsets* (DeleteAllButLabels BranchAndLabelAList*))
1187    (return CodeList)))
1188
1189
1190% OptimizeBranches BranchCodeList!*;
1191% Purpose: Take a code list which has already been expanded by Pass1Lap
1192%          and try to optimize the branches
1193% Returns: a new code list
1194
1195(de OptimizeBranches (u) (OptimizeBranches0 u *WritingFaslFile))
1196
1197(de OptimizeBranches0 (u m)
1198(prog (BranchAndLabelAList* InstructionChanged* q w)
1199    (setq BranchCodeList* (if m (alignCode u) u))
1200    (BuildOffsetTable)                  % find branches, labels, and entries
1201    (setq InstructionChanged* nil)
1202    (FindLongBranches)
1203    (when (and m InstructionChanged*)
1204	      % give up aligned code
1205	  (return (OptimizeBranches0 u nil)))
1206    (while InstructionChanged*
1207	 (setq InstructionChanged* nil)
1208	 (FindLongBranches))
1209    (setq LabelOffsets* (DeleteAllButLabels BranchAndLabelAList*))
1210    (return BranchCodeList*)))
1211
1212(de &make-nop(n)
1213   % make n bytes of nop instructions
1214   (cond ((wleq n 0) nil)
1215	 ((eq n 1)'((nop)))
1216	 ((eq n 2)'((nop) (nop)))
1217	 ((eq n 3)'((mov (reg t1)(reg t1))))
1218	 ((eq n 4)'((lea (displacement(reg t1)0) (reg t1))))
1219	 (t (append (&make-nop 3)(&make-nop (difference n 3)))) ))
1220
1221(de alignCode(u)
1222  (if (&smember 'fastapply u) u (alignCode1 u)))
1223
1224(de alignCode1(u)
1225   (let(rcode w (a currentoffset*) l x y z q s nops)
1226     (while u
1227       (setq w (pop u))
1228       (setq nops 0)
1229       (cond
1230
1231	 % initial start: sync. entry point
1232	   ((null rcode)
1233	     (setq x a)
1234	     (setq y u q w)
1235	     (setq s (eqcar w '*entry))
1236	     (while y
1237		 (when (pairp q)(setq x (iplus2 x (instructionlength q))))
1238		 (if (eqcar q '*entry) (setq y nil) (setq q (pop y))))
1239	     (setq x (wand x 15))
1240	     (when (not (eq x 0)) (setq nops (idifference 16 x)))
1241	    )
1242
1243	% entry: executable code starts
1244	    ((eqcar w '*entry)(setq s t))
1245
1246	% fullword: executable code terminated
1247	    ((eqcar w 'fullword)(setq s nil))
1248
1249        % label under *align16
1250	   ((and s (atom w) *align16)
1251	      % next instruction should begin on cache line
1252	     (setq x (wand a 15))
1253	     (when (not (eq x 0))
1254		   (setq nops(wdifference 16 x))))
1255
1256	% label in standard mode
1257	   ((and s (atom w) u (pairp (car u)))
1258	      % next instruction should not split cache lines
1259	    (setq x (iplus2 (wand a 15) (instructionlength (car u))))
1260	    (when (not (igreaterp x 16))
1261		  (setq nops (idifference 16 (wand a 15))))
1262	   )
1263
1264       % call under *align16
1265	   ((and *align16 (eqcar w 'call))
1266	      % put call exactly at the end of cache line
1267	    (setq x (wand (iplus2 a (instructionlength w)) 15))
1268	    (when (not (eq x 0)) (setq nops (idifference 16 x)))
1269	   )
1270
1271	% call
1272	   ((and (eqcar w 'call) u (pairp (car u)))
1273	      % following instruction should not split over cache line
1274	    (setq x (wand (iplus2 a (instructionlength w)) 15))
1275	    (when (igreaterp (iplus2 x (instructionlength (car u)))16)
1276		  (setq nops (idifference 16 x)))
1277	   )
1278	 )
1279       (when (and (igreaterp nops 0)
1280		  (ilessp nops 9))  % not too many
1281	     (foreach q in (&make-nop nops) do (push q rcode))
1282	     (setq a (iplus2 a nops)))
1283       (when (pairp w)(setq a (iplus2 a (InstructionLength w))))
1284       (push w rcode)
1285      )
1286      (while rcode
1287	 (when (not (eq (setq w (pop rcode)) '!%temp-label))
1288	       (push w u)))
1289    u
1290))
1291
1292(de DeleteAllButLabels (X)
1293(prog (Y)
1294   (while (not (LabelP (car (first X)))) (setq X (cdr X)))
1295
1296   (cond ((null X) (return nil)))
1297    (setq Y X)
1298    (while (cdr Y)
1299	(cond ((LabelP (car (second Y)))
1300	    (setq Y (cdr Y))) (t
1301
1302	    (Rplacd Y (cddr Y)))))
1303    (return X)))
1304
1305
1306% BuildInitCodeOffsetTable CodeList;
1307% Purpose: generate a association list of labels, procedure entries
1308% Input is:
1309     %   labels:             label
1310     %   instructions:     ( opcode [operands]* )
1311     %   entry points:     ( !*entry procedurename proceduretype numberofargs)
1312% The Alist has the form:
1313     %   labels:           ( label . CurrentOffset)
1314     %   procedures:       ( procedurename . CurrentOffset)
1315     %   branch instrs     ( (opcode label) . CurrentOffset)
1316% otherwise, the CurrentOffset is advanced by the length of the instruction
1317
1318(de BuildInitCodeOffsetTable (CodeList)
1319(prog (AList Instr)
1320    (setq CodeSize* CurrentOffset*)
1321    (foreach X in CodeList do
1322   (progn (cond ((LabelP X)
1323	  (setq AList (cons (cons X CodeSize*) AList)))
1324   ((equal (setq Instr (first X)) '*entry)
1325	  (setq AList (cons (cons (second X) CodeSize*) AList)))
1326   ((and (GeneralBranchInstructionP Instr)(locallabelp (second X)))
1327	  (progn (Rplaca X (get Instr 'WordBranch))
1328
1329		    (setq CodeSize* (plus CodeSize* (InstructionLength X)))
1330		    (setq AList (cons (cons X CodeSize*) AList)))) (t
1331
1332   (setq CodeSize* (plus CodeSize* (InstructionLength X)))))))
1333
1334
1335    (setq BranchAndLabelAList* (ReversIP AList))
1336    (return CodeList)))
1337
1338
1339%  BuildOffsetTable();
1340% Purpose: generate a association list of labels, procedure entries
1341% The Alist has the form:
1342     %   labels:           ( label . CurrentOffset)
1343     %   procedures:       ( procedurename . CurrentOffset)
1344     %   branch instrs     ( (opcode label) . CurrentOffset)
1345
1346(de BuildOffsetTable nil
1347 (prog (AList Instr)
1348  (setq CodeSize* CurrentOffset*)
1349  (foreach X in BranchCodeList* do
1350   (progn
1351     (cond ((LabelP X) (setq AList (cons (cons X CodeSize*) AList)))
1352	   ((equal (setq Instr (first X)) '*entry)
1353	    (setq AList (cons (cons (second X) CodeSize*) AList)))
1354	       % branch: enter the address of the following instruction
1355	   ((and (GeneralBranchInstructionP Instr) (locallabelp (second X)))
1356	    (setq CodeSize* (plus CodeSize* (InstructionLength X)))
1357	    (setq AList (cons (cons X CodeSize*) AList)))
1358	   (t (setq CodeSize* (plus CodeSize* (InstructionLength X)))))))
1359  (setq BranchAndLabelAList* (ReversIP AList))
1360  (setq InstructionChanged* BranchAndLabelAList*)
1361  (return BranchAndLabelAList*) ))
1362
1363
1364(de FindLongBranches nil
1365 (prog (CurrentDisplacement)
1366  (foreach entry on BranchAndLabelAList* do
1367    (cond ((not (LabelP (car (first entry))))
1368      (progn
1369	(setq CurrentDisplacement (FindDisplacement (first entry)))
1370	(cond
1371	   ((or (GreaterP CurrentDisplacement (const MaximumShortBranch))
1372		(ZeroP CurrentDisplacement))    % Must have long brahch.
1373
1374	  (progn (setq InstructionChanged* t)
1375	      (IncreaseAllOffsets entry (MakeLongBranch entry)))))))))))
1376
1377
1378
1379(de FindDisplacement (InstructionOffsetPair)
1380    (Abs (Difference (cdr InstructionOffsetPair)
1381		    (FindLabelOffset (second (first InstructionOffsetPair))))))
1382
1383%  FindLabelOffset(Label)
1384% Purpose: looks up the location of Label in BranchAndLabelAList!*
1385% Returns: the offset of said Label
1386
1387(de FindLabelOffset (L)
1388(prog (Offset)
1389    (return
1390     (cond ((EqCar L 'InternalEntry) (FindEntryOffset (second L)))
1391
1392	   ((setq Offset (Atsoc (saniere-sprungziel L) BranchAndLabelAList*)) (cdr Offset))
1393	   (t (StdError (BldMsg "Unknown label %r" L)))))))
1394
1395
1396%  FindEntryOffset(ProcedureName);
1397% Purpose: looks up the location of ProcedureName in BranchAndLabelAList!*
1398% Returns: the offset of said ProcedureName
1399%          if an entry point is not known for this procedure it returns a
1400%          dummy value of -2000
1401
1402(de FindEntryOffset (L)
1403    (cond ((setq L (Atsoc L BranchAndLabelAList*)) (cdr L)) (t -2000)))
1404
1405(de MakeLongBranch (AList)
1406(prog (InstructionList Result OppositeBranch n)
1407    (setq InstructionList (car (first AList)))
1408    (setq n (instructionlength InstructionList))
1409    (Rplaca InstructionList (get (first InstructionList) 'WordBranch))
1410    (setq n (difference (instructionlength InstructionList) n))
1411    (cond ((cdr AList) (Rplacw AList (cdr AList)))
1412	  (t (Rplacw AList (list (cons '~DummyLabel~ 0)))))
1413    (return n))) % increased length of subsequent code
1414
1415
1416(de IncreaseAllOffsets (X N)
1417    (foreach Y in X do (Rplacd Y (plus (cdr Y) N)))
1418    (setq CodeSize* (plus CodeSize* N)))
1419
1420% ------------------------------------------------------------
1421% Procedures to compute instruction lengths
1422% ------------------------------------------------------------
1423
1424(de InstructionLength (X)
1425   (cond ((and (eqcar x 'movq) (not (xmmregp (cadr x))) (not (xmmregp (caddr x))))
1426	  (wplus2 1 (InstructionLength1 (cons 'mov (cdr x)))))
1427         ((eqcar x 'addq) (wplus2 1 (InstructionLength1
1428				 (cons 'add (cdr x)))))
1429         ((eqcar x 'subq) (wplus2 1 (InstructionLength1
1430				     (cons 'sub (cdr x)))))
1431         ((eqcar x 'cmpq) (wplus2 1 (InstructionLength1
1432				     (cons 'cmp (cdr x)))))
1433         ((and (pairp x) (flagp (car x) 'norexprefix))
1434	  (InstructionLength1 x))
1435	 ((and (pairp x) (flagp (car x) 'onlyupperregrexprefix))
1436	  (if (upperreg64p x)
1437	      (wplus2 1 (InstructionLength1 x))
1438	    (InstructionLength1 x)))
1439         ((reg64bitp x) (wplus2 1 (InstructionLength1 x)))
1440         (t (InstructionLength1 x))))
1441
1442(de InstructionLength1 (X)
1443   (prog (Y)
1444       (when (setq Y (get (car x) 'InstructionLengthFunction))
1445	     (return (apply2safe y (cdr x))))
1446       (when (setq Y (get (car x) 'INSTRUCTIONLENGTH))
1447	     (return (if (numberp y) y (apply y (list x)))))
1448       (stderror (bldmsg "*** Unknown x86_64 instruction:%w " x))))
1449
1450(de apply2safe(y x) % ensure that plly has two parameters at least
1451     (cond ((null x) (apply y (list nil nil)))
1452	   ((null (cdr x)) (apply y (list (car x) nil)))
1453	   (t (apply y (list (car x)(cadr x))))))
1454
1455(de InlineConstantLength (X)
1456% Purpose: returns the Size_Of_Unit_In_Bytes * Number_Of_Such_Units
1457%   X has the form:
1458%          (Unit  value_1  value_2 value_3 .... )
1459    (Times2 (cond ((equal (first X) 'fullword) 8) (t 2))
1460 	    (length (rest X))))
1461
1462(de ByteConstantLength (X)
1463    (Times2 (Quotient (Plus2 (length (rest X)) 1) 2) 2))
1464
1465(de LapStringLength (X)                 % must fall on word boundary
1466% Purpose: Calculate the number of bytes occupied by a given string
1467%  X has the form: (STRING "xxxxxx")
1468    (Times2 (Quotient (Plus2 (Size (second X)) 9) 8) 8))
1469
1470(DefList '((fullword InlineConstantLength)
1471	  (halfword InlineConstantLength)
1472	  (byte ByteConstantLength)
1473	  (float 8)
1474	  (string LapStringLength)) 'InstructionLength)
1475
1476
1477(put '*entry 'InstructionLength 0)
1478
1479% ------------------------------------------------------------
1480% Depositing Operations
1481% ------------------------------------------------------------
1482
1483(de DepositByte (X)
1484(progn (putbyte (wPlus2 CodeBase* CurrentOffset*) 0 X)
1485    (UpdateBitTable 1 0)
1486    (setq CurrentOffset* (plus CurrentOffset* 1))))
1487
1488(de DepositHalfWord (X)
1489(progn (puthalfword (wPlus2 CodeBase* CurrentOffset*) 0 X)
1490    (UpdateBitTable 2 0)
1491    (setq CurrentOffset* (plus CurrentOffset* 2))))
1492
1493(compiletime (put 'put_a_halfword 'opencode '(
1494   (mov (reg ebx) (displacement (reg eax) 0))))) %% (reg 2) (displacement (reg eax) 0)))))
1495
1496(de deposit32bitword (x) %% cross
1497  (put_a_halfword (wplus2 codebase* currentoffset*) x)
1498  (updatebittable 4 0)
1499  (setq currentoffset* (plus currentoffset* 4)))
1500
1501(de depositword (x)
1502  (putword (wplus2 codebase* currentoffset*) 0 x)
1503  (updatebittable 8 0)
1504  (setq currentoffset* (plus currentoffset* 8)))
1505
1506(de depositliteralword (x) %% Cross
1507  (put_a_halfword (wplus2 codebase!* currentoffset!*) x)
1508  (updatebittable 4 0)
1509  (setq currentoffset* (plus currentoffset* 4)))
1510
1511(de deposit-relocated-word (offset)
1512  % Given an OFFSET from CODEBASE*, deposit a word containing the
1513  % absolute address of that offset.
1514  (put_a_halfword (wplus2 codebase* currentoffset*)
1515          (if *writingfaslfile  offset
1516               (wdifference offset (wplus2 currentoffset* 4))))
1517  (updatebittable 4 (const reloc_word))
1518  (setq currentoffset* (plus currentoffset* 4)))
1519
1520
1521(de old-deposit-relocated-word (offset)
1522  % Given an OFFSET from CODEBASE*, deposit a word containing the
1523  % absolute address of that offset.
1524  (put_a_halfword (wplus2 codebase* currentoffset*)
1525	   (iplus2 offset (if *writingfaslfile 0 codebase*)))
1526  (updatebittable 4 (const reloc_word))
1527  (setq currentoffset* (plus currentoffset* 4)))
1528
1529(de depositwordexpression (x)
1530  % Only limited expressions now handled
1531  (let (y)
1532    (cond
1533      ((fixp x) (deposit32bitword (int2sys x)))
1534      ((labelp x) (deposit-relocated-word (labeloffset x)))
1535      ((equal (first x) 'internalentry)
1536       (let ((offset (get (second x) 'internalentryoffset)))
1537	 (if offset
1538	     (deposit-relocated-word offset)
1539	     (progn
1540	       (setq forwardinternalreferences*
1541		     (cons (cons currentoffset* (second x))
1542			   forwardinternalreferences*))
1543	       (deposit-relocated-word 0)))))
1544      ((equal (first x) 'idloc) (depositwordidnumber (second x)))
1545      ((equal (first x) 'entry) (depositentry x))
1546      (t (stderror (bldmsg "Expression too complicated %r" x))))))
1547
1548(de depositquadwordexpression (x)
1549  % Only limited expressions now handled
1550  (let (y)
1551    (cond
1552      ((fixp x) (depositword (int2sys x)))
1553      ((labelp x) (deposit-relocated-word (labeloffset x)))
1554      ((equal (first x) 'internalentry)
1555       (let ((offset (get (second x) 'internalentryoffset)))
1556	 (if offset
1557	     (deposit-relocated-word offset)
1558	     (progn
1559	       (setq forwardinternalreferences*
1560		     (cons (cons currentoffset* (second x))
1561			   forwardinternalreferences*))
1562	       (deposit-relocated-word 0)))))
1563      ((and (eq (car x) 'mkitem)
1564	    (eq (cadr x) id-tag)
1565	    (eqcar (setq y (caddr x)) 'idloc)
1566	    (wlessp (id2int(cadr y)) 129))
1567	(depositword (cadr y)))
1568      ((equal (first x) 'idloc) (depositwordidnumber (second x)))
1569      ((equal (first x) 'mkitem) (deposititem (second x) (third x)))
1570      ((equal (first x) 'entry) (depositentry x))
1571      ((setq y (wconstevaluable x)) (depositword (int2sys y)))
1572      (t (stderror (bldmsg "Expression too complicated %r" x))))))
1573
1574(de depositwordidnumber (x)
1575  (cond
1576    ((or (not *writingfaslfile) (leq (idinf x) 128))
1577     (deposit32bitword (idinf X)))
1578    (t
1579      (put_a_halfword (wplus2 codebase* currentoffset*)
1580	       (makerelocword (const reloc_id_number) (findidnumber x)))
1581      (setq currentoffset* (plus currentoffset* 4))
1582      (updatebittable 4 (const reloc_word)))))
1583
1584(de DepositHalfWordExpression (X)
1585(prog (Y)
1586    (return (cond ((FixP X) (DepositHalfWord X))
1587    ((LabelP X)
1588    (progn (puthalfword (wPlus2 CodeBase* CurrentOffset*) 0
1589		    (IPlus2 (LabelOffset X)
1590			   (cond (*WritingFaslFile 0) (t CodeBase*))))
1591	(UpdateBitTable 2 (const RELOC_HALFWORD))
1592	(setq CurrentOffset* (plus CurrentOffset* 2))))
1593
1594	    ((and (equal (first X) 'Difference) (LabelP (second X))
1595	    (LabelP (third X)))
1596	   (DepositHalfWord (Difference (LabelOffset (second X))
1597					   (LabelOffset (third X)))))
1598    ((equal (first X) 'IDLoc)
1599	   (DepositHalfWordIDNumber (second X)))
1600    ((setq Y (WConstEvaluable X))
1601	   (DepositHalfWord Y)) (t
1602    (StdError (BldMsg "Halfword expression too complicated %r" X)))))))
1603
1604
1605(de DepositItem (TagPart InfPart)
1606    (cond ((not *WritingFaslFile)
1607	   (DepositWord
1608	       (MkItem TagPart
1609		       (cond ((LabelP InfPart)
1610			      (wPlus2 CodeBase* (LabelOffset InfPart)))
1611			     ((equal (first InfPart) 'IDLoc)
1612			      (IDInf (second InfPart)))
1613			     (t
1614				(StdError
1615				    (BldMsg "Unknown inf in MkItem %r"
1616					    InfPart)))))))
1617	  (t
1618	     (progn (cond
1619		     ((LabelP InfPart)      % RELOC_CODE_OFFSET = 0
1620		      (putword (wPlus2 CodeBase* CurrentOffset*) 0
1621		            (mkitem tagpart (LabelOffset InfPart))))
1622		     ((equal (first InfPart) 'IDLoc)
1623		      (putword (wPlus2 CodeBase* CurrentOffset*) 0
1624		        (MkItem TagPart
1625	 		(MakeRelocInf (const RELOC_ID_NUMBER)
1626	 		  (FindIDNumber (second InfPart))))))
1627		     (t
1628		      (StdError (BldMsg "Unknown inf in MkItem %r"
1629				 InfPart))))
1630	      (setq CurrentOffset* (plus CurrentOffset* 8))
1631	      (UpdateBitTable 8 (const RELOC_INF))))))
1632
1633(de DepositHalfWordIDNumber (X)
1634    (cond ((or (not *WritingFaslFile) (LEQ (IDInf X) 128))
1635	(DepositHalfWord (IDInf X))) (t
1636
1637    (progn (puthalfword (wplus2 codebase* currentoffset*) 0
1638		    (makerelochalfword (const reloc_id_number) (findidnumber x)))
1639	(setq currentoffset* (plus currentoffset* 2))
1640	(updatebittable 2 (const reloc_halfword))))))
1641
1642% ------------------------------------------------------------
1643% this procedure was "inserted" to eliminate the problem with the compiler
1644% not generating the code for:
1645% function lambda y;remprop(y, 'internalentryoffset);
1646% who knows why it didn't generate anything reasonable - it generated nil.
1647(de remove-ieo-property (y)
1648  (remprop y 'internalentryoffset))
1649% ------------------------------------------------------------
1650
1651(de systemfaslfixup ()
1652  (prog (x)
1653     % THIS VERSION ASSUMES 32 bit RELATIVE ADDESSES, HM.
1654     (setq x (remainder currentoffset* 16))
1655     (while (greaterp x 0) (depositbyte 0) (setq x (sub1 x)))
1656     (while forwardinternalreferences*
1657       (setq x (get (cdr (first forwardinternalreferences*))
1658		    'internalentryoffset))
1659       (when (null x)
1660	      (errorprintf "***** %r not defined in this module, call incorrect"
1661			   (cdr (first forwardinternalreferences*))))
1662	       % calculate the offset
1663       (setq x (plus -4             % offset to next word
1664	     (difference x (car (first forwardinternalreferences*)))))
1665			 % insert the fixup
1666       (put_a_halfword
1667	    (iplus2 codebase* (car (first forwardinternalreferences*))) x)
1668       (setq forwardinternalreferences* (cdr forwardinternalreferences*)))
1669	      % Now remove the InternalEntry offsets from everyone
1670   (mapobl 'remove-ieo-property)))
1671
1672%-----------------------------------------------------------------
1673%
1674% Optimize:
1675%   1.  (prog (a b c d e) :
1676%    (mov (quote nil) (reg t1)) (mov (reg t1)(frame 1))
1677%    (mov (quote nil) (reg t1)  (mov (reg t1)(frame 2))
1678%        ......
1679%
1680%   2. (setq a b)(xxx   ... b )
1681%    (mov (reg 1) (frame 1)) (mov (frame 1) (reg 2))
1682
1683(de LapoptFrame(u)
1684    (prog (rcode instr op nextinstr src dest x)
1685       (while u
1686	(setq instr (pop u))
1687	(when (and (pairp instr) u)
1688		(setq op (car instr))
1689		(setq nextinstr (car u))
1690		    % pattern:
1691		    %    (mov (reg n) (frame m))
1692		    %    (mov (frame m) (reg k))
1693		(when (and (eq op 'MOV)
1694			   (regp (setq src (cadr instr)))
1695			   (not (sregp src))  % not for segment registers
1696			   (setq dest (caddr instr))
1697			   (eqcar nextinstr 'MOV)
1698			   (equal (cadr nextinstr)dest)
1699			   (regp (setq x (caddr nextinstr))))
1700		      (pop u)
1701		      (push (list 'mov src x) u))
1702		    % pattern:
1703		    %      (mov (quote nil) (frame 1))
1704		    %      (mov (quote nil) (frame 2)) ...
1705		(when (and
1706			(eq op 'mov)
1707			(immediatep (setq src (cadr instr)))
1708			(not (regp (caddr instr)))
1709			(eqcar nextinstr 'mov)
1710			(equal (cadr nextinstr) src))   % at 2 of that type
1711		      (setq u (LapoptFrame1 src (push instr u)))
1712		      (setq instr (list 'mov src '(reg t1))))
1713		    % pattern:
1714		    %      (push (quote nil) )
1715		    %      (push (quote nil) ) ...
1716		(when (and
1717			(eq op 'push)
1718			(immediatep (setq src (cadr instr)))
1719			(or (and
1720			       (eqcar nextinstr 'mov)
1721			       (equal (cadr nextinstr) src))
1722			    (and
1723			       (eqcar nextinstr 'push)
1724			       (equal (cadr nextinstr) src))))
1725		      (setq u (LapoptFrame1 src (push instr u)))
1726		      (setq instr (list 'mov src '(reg t1))))
1727
1728	)
1729	(push instr rcode))
1730       (return (reversip rcode))))
1731
1732(de LapoptFrame1 (src u)
1733     % here subsequent instructions are modified for source T1
1734   (cond ((or (null u) (atom (car u))) u)
1735	 ((and (eq (caar u) 'PUSH) (equal (cadr (car u)) src))
1736		(cons '(PUSH (reg t1)) (LapoptFrame1 src (cdr u))))
1737	 ((not (and (eq(caar u) 'MOV) % nor more such instr.
1738		    (equal (cadar u) src)))
1739	  u)
1740	 ((equal (caddr (car u)) '(reg t1))
1741		(LapoptFrame1 src (cdr u)))
1742	 (t  (cons
1743		(list 'mov '(reg t1) (caddr (car u)))
1744		(LapoptFrame1 src (cdr u))))))
1745
1746(fluid '(!*optimize-i486))
1747
1748(setq *optimize-i486 t)
1749
1750(de LapoptPeep(code)
1751   (when *optimize-i486 (setq code (LapoptPeep486 code)))
1752   code)
1753
1754(de LapoptPeep486(code)
1755% peephole optimizer for 486 code
1756% interchanging instructions for dependencies.
1757 (let (rcode i1 i2 i3 r rb)
1758  (while code
1759   (setq i1 (pop code))
1760   (when (and code (cdr code))
1761    (setq i2(car code) i3(cadr code))
1762    (cond
1763      % case
1764      %   something
1765      %   (add 16 (reg st))
1766      %   (ret)
1767      % move (add 16 (reg st)) one step up if possible.
1768     ((and (equal i3 '(ret))
1769	   (pairp i1)
1770	   (pairp i2)
1771	   (eq (car i2) 'add)
1772	   (equal (caddr i2) '(reg st))
1773	   (not (&jumpcontrol i1))
1774	   (not (&smember '(reg st) i1))
1775	)
1776
1777      (pop code r)
1778      (push i1 code)
1779      (setq i1 i2))
1780
1781      % case
1782      %   something
1783      %   (mov (reg x) ...)
1784      %   (yyy ... (displacement (reg x))
1785      % move (mov (reg x) ...)  one step up if independent
1786     ((and
1787       (pairp i1)
1788       (eqcar i2 'mov)
1789       (eqcar (setq r (caddr i2)) 'reg)
1790       (equal r (&indirectbase i3))
1791       (not (&jumpcontrol i1))
1792	    % test independence (target i2) and i1
1793       (not (&smember r i1))
1794	    % test independence (source i2) and (target i1)
1795       (setq rb (&regbase (cadr i2)))
1796       (or (and (eqcar i1 'mov) (not (&smember rb (caddr i1))) )
1797	   (not (&smember rb i1))
1798       )
1799      )
1800
1801      (pop code)
1802      (push i1 code)
1803      (setq i1 i2)
1804     )) % cond
1805    )% when
1806    (push i1 rcode)
1807   )% while
1808   (reversip rcode)
1809)) % let,de
1810
1811(de &smember(a l)
1812  (cond ((equal a l) t)
1813	((atom l) nil)
1814	((&smember a (car l)) t)
1815	(t (&smember a (cdr l))) ))
1816
1817(de &indirectbase(u)
1818  (cond ((atom u) nil)
1819	((atom (cdr u)) nil)
1820	((eq (car u) 'displacement)(cadr u))
1821	((eq (car u) 'indirect) (cadr u))
1822	(t (or (&indirectbase (car u))(&indirectbase (cdr u)))) ))
1823
1824(de &regbase(u)
1825  % u is an operand of *MOVE. Extract the source base.
1826     (cond((or (atom u)(eq (car u) 'quote)) t)
1827	  ((eq (car u) 'reg) u)
1828	  ((or (eq (car u) 'indirect) (eq(car u)'displacement))
1829	   (cadr u))
1830	  (t t)))
1831
1832(de &jumpcontrol(u)
1833    (or (atom u)
1834	 (LocalLabelp u)
1835	(GeneralBranchInstructionP (setq u (car u)))
1836	(eq u 'call)
1837	(eq u 'ret)
1838	(eq u '*entry)
1839   ))
1840
1841(de lapoptprint(l)
1842   (terpri)
1843   (prin2t " 486 tauschen:")
1844   (mapc l 'prin2t))
1845
1846(fluid '(!64bitregs upper64bitregs))
1847
1848(setq !64bitregs '((reg 1) (reg 2) (reg 3) (reg 4) (reg 5) (reg st)
1849                  (reg rax) (reg rbx) (reg rcx) (reg rdx)
1850                  (reg t3) (reg t4) (reg NIL) (reg heaplast)
1851                  (reg bndstkptr) (reg bndstkupperbound)
1852		  (reg bndstklowerbound)  (reg heaptrapbound)
1853                  (reg t1) (reg t2) (reg esp) (reg rdi) (reg rsi)
1854		  (reg r8) (reg r9) (reg r10) (reg r11)
1855		  (reg r12) (reg r13)(reg r14) (reg r15)
1856		  ))
1857
1858(setq upper64bitregs '(
1859                  (reg t3) (reg t4) (reg NIL) (reg heaplast)
1860                  (reg bndstkptr) (reg bndstkupperbound)
1861		  (reg bndstklowerbound)  (reg heaptrapbound)
1862		  (reg r8) (reg r9) (reg r10) (reg r11)
1863		  (reg r12) (reg r13)(reg r14) (reg r15)
1864		  (reg xmm8) (reg xmm9) (reg xmm10) (reg xmm11)
1865		  (reg xmm12) (reg xmm13) (reg xmm15) (reg xmm15)
1866		  ))
1867
1868
1869(de reg64bitP (i) (reg64bitp1 !64bitregs i))
1870
1871(de upperreg64p (i) (reg64bitp1 upper64bitregs i))
1872
1873(de reg64bitp1 (reglist inst)
1874   (if (null reglist) nil
1875     (if (&smember (car reglist) inst) t
1876        (reg64bitp1 (cdr reglist) inst))))
1877
1878
1879%---------------------------------------------------------------------
1880%  cmacro optimizer
1881%---------------------------------------------------------------------
1882
1883(de lapopt1 (u)
1884    (prog()
1885	(when (not *lapopt) (return u))
1886	(setq u (lapopt-move-special-cases u))
1887	(return u)))
1888
1889%  Move bodies of if-then-elseif ... sequences  such that at runtime as few
1890%  jumps as possible are taken.
1891
1892(fluid '(&cond-cm&))
1893
1894(setq &cond-cm&
1895 '((*jumpeq      . *jumpnoteq)       (*jumpnoteq      . *jumpeq)
1896   (*jumpwgeq    . *jumpwlessp)      (*jumpwlessp     . *jumpwgeq)
1897   (*jumpwleq    . *jumpwgreaterp)   (*jumpwgreaterp  . *jumpwleq)
1898   (*jumptype    . *jumpnottype)     (*jumpnottype    . *jumptype)
1899   (*jumpintype  . *jumpnotintype)   (*jumpnotintype  . *jumpintype)
1900   (*jumpeqtag   . *jumpnoteqtag)    (*jumpnoteqtag   . *jumpeqtag)
1901   (*jumpwgeqtag . *jumpwlessptag)   (*jumpwlessptag  . *jumpwgeqtag)
1902  % no inverse jumps for
1903   (*jumpwgreaterptag)
1904   (*jumpon)
1905
1906))
1907
1908(de lapopt-move-special-cases(code)
1909  (let (rcode bcode inst u w lbl lab)
1910   (while (not (lapopt-bottom code))
1911    (setq inst (pop code))
1912     (when
1913      (and
1914       (pairp inst)
1915       (setq u (atsoc (car inst) &cond-cm&))
1916       (setq u (cdr u))
1917       (setq lbl (cadr inst))
1918       (or (memq lbl code)(member (setq lbl (list '*lbl lbl)) code))
1919       (setq w (lapopt-move-special-cases1 code lbl))
1920      )
1921      (setq lab (list 'label (gensym)))
1922      (setq bcode (nconc bcode (cons (list '*lbl lab) (car w))))
1923      (push (cons u (cons lab (cddr inst))) rcode)
1924      (setq inst nil)
1925      (setq code (cdr w))
1926     )
1927     (when inst (push inst rcode))
1928    )
1929  (setq code (nconc (reversip rcode) (nconc bcode code)))
1930  (when *trlapopt
1931	 (prin2t "=== move special cases:")
1932	 (MAPCAR code 'PRINT))
1933  code
1934))
1935
1936(de lapopt-bottom(u)
1937 (or (null u)
1938     (eqcar (car u) 'fullword)))
1939
1940(de lapopt-move-special-cases1 (code lbl)
1941  % Find basic block until lbl which ends by an unconditional jump.
1942  % Return nil or pair of bblock and rest of code.
1943  (let (rcode fcode inst op)
1944   (while code
1945     (setq inst (pop code))
1946     (push inst rcode)
1947     (cond ((or (atom inst) (equal inst lbl)) (setq code nil))
1948	   ((and (memq (setq op (car inst))
1949		      '(*jump *linke *exit))
1950		 (cdr code)
1951		 (equal (car code) lbl)
1952	    )
1953	    (setq fcode code) (setq code nil))
1954	   ((or (eq op '*lbl) (atsoc op &cond-cm&))
1955	    (setq code nil)
1956	   )
1957   ))
1958   (when fcode (cons (reversip rcode) fcode))
1959))
1960( dskin "AMD64-inst.dat")
1961