1 /* -*-C-*-
2 
3 Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
4     1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
5     2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Massachusetts
6     Institute of Technology
7 
8 This file is part of MIT/GNU Scheme.
9 
10 MIT/GNU Scheme is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or (at
13 your option) any later version.
14 
15 MIT/GNU Scheme is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with MIT/GNU Scheme; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
23 USA.
24 
25 */
26 
27 /* Compiled code interface macros for Intel IA-32.  */
28 
29 #ifndef SCM_CMPINTMD_H_INCLUDED
30 #define SCM_CMPINTMD_H_INCLUDED 1
31 
32 /*
33 
34 Problems with the IA-32 instruction set architecture
35 ====================================================
36 
37 1. Code space is separate from data space.  The only way to obtain a
38    code space address is to do a CALL and use the return address on
39    the stack.
40 
41 Problem: References to the constants vector in compiled code.
42 
43 Fix: Just as on RISC machines.  Use CALL when necessary, and cache the
44    result in the assembly language.
45 
46 
47 2. Jumps are PC-relative.  There are absolute jumps, assuming the PC
48    is in a data location, or with immediate destinations that include
49    a segment descriptor (16 bits).  The short forms have a PC-relative
50    offset defined with respect to the immediately following
51    instruction.
52 
53 Problem: Closures and execute caches need their address in old space
54    in order to be relocated correctly.
55 
56 Fix:
57 
58 For execute caches we can define a new linker field, called
59 load-relocation-address which on every GC/relocation stores the new
60 address and the old contents into global variables and stores the new
61 address in the field.  Alternatively the difference between the new
62 address and the old contents can be stored into a single global
63 variable, and this can be used, together with the new address of each
64 cache, to find the old code.
65 
66 For closures the code that reads the header (manifest closure) can do
67 the same.
68 
69 
70 3. The stack pointer register (ESP) cannot be used as the base in
71    (base + displacement) addressing mode.
72 
73 Problem: Common operation in the compiler, which assumes direct access
74    to the stack.
75 
76 Fix: Use base + indexed mode, which allows specification of ESP as
77    base and nullification of the index (by using ESP again).  This is
78    one byte longer than otherwise, but...
79 
80 
81 Register assignments
82 ====================
83 
84 EAX (0)		Unassigned
85 ECX (1)		Unassigned
86 EDX (2)		Unassigned
87 EBX (3)		Unassigned
88 
89 ESP (4)		Stack Pointer
90 EBP (5)		Register Mask
91 ESI (6)		Pointer to register block, etc.
92 EDI (7)		Free Pointer
93 
94 The dynamic link and value "registers" are not processor registers.
95 Slots in the register array must be reserved for them.
96 
97 The Free Pointer is EDI because EDI is the implicit base register for
98 the memory-to-memory move instructions, and the string store
99 instruction.  Perhaps we can make use of it.
100 
101 The pointer to the register block is held in ESI rather than EBP (the
102 processor's usual "frame" register) because its most common use, the
103 address syllable (EBP) for reading memtop, takes more bytes than (ESI).
104 
105 Encodings and layout of various control features
106 ================================================
107 
108 Assumptions:
109 
110 The processor will be in 32-bit address and operand mode.  Thus
111 instructions use 32-bit operands, and displacements for addressing
112 modes and jump instructions are all 32 bits by default.
113 
114 	Offset		Contents		Encoding
115 
116 
117 - Execute cache entry encoding:
118 
119 		Before linking
120 
121 	0		16-bit arity	\
122 	2		0x00		  [TC_FIXNUM | arity]
123 entry	3		0x1A		/
124 	4		Symbol
125 	8		<next cache>
126 
127 		After linking
128 
129 	0		16-bit arity
130 	2		0x00
131 entry	3		JMP opcode		0xE9
132 	4		32-bit offset
133 	8		<next cache>
134 
135 Arity stays in place because the IA-32 is a little-endian architecture.
136 
137 
138 - Closure entry encoding:
139 
140 entry	0		CALL opcode		0xE8
141 	1		32-bit offset
142 	5		<padding>		0x00
143 	6		<next entry or variables>
144 
145 
146 - Trampoline encoding:
147 
148 entry	0		MOV	AL,code		0xB0, code-byte
149 	2		CALL	n(ESI)		0xFF 0x96 n-longword
150 	8		<trampoline dependent storage>
151 
152 
153 - GC & interrupt check at procedure/continuation entry:
154 
155 gc_lab	-7		CALL	n(ESI)		0xFF 0x56 n-byte
156 	-4		<type/arity info>
157 	-2		<gc offset>
158 entry	0		CMP	EDI,(ESI)	0x39 0x3e
159 	2		JAE	gc_lab		0x73 -11
160 	4		<real code>
161 
162 
163 - GC & interrupt check at closure entry:
164 
165 gc_lab	-11		ADD	(ESP),&offset	0x83 0x04 0x24 offset-byte
166   	-7		JMP	n(ESI)		0xFF 0x66 n-byte
167 	-4		<type/arity info>
168 	-2		<gc offset>
169 entry	0		ADD	(ESP),&magic	0x81 0x04 0x24 magic-longword
170 	7		CMP	EDI,(ESI)	0x39 0x3e
171 	9		JAE	gc_lab		0x73 0xea (= -22)
172 	11		<real code>
173 
174 The magic value depends on the closure because of canonicalization.
175 
176 The ADD instruction at offset -11 is not present for the 0th closure
177 entry, since it is the canonical entry point.  Its format depends on
178 the value of offset, since the sign-extending forms often suffice.
179 
180 offset = entry_number * entry_size
181 magic = ([TC_COMPILED_ENTRY | 0] - (offset + length_of_CALL_instruction))
182 
183 */
184 
/* Reset hook for this port: the name expands to i386_reset_hook, which
   is declared near the end of this header as a void (void) function.  */
185 #define ASM_RESET_HOOK i386_reset_hook
186 
/* NOTE(review): presumably selects the struct-based variant of the
   compiled-code interface; the code that tests this flag is not
   visible in this header -- confirm there.  */
187 #define CMPINT_USE_STRUCS 1
188 
189 /* These next definitions must agree with "cmpauxmd/i386.m4", which is
190    where the register block is allocated.  */
/* NOTE(review): the unit of these sizes is not stated here; presumably
   they count SCHEME_OBJECT-sized (32-bit) words -- confirm against the
   assembly file before changing any of them.  */
191 #define COMPILER_REGBLOCK_N_FIXED 16
192 /* Big enough to hold 80-bit floating-point value: */
193 #define COMPILER_TEMP_SIZE 3
194 #define COMPILER_REGBLOCK_N_TEMPS 256
195 #define COMPILER_REGBLOCK_N_HOOKS 80
196 #define COMPILER_HOOK_SIZE 1
197 
/* Extra register-block space reserved for the hooks: one unit of
   COMPILER_HOOK_SIZE per hook, i.e. 80 * 1 = 80 with the values
   above.  */
198 #define COMPILER_REGBLOCK_EXTRA_SIZE					\
199   (COMPILER_REGBLOCK_N_HOOKS * COMPILER_HOOK_SIZE)
200 
/* Consistent with the comment above, which says the register block is
   allocated in "cmpauxmd/i386.m4".  NOTE(review): presumably tells the
   C side not to allocate its own register block -- confirm in the
   code that tests this flag.  */
201 #define REGBLOCK_ALLOCATED_BY_INTERFACE true
202 
/* Unit in which compiled-code instructions and offsets are counted on
   this port.  byte_t is defined elsewhere (presumably a one-byte
   type), which matches the byte-granular offsets used in the encoding
   tables in the comment at the top of this file.  */
203 typedef byte_t insn_t;
204 
205 /* Number of insn_t units preceding entry address in which header
206    (type and offset info) is stored.  */
/* In the entry layouts documented at the top of this file these are
   the <type/arity info> field at offset -4 and the <gc offset> field
   at offset -2, so the header is 2 + 2 = 4 units long.  */
207 #define CC_ENTRY_HEADER_SIZE (CC_ENTRY_TYPE_SIZE + CC_ENTRY_OFFSET_SIZE)
208 #define CC_ENTRY_TYPE_SIZE 2
209 #define CC_ENTRY_OFFSET_SIZE 2
210 
211 /* Number of insn_t units preceding entry header in which GC trap
212    instructions are stored.  */
/* Matches the 3-byte "CALL n(ESI)" (0xFF 0x56 n-byte) shown at offset
   -7 in the procedure/continuation entry layout at the top of this
   file.  */
213 #define CC_ENTRY_GC_TRAP_SIZE 3
214 
/* NOTE(review): presumably tells the relocation code that closure
   entries on this port embed a code address (the CALL with a 32-bit
   offset shown in the closure entry encoding) that must be fixed up
   when the closure moves -- confirm in the code that tests this
   flag.  */
215 #define EMBEDDED_CLOSURE_ADDRS_P 1
216 
/* Pair of instruction addresses that is passed by pointer to the
   relocation helpers declared later in this header
   (start_closure_relocation, start_operator_relocation,
   read_compiled_closure_target and read_uuo_target).
   NOTE(review): presumably old_addr is where the code lived before a
   GC/relocation and new_addr is where it lives afterwards, so that
   PC-relative targets can be recomputed (see problem 2 in the comment
   at the top of this file) -- confirm in the helpers' definitions.  */
217 typedef struct
218 {
219   insn_t * old_addr;
220   insn_t * new_addr;
221 } reloc_ref_t;
222 
/* Expands to the declaration "reloc_ref_t name" without a trailing
   semicolon; the user of the macro supplies it.  */
223 #define DECLARE_RELOCATION_REFERENCE(name) reloc_ref_t name
224 
/* Calls start_closure_relocation with SCAN and the address of REF.
   REF must name an object whose address can be taken (the macro
   applies & to it), e.g. a variable introduced with
   DECLARE_RELOCATION_REFERENCE.  */
225 #define START_CLOSURE_RELOCATION(scan, ref)				\
226   start_closure_relocation ((scan), (&ref))
227 
/* Calls start_operator_relocation with SCAN and the address of REF,
   then advances SCAN by one element.  SCAN is expanded twice and is
   assigned to, so it must be a modifiable lvalue without side
   effects.  Wrapped in do ... while (false) so that the expansion
   behaves as a single statement.  */
228 #define START_OPERATOR_RELOCATION(scan, ref) do				\
229 {									\
230   start_operator_relocation ((scan), (&ref));				\
231   (scan) += 1;								\
232 } while (false)
233 
/* NOTE(review): presumably the number of words occupied by the
   relocation information that START_OPERATOR_RELOCATION steps over
   (it advances SCAN by the same amount) -- confirm against the users
   of this constant.  */
234 #define OPERATOR_RELOCATION_OFFSET 1
235 
/* Calls read_compiled_closure_target with A and the address of R; the
   helper is declared later in this header as returning insn_t *.  */
236 #define READ_COMPILED_CLOSURE_TARGET(a, r)				\
237   read_compiled_closure_target ((a), (&r))
238 
239 /* Size of execution cache in SCHEME_OBJECTS.  */
/* Two objects per cache agrees with the execute cache layout in the
   comment at the top of this file, where <next cache> starts at byte
   offset 8 (assuming 4-byte SCHEME_OBJECTs on this port).  */
240 #define UUO_LINK_SIZE 2
241 
/* Conversions between a number of words NW and a number of execute
   caches NC.  One word is not counted as cache space: NW = 2 * NC + 1.
   The two macros are inverses of each other for NW of that form.
   NOTE(review): the extra word is presumably the one skipped by
   START_OPERATOR_RELOCATION / OPERATOR_RELOCATION_OFFSET -- confirm.  */
242 #define UUO_WORDS_TO_COUNT(nw) (((nw) - 1) / UUO_LINK_SIZE)
243 #define UUO_COUNT_TO_WORDS(nc) (((nc) * UUO_LINK_SIZE) + 1)
244 
/* Calls read_uuo_target with A and the address of R; the helper is
   declared later in this header as returning insn_t *.  */
245 #define READ_UUO_TARGET(a, r) read_uuo_target ((a), (&r))
246 
/* All three cache-maintenance macros expand to the same
   IA32_CACHE_SYNCHRONIZE () statement.  The ADDRESS and NWORDS
   arguments of the region variants do not appear in the expansion, so
   they are ignored and never evaluated.  */
247 #define FLUSH_I_CACHE() IA32_CACHE_SYNCHRONIZE ()
248 #define FLUSH_I_CACHE_REGION(address, nwords) IA32_CACHE_SYNCHRONIZE ()
249 #define PUSH_D_CACHE_REGION(address, nwords) IA32_CACHE_SYNCHRONIZE ()
250 
/* Calls ia32_cache_synchronize () only when the global flag
   ia32_cpuid_needed is nonzero; otherwise does nothing.  Both names
   are declared later in this header.  Wrapped in do ... while (false)
   so that the expansion behaves as a single statement.  */
251 #define IA32_CACHE_SYNCHRONIZE() do					\
252 {									\
253   if (ia32_cpuid_needed)						\
254     ia32_cache_synchronize ();						\
255 } while (false)
256 
/* ASM_ENTRY_POINT (name) wraps the declarator of a function that is
   shared between C and assembly code.  It adds the compiler-specific
   keyword _System on OS/2 with the IBM or Watcom compilers, __cdecl
   on Win32 with the Watcom compiler, and nothing elsewhere.
   NOTE(review): presumably this pins the calling convention to the
   one the assembly code in "cmpauxmd/i386.m4" expects -- confirm.  */
257 #if defined(__OS2__) && (defined(__IBMC__) || defined(__WATCOMC__))
258 #  define ASM_ENTRY_POINT(name) (_System name)
259 #elif defined(__WIN32__) && defined(__WATCOMC__)
260 #  define ASM_ENTRY_POINT(name) (__cdecl name)
261 #else
262 #  define ASM_ENTRY_POINT(name) name
263 #endif
264 
/* Functions declared through ASM_ENTRY_POINT.  within_c_stack takes a
   pointer to a function of one void * argument, plus a void * value
   (presumably the argument to pass to that function -- confirm in its
   definition).  */
265 extern int ASM_ENTRY_POINT (i386_interface_initialize) (void);
266 extern void ASM_ENTRY_POINT (within_c_stack) (void (*) (void *), void *);
267 
/* Entry points named asm_*.  Each one is declared as a function that
   takes no arguments and returns nothing.
   NOTE(review): presumably these are labels defined in the assembly
   file "cmpauxmd/i386.m4" and are declared here only so that C code
   can take their addresses (for example to fill the hook slots
   counted by COMPILER_REGBLOCK_N_HOOKS); if so, the void (void) type
   is a placeholder and they are not meant to be called from C --
   confirm against the assembly file and the code that uses them.  */
268 extern void asm_assignment_trap (void);
269 extern void asm_dont_serialize_cache (void);
270 extern void asm_error (void);
271 extern void asm_fixnum_shift (void);
272 extern void asm_generic_add (void);
273 extern void asm_generic_decrement (void);
274 extern void asm_generic_divide (void);
275 extern void asm_generic_equal (void);
276 extern void asm_generic_greater (void);
277 extern void asm_generic_increment (void);
278 extern void asm_generic_less (void);
279 extern void asm_generic_modulo (void);
280 extern void asm_generic_multiply (void);
281 extern void asm_generic_negative (void);
282 extern void asm_generic_positive (void);
283 extern void asm_generic_quotient (void);
284 extern void asm_generic_remainder (void);
285 extern void asm_generic_subtract (void);
286 extern void asm_generic_zero (void);
287 extern void asm_interrupt_closure (void);
288 extern void asm_interrupt_continuation (void);
289 extern void asm_interrupt_continuation_2 (void);
290 extern void asm_interrupt_dlink (void);
291 extern void asm_interrupt_procedure (void);
292 extern void asm_link (void);
293 extern void asm_nofp_add (void);
294 extern void asm_nofp_decrement (void);
295 extern void asm_nofp_divide (void);
296 extern void asm_nofp_equal (void);
297 extern void asm_nofp_greater (void);
298 extern void asm_nofp_increment (void);
299 extern void asm_nofp_less (void);
300 extern void asm_nofp_modulo (void);
301 extern void asm_nofp_multiply (void);
302 extern void asm_nofp_negative (void);
303 extern void asm_nofp_positive (void);
304 extern void asm_nofp_quotient (void);
305 extern void asm_nofp_remainder (void);
306 extern void asm_nofp_subtract (void);
307 extern void asm_nofp_zero (void);
308 extern void asm_primitive_apply (void);
309 extern void asm_primitive_error (void);
310 extern void asm_primitive_lexpr_apply (void);
311 extern void asm_reference_trap (void);
312 extern void asm_safe_reference_trap (void);
313 extern void asm_set_interrupt_enables (void);
314 extern void asm_sc_apply (void);
315 extern void asm_sc_apply_size_1 (void);
316 extern void asm_sc_apply_size_2 (void);
317 extern void asm_sc_apply_size_3 (void);
318 extern void asm_sc_apply_size_4 (void);
319 extern void asm_sc_apply_size_5 (void);
320 extern void asm_sc_apply_size_6 (void);
321 extern void asm_sc_apply_size_7 (void);
322 extern void asm_sc_apply_size_8 (void);
323 extern void asm_scheme_to_interface (void);
324 extern void asm_scheme_to_interface_call (void);
325 extern void asm_serialize_cache (void);
326 extern void asm_short_primitive_apply (void);
327 extern void asm_trampoline_to_interface (void);
328 
/* Functions that the macros earlier in this header expand to:
     ia32_cache_synchronize        <- IA32_CACHE_SYNCHRONIZE
     start_closure_relocation      <- START_CLOSURE_RELOCATION
     read_compiled_closure_target  <- READ_COMPILED_CLOSURE_TARGET
     start_operator_relocation     <- START_OPERATOR_RELOCATION
     read_uuo_target               <- READ_UUO_TARGET
     i386_reset_hook               <- ASM_RESET_HOOK
   Their definitions are not in this header.  */
329 extern void ia32_cache_synchronize (void);
330 extern void start_closure_relocation (SCHEME_OBJECT *, reloc_ref_t *);
331 extern insn_t * read_compiled_closure_target (insn_t *, reloc_ref_t *);
332 extern void start_operator_relocation (SCHEME_OBJECT *, reloc_ref_t *);
333 extern insn_t * read_uuo_target (SCHEME_OBJECT *, reloc_ref_t *);
334 extern void i386_reset_hook (void);
335 
/* Flag tested by IA32_CACHE_SYNCHRONIZE: ia32_cache_synchronize () is
   called only when it is nonzero.  It is defined and set elsewhere.  */
336 extern int ia32_cpuid_needed;
337 
/* When HAVE_FENV_H is not defined, define CMPINTMD_EMULATES_FENV and
   pull in "cmpintmd/x86-fenv.h".
   NOTE(review): presumably that header supplies an x86-specific
   replacement for the <fenv.h> floating-point environment interface,
   and CMPINTMD_EMULATES_FENV tells other code that the replacement is
   in use -- confirm in x86-fenv.h and in the code that tests the
   macro.  */
338 #ifndef HAVE_FENV_H
339 #  define CMPINTMD_EMULATES_FENV
340 #  include "cmpintmd/x86-fenv.h"
341 #endif
342 
343 #endif /* !SCM_CMPINTMD_H_INCLUDED */
344