/* -*-C-*-

Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
    1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
    2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Massachusetts
    Institute of Technology

This file is part of MIT/GNU Scheme.

MIT/GNU Scheme is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.

MIT/GNU Scheme is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with MIT/GNU Scheme; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
USA.

*/

/* Compiled code interface macros for Intel IA-32. */

/* Include guard; closed by the matching #endif at the end of the file. */
#ifndef SCM_CMPINTMD_H_INCLUDED
#define SCM_CMPINTMD_H_INCLUDED 1

/*

Problems with the IA-32 instruction set architecture
====================================================

1. Code space is separate from data space. The only way to obtain a
   code space address is to do a CALL and use the return address on
   the stack.

Problem: References to the constants vector in compiled code.

Fix: Just as on RISC machines. Use CALL when necessary, and cache the
result in the assembly language.


2. Jumps are PC-relative. There are absolute jumps, assuming the PC
   is in a data location, or with immediate destinations that include
   a segment descriptor (16 bits). The short forms have a PC-relative
   offset defined with respect to the immediately following
   instruction.
Problem: Closures and execute caches need their address in old space
in order to be relocated correctly.

Fix:

For execute caches we can define a new linker field, called
load-relocation-address, which on every GC/relocation stores the new
address and the old contents into global variables and stores the new
address in the field. Alternatively the difference between the new
address and the old contents can be stored into a single global
variable, and this can be used, together with the new address of each
cache, to find the old code.

For closures the code that reads the header (manifest closure) can do
the same.


3. The stack pointer register (ESP) cannot be used as the base in
   (base + displacement) addressing mode.

Problem: Common operation in the compiler, which assumes direct access
to the stack.

Fix: Use base + indexed mode, which allows specification of ESP as
base and nullification of the index (by using ESP again). This is
one byte longer than otherwise, but...


Register assignments
====================

EAX (0)         Unassigned
ECX (1)         Unassigned
EDX (2)         Unassigned
EBX (3)         Unassigned

ESP (4)         Stack Pointer
EBP (5)         Register Mask
ESI (6)         Pointer to register block, etc.
EDI (7)         Free Pointer

The dynamic link and value "registers" are not processor registers.
Slots in the register array must be reserved for them.

The Free Pointer is EDI because EDI is the implicit base register for
the memory-to-memory move instructions, and the string store
instruction. Perhaps we can make use of it.

The pointer to register block is not held in EBP (the processor's
"frame" register, which would typically be used) because its most
common use, (EBP) (address syllable for memory memtop), takes more
bytes than (ESI).
Encodings and layout of various control features
================================================

Assumptions:

  The processor will be in 32-bit address and operand mode. Thus
  instructions use 32-bit operands, and displacements for addressing
  modes and jump instructions are all 32 bits by default.

        Offset  Contents                Encoding


- Execute cache entry encoding:

                Before linking

        0       16-bit arity    \
        2       0x00              [TC_FIXNUM | arity]
entry   3       0x1A            /
        4       Symbol
        8       <next cache>

                After linking

        0       16-bit arity
        2       0x00
entry   3       JMP opcode              0xE9
        4       32-bit offset
        8       <next cache>

  Arity stays in place because the IA-32 is a little-endian architecture.


- Closure entry encoding:

entry   0       CALL opcode             0xE8
        1       32-bit offset
        5       <padding>               0x00
        6       <next entry or variables>


- Trampoline encoding:

entry   0       MOV AL,code             0xB0, code-byte
        2       CALL n(ESI)             0xFF 0x96 n-longword
        8       <trampoline dependent storage>


- GC & interrupt check at procedure/continuation entry:

gc_lab  -7      CALL n(ESI)             0xFF 0x56 n-byte
        -4      <type/arity info>
        -2      <gc offset>
entry   0       CMP EDI,(ESI)           0x39 0x3e
        2       JAE gc_lab              0x73 0xf5 (= -11)
        4       <real code>


- GC & interrupt check at closure entry:

gc_lab  -11     ADD (ESP),&offset       0x83 0x04 0x24 offset-byte
        -7      JMP n(ESI)              0xFF 0x66 n-byte
        -4      <type/arity info>
        -2      <gc offset>
entry   0       ADD (ESP),&magic        0x81 0x04 0x24 magic-longword
        7       CMP EDI,(ESI)           0x39 0x3e
        9       JAE gc_lab              0x73 0xea (= -22)
        11      <real code>

  The magic value depends on the closure because of canonicalization.

  The ADD instruction at offset -11 is not present for the 0th closure
  entry, since it is the canonical entry point. Its format depends on
  the value of offset, since the sign-extending forms often suffice.
179 180 offset = entry_number * entry_size 181 magic = ([TC_COMPILED_ENTRY | 0] - (offset + length_of_CALL_instruction)) 182 183 */ 184 185 #define ASM_RESET_HOOK i386_reset_hook 186 187 #define CMPINT_USE_STRUCS 1 188 189 /* These next definitions must agree with "cmpauxmd/i386.m4", which is 190 where the register block is allocated. */ 191 #define COMPILER_REGBLOCK_N_FIXED 16 192 /* Big enough to hold 80-bit floating-point value: */ 193 #define COMPILER_TEMP_SIZE 3 194 #define COMPILER_REGBLOCK_N_TEMPS 256 195 #define COMPILER_REGBLOCK_N_HOOKS 80 196 #define COMPILER_HOOK_SIZE 1 197 198 #define COMPILER_REGBLOCK_EXTRA_SIZE \ 199 (COMPILER_REGBLOCK_N_HOOKS * COMPILER_HOOK_SIZE) 200 201 #define REGBLOCK_ALLOCATED_BY_INTERFACE true 202 203 typedef byte_t insn_t; 204 205 /* Number of insn_t units preceding entry address in which header 206 (type and offset info) is stored. */ 207 #define CC_ENTRY_HEADER_SIZE (CC_ENTRY_TYPE_SIZE + CC_ENTRY_OFFSET_SIZE) 208 #define CC_ENTRY_TYPE_SIZE 2 209 #define CC_ENTRY_OFFSET_SIZE 2 210 211 /* Number of insn_t units preceding entry header in which GC trap 212 instructions are stored. */ 213 #define CC_ENTRY_GC_TRAP_SIZE 3 214 215 #define EMBEDDED_CLOSURE_ADDRS_P 1 216 217 typedef struct 218 { 219 insn_t * old_addr; 220 insn_t * new_addr; 221 } reloc_ref_t; 222 223 #define DECLARE_RELOCATION_REFERENCE(name) reloc_ref_t name 224 225 #define START_CLOSURE_RELOCATION(scan, ref) \ 226 start_closure_relocation ((scan), (&ref)) 227 228 #define START_OPERATOR_RELOCATION(scan, ref) do \ 229 { \ 230 start_operator_relocation ((scan), (&ref)); \ 231 (scan) += 1; \ 232 } while (false) 233 234 #define OPERATOR_RELOCATION_OFFSET 1 235 236 #define READ_COMPILED_CLOSURE_TARGET(a, r) \ 237 read_compiled_closure_target ((a), (&r)) 238 239 /* Size of execution cache in SCHEME_OBJECTS. 
*/ 240 #define UUO_LINK_SIZE 2 241 242 #define UUO_WORDS_TO_COUNT(nw) (((nw) - 1) / UUO_LINK_SIZE) 243 #define UUO_COUNT_TO_WORDS(nc) (((nc) * UUO_LINK_SIZE) + 1) 244 245 #define READ_UUO_TARGET(a, r) read_uuo_target ((a), (&r)) 246 247 #define FLUSH_I_CACHE() IA32_CACHE_SYNCHRONIZE () 248 #define FLUSH_I_CACHE_REGION(address, nwords) IA32_CACHE_SYNCHRONIZE () 249 #define PUSH_D_CACHE_REGION(address, nwords) IA32_CACHE_SYNCHRONIZE () 250 251 #define IA32_CACHE_SYNCHRONIZE() do \ 252 { \ 253 if (ia32_cpuid_needed) \ 254 ia32_cache_synchronize (); \ 255 } while (false) 256 257 #if defined(__OS2__) && (defined(__IBMC__) || defined(__WATCOMC__)) 258 # define ASM_ENTRY_POINT(name) (_System name) 259 #elif defined(__WIN32__) && defined(__WATCOMC__) 260 # define ASM_ENTRY_POINT(name) (__cdecl name) 261 #else 262 # define ASM_ENTRY_POINT(name) name 263 #endif 264 265 extern int ASM_ENTRY_POINT (i386_interface_initialize) (void); 266 extern void ASM_ENTRY_POINT (within_c_stack) (void (*) (void *), void *); 267 268 extern void asm_assignment_trap (void); 269 extern void asm_dont_serialize_cache (void); 270 extern void asm_error (void); 271 extern void asm_fixnum_shift (void); 272 extern void asm_generic_add (void); 273 extern void asm_generic_decrement (void); 274 extern void asm_generic_divide (void); 275 extern void asm_generic_equal (void); 276 extern void asm_generic_greater (void); 277 extern void asm_generic_increment (void); 278 extern void asm_generic_less (void); 279 extern void asm_generic_modulo (void); 280 extern void asm_generic_multiply (void); 281 extern void asm_generic_negative (void); 282 extern void asm_generic_positive (void); 283 extern void asm_generic_quotient (void); 284 extern void asm_generic_remainder (void); 285 extern void asm_generic_subtract (void); 286 extern void asm_generic_zero (void); 287 extern void asm_interrupt_closure (void); 288 extern void asm_interrupt_continuation (void); 289 extern void asm_interrupt_continuation_2 (void); 290 
extern void asm_interrupt_dlink (void); 291 extern void asm_interrupt_procedure (void); 292 extern void asm_link (void); 293 extern void asm_nofp_add (void); 294 extern void asm_nofp_decrement (void); 295 extern void asm_nofp_divide (void); 296 extern void asm_nofp_equal (void); 297 extern void asm_nofp_greater (void); 298 extern void asm_nofp_increment (void); 299 extern void asm_nofp_less (void); 300 extern void asm_nofp_modulo (void); 301 extern void asm_nofp_multiply (void); 302 extern void asm_nofp_negative (void); 303 extern void asm_nofp_positive (void); 304 extern void asm_nofp_quotient (void); 305 extern void asm_nofp_remainder (void); 306 extern void asm_nofp_subtract (void); 307 extern void asm_nofp_zero (void); 308 extern void asm_primitive_apply (void); 309 extern void asm_primitive_error (void); 310 extern void asm_primitive_lexpr_apply (void); 311 extern void asm_reference_trap (void); 312 extern void asm_safe_reference_trap (void); 313 extern void asm_set_interrupt_enables (void); 314 extern void asm_sc_apply (void); 315 extern void asm_sc_apply_size_1 (void); 316 extern void asm_sc_apply_size_2 (void); 317 extern void asm_sc_apply_size_3 (void); 318 extern void asm_sc_apply_size_4 (void); 319 extern void asm_sc_apply_size_5 (void); 320 extern void asm_sc_apply_size_6 (void); 321 extern void asm_sc_apply_size_7 (void); 322 extern void asm_sc_apply_size_8 (void); 323 extern void asm_scheme_to_interface (void); 324 extern void asm_scheme_to_interface_call (void); 325 extern void asm_serialize_cache (void); 326 extern void asm_short_primitive_apply (void); 327 extern void asm_trampoline_to_interface (void); 328 329 extern void ia32_cache_synchronize (void); 330 extern void start_closure_relocation (SCHEME_OBJECT *, reloc_ref_t *); 331 extern insn_t * read_compiled_closure_target (insn_t *, reloc_ref_t *); 332 extern void start_operator_relocation (SCHEME_OBJECT *, reloc_ref_t *); 333 extern insn_t * read_uuo_target (SCHEME_OBJECT *, reloc_ref_t *); 334 
extern void i386_reset_hook (void); 335 336 extern int ia32_cpuid_needed; 337 338 #ifndef HAVE_FENV_H 339 # define CMPINTMD_EMULATES_FENV 340 # include "cmpintmd/x86-fenv.h" 341 #endif 342 343 #endif /* !SCM_CMPINTMD_H_INCLUDED */ 344