1 /*
2 Title: Machine dependent code for i386 and X64 under Windows and Unix
3
4 Copyright (c) 2000-7
5 Cambridge University Technical Services Limited
6
7 Further work copyright David C. J. Matthews 2011-20
8
9 This library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License version 2.1 as published by the Free Software Foundation.
12
13 This library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with this library; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #elif defined(_WIN32)
27 #include "winconfig.h"
28 #else
29 #error "No configuration file"
30 #endif
31
32 #ifdef HAVE_STDLIB_H
33 #include <stdlib.h>
34 #endif
35
36 #include <stdio.h>
37
38 #ifdef HAVE_SIGNAL_H
39 #include <signal.h>
40 #endif
41
42 #ifdef HAVE_ASSERT_H
43 #include <assert.h>
44 #define ASSERT(x) assert(x)
45 #else
46 #define ASSERT(x)
47 #endif
48
49 #ifdef HAVE_STRING_H
50 #include <string.h>
51 #endif
52
53 #ifdef HAVE_ERRNO_H
54 #include <errno.h>
55 #endif
56
57 #if (defined(_WIN32))
58 #include <windows.h>
59 #include <excpt.h>
60 #endif
61
62 #include "globals.h"
63 #include "run_time.h"
64 #include "diagnostics.h"
65 #include "processes.h"
66 #include "profiling.h"
67 #include "machine_dep.h"
68 #include "scanaddrs.h"
69 #include "memmgr.h"
70 #include "rtsentry.h"
71
72 #include "sys.h" // Temporary
73
74
75 /**********************************************************************
76 *
77 * Register usage:
78 *
79 * %Reax: First argument to function. Result of function call.
80 * %Rebx: Second argument to function.
81 * %Recx: General register
82 * %Redx: Closure pointer in call.
83 * %Rebp: Points to memory used for extra registers
84 * %Resi: General register.
85 * %Redi: General register.
86 * %Resp: Stack pointer.
87 * The following apply only on the X64
88 * %R8: Third argument to function
89 * %R9: Fourth argument to function
90 * %R10: Fifth argument to function
91 * %R11: General register
92 * %R12: General register
93 * %R13: General register
94 * %R14: General register
95 * %R15: Memory allocation pointer
96
97 *
98 **********************************************************************/
99
#ifdef HOSTARCHITECTURE_X86_64
// Floating point save area for X86-64: one double-sized slot per saved register.
struct fpSaveArea {
    double fpregister[7]; // Save area for xmm0-6
};
#else
// Structure of floating point save area.
// This is dictated by the hardware.
// Each saved floating point register occupies 10 bytes.
typedef byte fpregister[10];

// The field order and sizes must not be changed: see the note above.
struct fpSaveArea {
    unsigned short cw;          // Control word (initialised in InitStackFrame).
    unsigned short _unused0;
    unsigned short sw;          // NOTE(review): presumably the status word - confirm.
    unsigned short _unused1;
    unsigned short tw;          // Tag word (initialised in InitStackFrame to "all unused").
    unsigned short _unused2;
    unsigned fip;               // NOTE(review): presumably the FP instruction pointer - confirm.
    unsigned short fcs0;
    unsigned short _unused3;
    unsigned foo;               // NOTE(review): presumably the FP operand offset - confirm.
    unsigned short fcs1;
    unsigned short _unused4;
    fpregister registers[8];    // The eight 10-byte register images.
};
#endif
125
/* the amount of ML stack space to reserve for registers,
   C exception handling etc.  The compiler requires us to
   reserve 2 stack-frames worth (2 * 20 words).  We actually reserve
   slightly more than this.
   The value is a count of stack items: it is added to a stackItem pointer
   when the stack limit is set and is scaled by
   sizeof(uintptr_t)/sizeof(PolyWord) when a size in PolyWords is needed.
*/
#if (!defined(_WIN32) && !defined(HAVE_SIGALTSTACK))
// If we can't handle signals on a separate stack make sure there's space
// on the Poly stack.
#define OVERFLOW_STACK_SIZE (50+1024)
#else
#define OVERFLOW_STACK_SIZE 50
#endif
138
139 class X86TaskData;
140
// This is passed as the argument vector to X86AsmSwitchToPoly.
// The offsets are built into the assembly code and the code-generator.
// localMpointer and stackPtr are updated before control returns to C.
// Because the offsets are fixed externally, fields must not be added,
// removed or reordered here without changing the assembly code and the
// code-generator to match.  The "unused"/"noLongerUsed" fields are kept
// only to preserve the offsets of the fields that follow them.
typedef struct _AssemblyArgs {
public:
    PolyWord        *localMpointer;     // Allocation ptr + 1 word
    stackItem       *handlerRegister;   // Current exception handler
    PolyWord        *localMbottom;      // Base of memory + 1 word
    stackItem       *stackLimit;        // Lower limit of stack
    stackItem       exceptionPacket;    // Set if there is an exception
    byte            unusedRequestCode;  // No longer used.
    byte            unusedFlag;         // No longer used
    byte            returnReason;       // Reason for returning from ML.  One of the RETURN_REASON values.
    byte            unusedRestore;      // No longer used.
    uintptr_t       saveCStack;         // Saved C stack frame.
    PolyWord        threadId;           // My thread id.  Saves having to call into RTS for it.
    stackItem       *stackPtr;          // Current stack pointer
    byte            *noLongerUsed;      // Now removed
    byte            *heapOverFlowCall;  // These are filled in with the functions.
    byte            *stackOverFlowCall;
    byte            *stackOverFlowCallEx;
    byte            *trapHandlerEntry;
    // Saved registers, where applicable.
    stackItem       p_rax;
    stackItem       p_rbx;
    stackItem       p_rcx;
    stackItem       p_rdx;
    stackItem       p_rsi;
    stackItem       p_rdi;
#ifdef HOSTARCHITECTURE_X86_64
    stackItem       p_r8;
    stackItem       p_r9;
    stackItem       p_r10;
    stackItem       p_r11;
    stackItem       p_r12;
    stackItem       p_r13;
    stackItem       p_r14;
#endif
    struct fpSaveArea p_fp;             // Floating point state; initialised in InitStackFrame.
} AssemblyArgs;
181
// These next few are temporarily added for the interpreter
// This duplicates some code in reals.cpp but is now updated.

// Number of POLYUNSIGNED words needed to hold one double.
#define DOUBLESIZE (sizeof(double)/sizeof(POLYUNSIGNED))

// Overlays a double with an array of POLYUNSIGNED words covering the same storage.
union realdb { double dble; POLYUNSIGNED puns[DOUBLESIZE]; };

// Number of PolyWords occupied by one native (uintptr_t-sized) word.
#define LGWORDSIZE (sizeof(uintptr_t) / sizeof(PolyWord))
189
// Per-thread state for the X86 back-end.  Extends the generic TaskData with
// the argument block shared with the assembly code (assemblyInterface), the
// pending allocation request recorded by a heap-overflow trap, and the mask
// of saved registers that currently hold values the GC must process.
class X86TaskData: public TaskData {
public:
    X86TaskData();
    unsigned allocReg; // The register to take the allocated space.
    POLYUNSIGNED allocWords; // The words to allocate.
    AssemblyArgs assemblyInterface; // Argument block passed to X86AsmSwitchToPoly.
    int saveRegisterMask; // Registers that need to be updated by a GC.  Bit n corresponds to get_reg(n).

    virtual void GarbageCollect(ScanAddress *process);
    void ScanStackAddress(ScanAddress *process, stackItem &val, StackSpace *stack);
    virtual void EnterPolyCode(); // Start running ML
    virtual void InterruptCode();
    virtual bool AddTimeProfileCount(SIGNALCONTEXT *context);
    virtual void InitStackFrame(TaskData *parentTask, Handle proc, Handle arg);
    virtual void SetException(poly_exn *exc);

    // Release a mutex in exactly the same way as compiler code
    virtual Handle AtomicDecrement(Handle mutexp);
    virtual void AtomicReset(Handle mutexp);

    // Return the minimum space occupied by the stack.   Used when setting a limit.
    // N.B. This is PolyWords not native words.
    // The result is the distance from the saved stack pointer to the top of
    // the stack plus the reserved overflow area converted to PolyWords.
    virtual uintptr_t currentStackSpace(void) const
        { return (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) +
            OVERFLOW_STACK_SIZE*sizeof(uintptr_t)/sizeof(PolyWord); }

    // Increment the profile count for an allocation.  Also now used for mutex contention.
    // The count is attributed to the code address found in the word at the saved stack pointer.
    virtual void addProfileCount(POLYUNSIGNED words)
        { addSynchronousCount(assemblyInterface.stackPtr[0].codeAddr, words); }

    // PreRTSCall: After calling from ML to the RTS we need to save the current heap pointer
    virtual void PreRTSCall(void) { TaskData::PreRTSCall();  SaveMemRegisters(); }
    // PostRTSCall: Before returning we need to restore the heap pointer.
    // If there has been a GC in the RTS call we need to create a new heap area.
    virtual void PostRTSCall(void) { SetMemRegisters(); TaskData::PostRTSCall();  }

    virtual void CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length);

    // Works out the size of the allocation that caused a heap-overflow trap.
    void HeapOverflowTrap(byte *pcPtr);

    // Copy the allocation state into / out of the assembly interface.
    void SetMemRegisters();
    void SaveMemRegisters();
    // Read the register mask that follows the trap call in the code.
    void SetRegisterMask();

    // Deal with a trap from the assembly code according to returnReason.
    void HandleTrap();

    PLock interruptLock; // Held while the stack limit is being set (see InterruptCode and HandleTrap).

    // Pointer to the saved copy of register number n.
    stackItem *get_reg(int n);

    // Accessors for the saved stack pointer and the saved registers.
    stackItem *&regSP() { return assemblyInterface.stackPtr; }

    stackItem &regAX() { return assemblyInterface.p_rax; }
    stackItem &regBX() { return assemblyInterface.p_rbx; }
    stackItem &regCX() { return assemblyInterface.p_rcx; }
    stackItem &regDX() { return assemblyInterface.p_rdx; }
    stackItem &regSI() { return assemblyInterface.p_rsi; }
    stackItem &regDI() { return assemblyInterface.p_rdi; }
#ifdef HOSTARCHITECTURE_X86_64
    stackItem &reg8() { return assemblyInterface.p_r8; }
    stackItem &reg9() { return assemblyInterface.p_r9; }
    stackItem &reg10() { return assemblyInterface.p_r10; }
    stackItem &reg11() { return assemblyInterface.p_r11; }
    stackItem &reg12() { return assemblyInterface.p_r12; }
    stackItem &reg13() { return assemblyInterface.p_r13; }
    stackItem &reg14() { return assemblyInterface.p_r14; }
#endif

    // Saved error code.  Initialised to zero by the constructor.
#if (defined(_WIN32))
    DWORD savedErrno;
#else
    int savedErrno;
#endif
};
264
265 class X86Dependent: public MachineDependent {
266 public:
X86Dependent()267 X86Dependent() {}
268
269 // Create a task data object.
CreateTaskData(void)270 virtual TaskData *CreateTaskData(void) { return new X86TaskData(); }
271
272 // Initial size of stack in PolyWords
InitialStackSize(void)273 virtual unsigned InitialStackSize(void) { return (128+OVERFLOW_STACK_SIZE) * sizeof(uintptr_t) / sizeof(PolyWord); }
274 virtual void ScanConstantsWithinCode(PolyObject *addr, PolyObject *oldAddr, POLYUNSIGNED length, ScanAddress *process);
275
MachineArchitecture(void)276 virtual Architectures MachineArchitecture(void)
277 #ifndef HOSTARCHITECTURE_X86_64
278 { return MA_I386; }
279 #elif defined(POLYML32IN64)
280 { return MA_X86_64_32; }
281 #else
282 { return MA_X86_64; }
283 #endif
284 };
285
// Values for the returnReason byte
// HandleTrap dispatches on these; any other value is treated as a fatal error.
enum RETURN_REASON {
    RETURN_HEAP_OVERFLOW = 1,       // Heap allocation trap.
    RETURN_STACK_OVERFLOW = 2,      // Stack check failed; required size derived from the stack pointer.
    RETURN_STACK_OVERFLOWEX = 3,    // Stack check failed; required limit taken from the saved RDI/EDI.
};
292
extern "C" {

    // These are declared in the assembly code segment.
    // Enter ML code.  Called with the address of the task's AssemblyArgs.
    void X86AsmSwitchToPoly(void *);
    // Entry points whose addresses are stored in the AssemblyArgs by the
    // X86TaskData constructor.  They are only used as addresses here.
    extern int X86AsmCallExtraRETURN_HEAP_OVERFLOW(void);
    extern int X86AsmCallExtraRETURN_STACK_OVERFLOW(void);
    extern int X86AsmCallExtraRETURN_STACK_OVERFLOWEX(void);

    POLYUNSIGNED X86AsmAtomicDecrement(PolyObject*);

    // Defined in this file; its address is stored in trapHandlerEntry.
    void X86TrapHandler(PolyWord threadId);
};
305
X86TaskData()306 X86TaskData::X86TaskData(): allocReg(0), allocWords(0), saveRegisterMask(0)
307 {
308 assemblyInterface.heapOverFlowCall = (byte*)X86AsmCallExtraRETURN_HEAP_OVERFLOW;
309 assemblyInterface.stackOverFlowCall = (byte*)X86AsmCallExtraRETURN_STACK_OVERFLOW;
310 assemblyInterface.stackOverFlowCallEx = (byte*)X86AsmCallExtraRETURN_STACK_OVERFLOWEX;
311 assemblyInterface.trapHandlerEntry = (byte*)X86TrapHandler;
312 savedErrno = 0;
313 }
314
GarbageCollect(ScanAddress * process)315 void X86TaskData::GarbageCollect(ScanAddress *process)
316 {
317 TaskData::GarbageCollect(process); // Process the parent first
318 assemblyInterface.threadId = threadObject;
319
320 if (stack != 0)
321 {
322 ASSERT(assemblyInterface.stackPtr >= (stackItem*)stack->bottom && assemblyInterface.stackPtr <= (stackItem*)stack->top);
323 // Now the values on the stack.
324 for (stackItem *q = assemblyInterface.stackPtr; q < (stackItem*)stack->top; q++)
325 ScanStackAddress(process, *q, stack);
326 }
327 // Register mask
328 for (int i = 0; i < 16; i++)
329 {
330 if (saveRegisterMask & (1 << i))
331 ScanStackAddress(process, *get_reg(i), stack);
332 }
333 }
334
335 // Process a value within the stack.
ScanStackAddress(ScanAddress * process,stackItem & stackItem,StackSpace * stack)336 void X86TaskData::ScanStackAddress(ScanAddress *process, stackItem &stackItem, StackSpace *stack)
337 {
338 // We may have return addresses on the stack which could look like
339 // tagged values. Check whether the value is in the code area before
340 // checking whether it is untagged.
341 #ifdef POLYML32IN64
342 // In 32-in-64 return addresses always have the top 32 bits non-zero.
343 if (stackItem.argValue < ((uintptr_t)1 << 32))
344 {
345 // It's either a tagged integer or an object pointer.
346 if (stackItem.w().IsDataPtr())
347 {
348 PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr());
349 stackItem = val;
350 }
351 }
352 else
353 {
354 // Could be a code address or a stack address.
355 MemSpace *space = gMem.SpaceForAddress(stackItem.codeAddr - 1);
356 if (space == 0 || space->spaceType != ST_CODE) return;
357 PolyObject *obj = gMem.FindCodeObject(stackItem.codeAddr);
358 ASSERT(obj != 0);
359 // Process the address of the start. Don't update anything.
360 process->ScanObjectAddress(obj);
361 }
362 #else
363 // The -1 here is because we may have a zero-sized cell in the last
364 // word of a space.
365 MemSpace *space = gMem.SpaceForAddress(stackItem.codeAddr-1);
366 if (space == 0) return; // In particular we may have one of the assembly code addresses.
367 if (space->spaceType == ST_CODE)
368 {
369 PolyObject *obj = gMem.FindCodeObject(stackItem.codeAddr);
370 // If it is actually an integer it might be outside a valid code object.
371 if (obj == 0)
372 {
373 ASSERT(stackItem.w().IsTagged()); // It must be an integer
374 }
375 else // Process the address of the start. Don't update anything.
376 process->ScanObjectAddress(obj);
377 }
378 else if (space->spaceType == ST_LOCAL && stackItem.w().IsDataPtr())
379 // Local values must be word addresses.
380 {
381 PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr());
382 stackItem = val;
383 }
384 #endif
385 }
386
387
388 // Copy a stack
CopyStackFrame(StackObject * old_stack,uintptr_t old_length,StackObject * new_stack,uintptr_t new_length)389 void X86TaskData::CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length)
390 {
391 /* Moves a stack, updating all references within the stack */
392 #ifdef POLYML32IN64
393 old_length = old_length / 2;
394 new_length = new_length / 2;
395 #endif
396
397 stackItem *old_base = (stackItem *)old_stack;
398 stackItem *new_base = (stackItem*)new_stack;
399 stackItem *old_top = old_base + old_length;
400
401 /* Calculate the offset of the new stack from the old. If the frame is
402 being extended objects in the new frame will be further up the stack
403 than in the old one. */
404
405 uintptr_t offset = new_base - old_base + new_length - old_length;
406
407 stackItem *oldStackPtr = assemblyInterface.stackPtr;
408
409 // Adjust the stack pointer and handler pointer since these point into the stack.
410 assemblyInterface.stackPtr = assemblyInterface.stackPtr + offset;
411 assemblyInterface.handlerRegister = assemblyInterface.handlerRegister + offset;
412
413 // We need to adjust any values on the stack that are pointers within the stack.
414 // Skip the unused part of the stack.
415
416 size_t i = oldStackPtr - old_base;
417
418 ASSERT (i <= old_length);
419
420 i = old_length - i;
421
422 stackItem *old = oldStackPtr;
423 stackItem *newp = assemblyInterface.stackPtr;
424
425 while (i--)
426 {
427 stackItem old_word = *old++;
428 if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top)
429 old_word.stackAddr = old_word.stackAddr + offset;
430 else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr))
431 {
432 stackItem *addr = (stackItem*)old_word.w().AsStackAddr();
433 if (addr >= old_base && addr <= old_top)
434 {
435 addr += offset;
436 old_word = PolyWord::FromStackAddr((PolyWord*)addr);
437 }
438 }
439 *newp++ = old_word;
440 }
441 ASSERT(old == ((stackItem*)old_stack)+old_length);
442 ASSERT(newp == ((stackItem*)new_stack)+new_length);
443 // And change any registers that pointed into the old stack
444 for (int j = 0; j < 16; j++)
445 {
446 if (saveRegisterMask & (1 << j))
447 {
448 stackItem *regAddr = get_reg(j);
449 stackItem old_word = *regAddr;
450 if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top)
451 old_word.stackAddr = old_word.stackAddr + offset;
452 else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr))
453 {
454 stackItem *addr = (stackItem*)old_word.w().AsStackAddr();
455 if (addr >= old_base && addr <= old_top)
456 {
457 addr += offset;
458 old_word = PolyWord::FromStackAddr((PolyWord*)addr);
459 }
460 }
461 *regAddr = old_word;
462 }
463 }
464 }
465
EnterPolyCode()466 void X86TaskData::EnterPolyCode()
467 /* Called from "main" to enter the code. */
468 {
469 SetMemRegisters();
470 // Enter the ML code.
471 X86AsmSwitchToPoly(&this->assemblyInterface);
472 // This should never return
473 ASSERT(0);
474 }
475
476 // Called from the assembly code as a result of a trap i.e. a request for
477 // a GC or to extend the stack.
X86TrapHandler(PolyWord threadId)478 void X86TrapHandler(PolyWord threadId)
479 {
480 X86TaskData* taskData = (X86TaskData*)TaskData::FindTaskForId(threadId);
481 taskData->HandleTrap();
482 }
483
HandleTrap()484 void X86TaskData::HandleTrap()
485 {
486 SaveMemRegisters(); // Update globals from the memory registers.
487
488 switch (this->assemblyInterface.returnReason)
489 {
490
491 case RETURN_HEAP_OVERFLOW:
492 // The heap has overflowed.
493 SetRegisterMask();
494 this->HeapOverflowTrap(assemblyInterface.stackPtr[0].codeAddr); // Computes a value for allocWords only
495 break;
496
497 case RETURN_STACK_OVERFLOW:
498 case RETURN_STACK_OVERFLOWEX:
499 {
500 SetRegisterMask();
501 uintptr_t min_size; // Size in PolyWords
502 if (assemblyInterface.returnReason == RETURN_STACK_OVERFLOW)
503 {
504 min_size = (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) +
505 OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord);
506 }
507 else
508 {
509 // Stack limit overflow. If the required stack space is larger than
510 // the fixed overflow size the code will calculate the limit in %EDI.
511 stackItem* stackP = regDI().stackAddr;
512 min_size = (this->stack->top - (PolyWord*)stackP) +
513 OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord);
514 }
515 try {
516 // The stack check has failed. This may either be because we really have
517 // overflowed the stack or because the stack limit value has been adjusted
518 // to result in a call here.
519 CheckAndGrowStack(this, min_size);
520 }
521 catch (IOException&) {
522 // We may get an exception while handling this if we run out of store
523 }
524 {
525 PLocker l(&interruptLock);
526 // Set the stack limit. This clears any interrupt and also sets the
527 // correct value if we've grown the stack.
528 this->assemblyInterface.stackLimit = (stackItem*)this->stack->bottom + OVERFLOW_STACK_SIZE;
529 }
530 // We're in a safe state to handle any interrupts.
531 try {
532 // Process any asynchronous events i.e. interrupts or kill
533 processes->ProcessAsynchRequests(this);
534 // Release and re-acquire use of the ML memory to allow another thread to GC.
535 processes->ThreadReleaseMLMemory(this);
536 processes->ThreadUseMLMemory(this);
537 }
538 catch (IOException&) {
539 // If this resulted in an ML exception it will also raise a C++ exception.
540 }
541 catch (KillException&) {
542 processes->ThreadExit(this);
543 }
544 break;
545 }
546
547 default:
548 Crash("Unknown return reason code %u", this->assemblyInterface.returnReason);
549 }
550 SetMemRegisters();
551 }
552
InitStackFrame(TaskData * parentTaskData,Handle proc,Handle arg)553 void X86TaskData::InitStackFrame(TaskData *parentTaskData, Handle proc, Handle arg)
554 /* Initialise stack frame. */
555 {
556 StackSpace *space = this->stack;
557 StackObject * newStack = space->stack();
558 uintptr_t stack_size = space->spaceSize() * sizeof(PolyWord) / sizeof(stackItem);
559 // Set the top of the stack inside the stack rather than at the end. This wastes
560 // a word but if sp is actually at the end OpenBSD segfaults because it isn't in
561 // a MAP_STACK area.
562 uintptr_t topStack = stack_size - 1;
563 stackItem* stackTop = (stackItem*)newStack + topStack;
564 *stackTop = TAGGED(0); // Set it to non-zero.
565 assemblyInterface.stackPtr = stackTop;
566 assemblyInterface.stackLimit = (stackItem*)space->bottom + OVERFLOW_STACK_SIZE;
567 assemblyInterface.handlerRegister = stackTop;
568
569 // Floating point save area.
570 memset(&assemblyInterface.p_fp, 0, sizeof(struct fpSaveArea));
571 #ifndef HOSTARCHITECTURE_X86_64
572 // Set the control word for 64-bit precision otherwise we get inconsistent results.
573 assemblyInterface.p_fp.cw = 0x027f ; // Control word
574 assemblyInterface.p_fp.tw = 0xffff; // Tag registers - all unused
575 #endif
576 // Store the argument and the closure.
577 assemblyInterface.p_rdx = proc->Word(); // Closure
578 assemblyInterface.p_rax = (arg == 0) ? TAGGED(0) : DEREFWORD(arg); // Argument
579 // Have to set the register mask in case we get a GC before the thread starts.
580 saveRegisterMask = (1 << 2) | 1; // Rdx and rax
581
582 #ifdef POLYML32IN64
583 // In 32-in-64 RBX always contains the heap base address.
584 assemblyInterface.p_rbx.stackAddr = (stackItem*)globalHeapBase;
585 #endif
586 }
587
// In Solaris-x86 the registers are named EIP and ESP.
// Provide the REG_EIP/REG_ESP names used by AddTimeProfileCount when only
// the unprefixed names are defined.
#if (!defined(REG_EIP) && defined(EIP))
#define REG_EIP EIP
#endif
#if (!defined(REG_ESP) && defined(ESP))
#define REG_ESP ESP
#endif
595
596
597 // Get the PC and SP(stack) from a signal context. This is needed for profiling.
598 // This version gets the actual sp and pc if we are in ML.
599 // N.B. This must not call malloc since we're in a signal handler.
AddTimeProfileCount(SIGNALCONTEXT * context)600 bool X86TaskData::AddTimeProfileCount(SIGNALCONTEXT *context)
601 {
602 stackItem * sp = 0;
603 POLYCODEPTR pc = 0;
604 if (context != 0)
605 {
606 // The tests for HAVE_UCONTEXT_T, HAVE_STRUCT_SIGCONTEXT and HAVE_WINDOWS_H need
607 // to follow the tests in processes.h.
608 #if defined(HAVE_WINDOWS_H)
609 #ifdef _WIN64
610 sp = (stackItem *)context->Rsp;
611 pc = (POLYCODEPTR)context->Rip;
612 #else
613 // Windows 32 including cygwin.
614 sp = (stackItem *)context->Esp;
615 pc = (POLYCODEPTR)context->Eip;
616 #endif
617 #elif defined(HAVE_UCONTEXT_T)
618 #ifdef HAVE_MCONTEXT_T_GREGS
619 // Linux
620 #ifndef HOSTARCHITECTURE_X86_64
621 pc = (byte*)context->uc_mcontext.gregs[REG_EIP];
622 sp = (stackItem*)context->uc_mcontext.gregs[REG_ESP];
623 #else /* HOSTARCHITECTURE_X86_64 */
624 pc = (byte*)context->uc_mcontext.gregs[REG_RIP];
625 sp = (stackItem*)context->uc_mcontext.gregs[REG_RSP];
626 #endif /* HOSTARCHITECTURE_X86_64 */
627 #elif defined(HAVE_MCONTEXT_T_MC_ESP)
628 // FreeBSD
629 #ifndef HOSTARCHITECTURE_X86_64
630 pc = (byte*)context->uc_mcontext.mc_eip;
631 sp = (stackItem*)context->uc_mcontext.mc_esp;
632 #else /* HOSTARCHITECTURE_X86_64 */
633 pc = (byte*)context->uc_mcontext.mc_rip;
634 sp = (stackItem*)context->uc_mcontext.mc_rsp;
635 #endif /* HOSTARCHITECTURE_X86_64 */
636 #else
637 // Mac OS X
638 #ifndef HOSTARCHITECTURE_X86_64
639 #if(defined(HAVE_STRUCT_MCONTEXT_SS)||defined(HAVE_STRUCT___DARWIN_MCONTEXT32_SS))
640 pc = (byte*)context->uc_mcontext->ss.eip;
641 sp = (stackItem*)context->uc_mcontext->ss.esp;
642 #elif(defined(HAVE_STRUCT___DARWIN_MCONTEXT32___SS))
643 pc = (byte*)context->uc_mcontext->__ss.__eip;
644 sp = (stackItem*)context->uc_mcontext->__ss.__esp;
645 #endif
646 #else /* HOSTARCHITECTURE_X86_64 */
647 #if(defined(HAVE_STRUCT_MCONTEXT_SS)||defined(HAVE_STRUCT___DARWIN_MCONTEXT64_SS))
648 pc = (byte*)context->uc_mcontext->ss.rip;
649 sp = (stackItem*)context->uc_mcontext->ss.rsp;
650 #elif(defined(HAVE_STRUCT___DARWIN_MCONTEXT64___SS))
651 pc = (byte*)context->uc_mcontext->__ss.__rip;
652 sp = (stackItem*)context->uc_mcontext->__ss.__rsp;
653 #endif
654 #endif /* HOSTARCHITECTURE_X86_64 */
655 #endif
656 #elif defined(HAVE_STRUCT_SIGCONTEXT)
657 #if defined(HOSTARCHITECTURE_X86_64) && defined(__OpenBSD__)
658 // CPP defines missing in amd64/signal.h in OpenBSD
659 pc = (byte*)context->sc_rip;
660 sp = (stackItem*)context->sc_rsp;
661 #else // !HOSTARCHITEXTURE_X86_64 || !defined(__OpenBSD__)
662 pc = (byte*)context->sc_pc;
663 sp = (stackItem*)context->sc_sp;
664 #endif
665 #endif
666 }
667 if (pc != 0)
668 {
669 // See if the PC we've got is an ML code address.
670 MemSpace *space = gMem.SpaceForAddress(pc);
671 if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
672 {
673 incrementCountAsynch(pc);
674 return true;
675 }
676 }
677 // See if the sp value is in the current stack.
678 if (sp >= (stackItem*)this->stack->bottom && sp < (stackItem*)this->stack->top)
679 {
680 // We may be in the assembly code. The top of the stack will be a return address.
681 pc = sp[0].w().AsCodePtr();
682 MemSpace *space = gMem.SpaceForAddress(pc);
683 if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
684 {
685 incrementCountAsynch(pc);
686 return true;
687 }
688 }
689 // See if the value of regSP is a valid stack pointer.
690 // This works if we happen to be in an RTS call using a "Full" call.
691 // It doesn't work if we've used a "Fast" call because that doesn't save the SP.
692 sp = assemblyInterface.stackPtr;
693 if (sp >= (stackItem*)this->stack->bottom && sp < (stackItem*)this->stack->top)
694 {
695 // We may be in the run-time system.
696 pc = sp[0].w().AsCodePtr();
697 MemSpace *space = gMem.SpaceForAddress(pc);
698 if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
699 {
700 incrementCountAsynch(pc);
701 return true;
702 }
703 }
704 // None of those worked
705 return false;
706 }
707
708 // This is called from a different thread so we have to be careful.
InterruptCode()709 void X86TaskData::InterruptCode()
710 {
711 PLocker l(&interruptLock);
712 // Set the stack limit pointer to the top of the stack to cause
713 // a trap when we next check for stack overflow.
714 // We use a lock here to ensure that we always use the current value of the
715 // stack. The thread we're interrupting could be growing the stack at this point.
716 if (this->stack != 0)
717 this->assemblyInterface.stackLimit = (stackItem*)(this->stack->top-1);
718 }
719
// This is called from SwitchToPoly before we enter the ML code.
// It makes sure there is an allocation area large enough for any pending
// request (allocWords), performs that pending allocation, and then copies
// the allocation pointer, allocation limit and thread id into the
// assembly interface.
void X86TaskData::SetMemRegisters()
{
    // Copy the current store limits into variables before we go into the assembly code.

    // If we haven't yet set the allocation area or we don't have enough we need
    // to create one (or a new one).
    if (this->allocPointer <= this->allocLimit + this->allocWords)
    {
        if (this->allocPointer < this->allocLimit)
            Crash ("Bad length in heap overflow trap");

        // Find some space to allocate in.  Updates taskData->allocPointer and
        // returns a pointer to the newly allocated space (if allocWords != 0)
        PolyWord *space =
            processes->FindAllocationSpace(this, this->allocWords, true);
        if (space == 0)
        {
            // We will now raise an exception instead of returning.
            // Set allocWords to zero so we don't set the allocation register
            // since that could be holding the exception packet.
            this->allocWords = 0;
        }
        // Undo the allocation just now.
        // (It is done again below, where the allocation register is also set.)
        this->allocPointer += this->allocWords;
    }

    if (this->allocWords != 0)
    {
        // If we have had a heap trap we actually do the allocation here.
        // We will have already garbage collected and recovered sufficient space.
        // This also happens if we have just trapped because of store profiling.
        this->allocPointer -= this->allocWords; // Now allocate
        // Set the allocation register to this area. N.B.  This is an absolute address.
        if (this->allocReg < 15)
            get_reg(this->allocReg)[0].codeAddr = (POLYCODEPTR)(this->allocPointer + 1); /* remember: it's off-by-one */
        this->allocWords = 0;
    }

    // If we have run out of store, either just above or while allocating in the RTS,
    // allocPointer and allocLimit will have been set to zero as part of the GC.  We will
    // now be raising an exception which may free some store but we need to come back here
    // before we allocate anything.  The compiled code uses unsigned arithmetic to check for
    // heap overflow but only after subtracting the space required.  We need to make sure
    // that the values are still non-negative after subtracting any object size.
    if (this->allocPointer == 0) this->allocPointer += MAX_OBJECT_SIZE;
    if (this->allocLimit == 0) this->allocLimit += MAX_OBJECT_SIZE;

    // The values held in the assembly interface are one word above the real ones.
    this->assemblyInterface.localMbottom = this->allocLimit + 1;
    this->assemblyInterface.localMpointer = this->allocPointer + 1;
    // If we are profiling store allocation we set mem_hl so that a trap
    // will be generated.
    if (profileMode == kProfileStoreAllocation)
        this->assemblyInterface.localMbottom = this->assemblyInterface.localMpointer;

    this->assemblyInterface.threadId = this->threadObject;
}
777
778 // This is called whenever we have returned from ML to C.
SaveMemRegisters()779 void X86TaskData::SaveMemRegisters()
780 {
781 this->allocPointer = this->assemblyInterface.localMpointer - 1;
782 this->allocWords = 0;
783 this->assemblyInterface.exceptionPacket = TAGGED(0);
784 this->saveRegisterMask = 0;
785 }
786
787 // Called on a GC or stack overflow trap. The register mask
788 // is in the bytes after the trap call.
SetRegisterMask()789 void X86TaskData::SetRegisterMask()
790 {
791 byte *pc = assemblyInterface.stackPtr[0].codeAddr;
792 if (*pc == 0xcd) // CD - INT n is used for a single byte
793 {
794 pc++;
795 saveRegisterMask = *pc++;
796 }
797 else if (*pc == 0xca) // CA - FAR RETURN is used for a two byte mask
798 {
799 pc++;
800 saveRegisterMask = pc[0] | (pc[1] << 8);
801 pc += 2;
802 }
803 assemblyInterface.stackPtr[0].codeAddr = pc;
804 }
805
get_reg(int n)806 stackItem *X86TaskData::get_reg(int n)
807 /* Returns a pointer to the register given by n. */
808 {
809 switch (n)
810 {
811 case 0: return &assemblyInterface.p_rax;
812 case 1: return &assemblyInterface.p_rcx;
813 case 2: return &assemblyInterface.p_rdx;
814 case 3: return &assemblyInterface.p_rbx;
815 // Should not have rsp or rbp.
816 case 6: return &assemblyInterface.p_rsi;
817 case 7: return &assemblyInterface.p_rdi;
818 #ifdef HOSTARCHITECTURE_X86_64
819 case 8: return &assemblyInterface.p_r8;
820 case 9: return &assemblyInterface.p_r9;
821 case 10: return &assemblyInterface.p_r10;
822 case 11: return &assemblyInterface.p_r11;
823 case 12: return &assemblyInterface.p_r12;
824 case 13: return &assemblyInterface.p_r13;
825 case 14: return &assemblyInterface.p_r14;
826 // R15 is the heap pointer so shouldn't occur here.
827 #endif /* HOSTARCHITECTURE_X86_64 */
828 default: Crash("Unknown register %d\n", n);
829 }
830 }
831
832 // Called as a result of a heap overflow trap
HeapOverflowTrap(byte * pcPtr)833 void X86TaskData::HeapOverflowTrap(byte *pcPtr)
834 {
835 X86TaskData *mdTask = this;
836 POLYUNSIGNED wordsNeeded = 0;
837 // The next instruction, after any branches round forwarding pointers or pop
838 // instructions, will be a store of register containing the adjusted heap pointer.
839 // We need to find that register and the value in it in order to find out how big
840 // the area we actually wanted is. N.B. The code-generator and assembly code
841 // must generate the correct instruction sequence.
842 // byte *pcPtr = assemblyInterface.programCtr;
843 while (true)
844 {
845 if (pcPtr[0] == 0xeb)
846 {
847 // Forwarding pointer
848 if (pcPtr[1] >= 128) pcPtr += 256 - pcPtr[1] + 2;
849 else pcPtr += pcPtr[1] + 2;
850 }
851 else if ((pcPtr[0] & 0xf8) == 0x58) // Pop instruction.
852 pcPtr++;
853 else if (pcPtr[0] == 0x41 && ((pcPtr[1] & 0xf8) == 0x58)) // Pop with Rex prefix
854 pcPtr += 2;
855 else break;
856 }
857 #ifndef HOSTARCHITECTURE_X86_64
858 // This should be movl REG,0[%ebp].
859 ASSERT(pcPtr[0] == 0x89);
860 mdTask->allocReg = (pcPtr[1] >> 3) & 7; // Remember this until we allocate the memory
861 stackItem *reg = get_reg(mdTask->allocReg);
862 stackItem reg_val = *reg;
863 // The space we need is the difference between this register
864 // and the current value of newptr.
865 // The +1 here is because assemblyInterface.localMpointer is A.M.pointer +1. The reason
866 // is that after the allocation we have the register pointing at the address we will
867 // actually use.
868 wordsNeeded = (this->allocPointer - (PolyWord*)reg_val.stackAddr) + 1;
869 *reg = TAGGED(0); // Clear this - it's not a valid address.
870 /* length in words, including length word */
871
872 ASSERT (wordsNeeded <= (1<<24)); /* Max object size including length/flag word is 2^24 words. */
873 #else /* HOSTARCHITECTURE_X86_64 */
874 ASSERT(pcPtr[1] == 0x89 || pcPtr[1] == 0x8b);
875 if (pcPtr[1] == 0x89)
876 {
877 // New (5.4) format. This should be movq REG,%r15
878 ASSERT(pcPtr[0] == 0x49 || pcPtr[0] == 0x4d);
879 mdTask->allocReg = (pcPtr[2] >> 3) & 7; // Remember this until we allocate the memory
880 if (pcPtr[0] & 0x4) mdTask->allocReg += 8;
881 }
882 else
883 {
884 // Alternative form of movq REG,%r15
885 ASSERT(pcPtr[0] == 0x4c || pcPtr[0] == 0x4d);
886 mdTask->allocReg = pcPtr[2] & 7; // Remember this until we allocate the memory
887 if (pcPtr[0] & 0x1) mdTask->allocReg += 8;
888 }
889 stackItem *reg = get_reg(this->allocReg);
890 stackItem reg_val = *reg;
891 wordsNeeded = (POLYUNSIGNED)((this->allocPointer - (PolyWord*)reg_val.stackAddr) + 1);
892 *reg = TAGGED(0); // Clear this - it's not a valid address.
893 #endif /* HOSTARCHITECTURE_X86_64 */
894 if (profileMode == kProfileStoreAllocation)
895 addProfileCount(wordsNeeded);
896
897 mdTask->allocWords = wordsNeeded; // The actual allocation is done in SetMemRegisters.
898 }
899
SetException(poly_exn * exc)900 void X86TaskData::SetException(poly_exn *exc)
901 // The RTS wants to raise an exception packet. Normally this is as the
902 // result of an RTS call in which case the caller will check this. It can
903 // also happen in a trap.
904 {
905 assemblyInterface.exceptionPacket = (PolyWord)exc; // Set for direct calls.
906 }
907
908 // Decode and process an effective address. There may
909 // be a constant address in here but in any case we need
910 // to decode it to work out where the next instruction starts.
911 // If this is an lea instruction any addresses are just constants
912 // so must not be treated as addresses.
skipea(PolyObject * base,byte ** pt,ScanAddress * process,bool lea)913 static void skipea(PolyObject *base, byte **pt, ScanAddress *process, bool lea)
914 {
915 unsigned int modrm = *((*pt)++);
916 unsigned int md = modrm >> 6;
917 unsigned int rm = modrm & 7;
918
919 if (md == 3) { } /* Register. */
920 else if (rm == 4)
921 {
922 /* s-i-b present. */
923 unsigned int sib = *((*pt)++);
924
925 if (md == 0)
926 {
927 if ((sib & 7) == 5)
928 {
929 if (! lea) {
930 #ifndef HOSTARCHITECTURE_X86_64
931 process->ScanConstant(base, *pt, PROCESS_RELOC_DIRECT);
932 #endif /* HOSTARCHITECTURE_X86_64 */
933 }
934 (*pt) += 4;
935 }
936 }
937 else if (md == 1) (*pt)++;
938 else if (md == 2) (*pt) += 4;
939 }
940 else if (md == 0 && rm == 5)
941 {
942 if (!lea) {
943 #ifndef HOSTARCHITECTURE_X86_64
944 /* Absolute address. */
945 process->ScanConstant(base, *pt, PROCESS_RELOC_DIRECT);
946 #endif /* HOSTARCHITECTURE_X86_64 */
947 }
948 *pt += 4;
949 }
950 else
951 {
952 if (md == 1) *pt += 1;
953 else if (md == 2) *pt += 4;
954 }
955 }
956
957 /* Added to deal with constants within the
958 code rather than in the constant area. The constant
959 area is still needed for the function name.
960 DCJM 2/1/2001
961 */
ScanConstantsWithinCode(PolyObject * addr,PolyObject * old,POLYUNSIGNED length,ScanAddress * process)962 void X86Dependent::ScanConstantsWithinCode(PolyObject *addr, PolyObject *old, POLYUNSIGNED length, ScanAddress *process)
963 {
964 byte *pt = (byte*)addr;
965 PolyWord *end = addr->Offset(length - 1);
966 #ifdef POLYML32IN64
967 // If this begins with enter-int it's interpreted code - ignore
968 if (pt[0] == 0xff && pt[1] == 0x55 && pt[2] == 0x48) return;
969 #endif
970
971 while (true)
972 {
973 // Escape prefixes come before any Rex byte
974 if (*pt == 0xf2 || *pt == 0xf3 || *pt == 0x66)
975 pt++;
976 #ifdef HOSTARCHITECTURE_X86_64
977 // REX prefixes. Set this first.
978 byte lastRex;
979 if (*pt >= 0x40 && *pt <= 0x4f)
980 lastRex = *pt++;
981 else
982 lastRex = 0;
983
984 //printf("pt=%p *pt=%x\n", pt, *pt);
985
986 #endif /* HOSTARCHITECTURE_X86_64 */
987 switch (*pt)
988 {
989 case 0x00: return; // This is actually the first byte of the old "marker" word.
990 case 0xf4: return; // Halt - now used as a marker.
991 case 0x50: case 0x51: case 0x52: case 0x53:
992 case 0x54: case 0x55: case 0x56: case 0x57: /* Push */
993 case 0x58: case 0x59: case 0x5a: case 0x5b:
994 case 0x5c: case 0x5d: case 0x5e: case 0x5f: /* Pop */
995 case 0x90: /* nop */ case 0xc3: /* ret */
996 case 0xf9: /* stc */ case 0xce: /* into */
997 case 0xf0: /* lock. */ case 0xf3: /* rep/repe */
998 case 0xa4: case 0xa5: case 0xaa: case 0xab: /* movs/stos */
999 case 0xa6: /* cmpsb */ case 0x9e: /* sahf */ case 0x99: /* cqo/cdq */
1000 pt++; break;
1001
1002 case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
1003 case 0x78: case 0x79: case 0x7a: case 0x7b: case 0x7c: case 0x7d: case 0x7e: case 0x7f:
1004 case 0xeb:
1005 /* short jumps. */
1006 case 0xcd: /* INT - now used for a register mask */
1007 case 0xa8: /* TEST_ACC8 */
1008 case 0x6a: /* PUSH_8 */
1009 pt += 2; break;
1010
1011 case 0xc2: /* RET_16 */
1012 case 0xca: /* FAR RET 16 - used for a register mask */
1013 pt += 3; break;
1014
1015 case 0x8d: /* leal. */
1016 pt++; skipea(addr, &pt, process, true); break;
1017
1018 case 0x03: case 0x0b: case 0x13: case 0x1b:
1019 case 0x23: case 0x2b: case 0x33: case 0x3b: /* Add r,ea etc. */
1020 case 0x88: /* MOVB_R_A */ case 0x89: /* MOVL_R_A */
1021 case 0x8b: /* MOVL_A_R */
1022 case 0x62: /* BOUNDL */
1023 case 0xff: /* Group5 */
1024 case 0xd1: /* Group2_1_A */
1025 case 0x8f: /* POP_A */
1026 case 0xd3: /* Group2_CL_A */
1027 case 0x87: // XCHNG
1028 case 0x63: // MOVSXD
1029 pt++; skipea(addr, &pt, process, false); break;
1030
1031 case 0xf6: /* Group3_a */
1032 {
1033 int isTest = 0;
1034 pt++;
1035 /* The test instruction has an immediate operand. */
1036 if ((*pt & 0x38) == 0) isTest = 1;
1037 skipea(addr, &pt, process, false);
1038 if (isTest) pt++;
1039 break;
1040 }
1041
1042 case 0xf7: /* Group3_A */
1043 {
1044 int isTest = 0;
1045 pt++;
1046 /* The test instruction has an immediate operand. */
1047 if ((*pt & 0x38) == 0) isTest = 1;
1048 skipea(addr, &pt, process, false);
1049 if (isTest) pt += 4;
1050 break;
1051 }
1052
1053 case 0xc1: /* Group2_8_A */
1054 case 0xc6: /* MOVB_8_A */
1055 case 0x83: /* Group1_8_A */
1056 case 0x80: /* Group1_8_a */
1057 case 0x6b: // IMUL Ev,Ib
1058 pt++; skipea(addr, &pt, process, false); pt++; break;
1059
1060 case 0x69: // IMUL Ev,Iv
1061 pt++; skipea(addr, &pt, process, false); pt += 4; break;
1062
1063 case 0x81: /* Group1_32_A */
1064 {
1065 pt ++;
1066 #ifndef HOSTARCHITECTURE_X86_64
1067 unsigned opCode = *pt;
1068 #endif
1069 skipea(addr, &pt, process, false);
1070 // Only check the 32 bit constant if this is a comparison.
1071 // For other operations this may be untagged and shouldn't be an address.
1072 #ifndef HOSTARCHITECTURE_X86_64
1073 if ((opCode & 0x38) == 0x38)
1074 process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
1075 #endif
1076 pt += 4;
1077 break;
1078 }
1079
1080 case 0xe8: case 0xe9:
1081 // Long jump and call. These are used to call constant (known) functions
1082 // and also long jumps within the function.
1083 {
1084 pt++;
1085 POLYSIGNED disp = (pt[3] & 0x80) ? -1 : 0; // Set the sign just in case.
1086 for(unsigned i = 4; i > 0; i--)
1087 disp = (disp << 8) | pt[i-1];
1088 byte *absAddr = pt + disp + 4; // The address is relative to AFTER the constant
1089
1090 // If the new address is within the current piece of code we don't do anything
1091 if (absAddr >= (byte*)addr && absAddr < (byte*)end) {}
1092 else {
1093 #ifdef HOSTARCHITECTURE_X86_64
1094 ASSERT(sizeof(PolyWord) == 4); // Should only be used internally on x64
1095 #endif /* HOSTARCHITECTURE_X86_64 */
1096 if (addr != old)
1097 {
1098 // The old value of the displacement was relative to the old address before
1099 // we copied this code segment.
1100 // We have to correct it back to the original address.
1101 absAddr = absAddr - (byte*)addr + (byte*)old;
1102 // We have to correct the displacement for the new location and store
1103 // that away before we call ScanConstant.
1104 size_t newDisp = absAddr - pt - 4;
1105 byte* wr = gMem.SpaceForAddress(pt)->writeAble(pt);
1106 for (unsigned i = 0; i < 4; i++)
1107 {
1108 wr[i] = (byte)(newDisp & 0xff);
1109 newDisp >>= 8;
1110 }
1111 }
1112 process->ScanConstant(addr, pt, PROCESS_RELOC_I386RELATIVE);
1113 }
1114 pt += 4;
1115 break;
1116 }
1117
1118 case 0xc7:/* MOVL_32_A */
1119 {
1120 pt++;
1121 if ((*pt & 0xc0) == 0x40 /* Byte offset or sib present */ &&
1122 ((*pt & 7) != 4) /* But not sib present */ && pt[1] == 256-sizeof(PolyWord))
1123 {
1124 /* We may use a move instruction to set the length
1125 word on a new segment. We mustn't try to treat this as a constant. */
1126 pt += 6; /* Skip the modrm byte, the offset and the constant. */
1127 }
1128 else
1129 {
1130 skipea(addr, &pt, process, false);
1131 #ifndef HOSTARCHITECTURE_X86_64
1132 // This isn't used for addresses even in 32-in-64
1133 process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
1134 #endif /* HOSTARCHITECTURE_X86_64 */
1135 pt += 4;
1136 }
1137 break;
1138 }
1139
1140 case 0xb8: case 0xb9: case 0xba: case 0xbb:
1141 case 0xbc: case 0xbd: case 0xbe: case 0xbf: /* MOVL_32_64_R */
1142 pt ++;
1143 #ifdef HOSTARCHITECTURE_X86_64
1144 if ((lastRex & 8) == 0)
1145 pt += 4; // 32-bit mode on 64-bits
1146 else
1147 #endif /* HOSTARCHITECTURE_X86_64 */
1148 {
1149 // This is used in native 32-bit for constants and in
1150 // 32-in-64 for the special case of an absolute address.
1151 process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
1152 pt += sizeof(uintptr_t);
1153 }
1154 break;
1155
1156 case 0x68: /* PUSH_32 */
1157 pt ++;
1158 #if (!defined(HOSTARCHITECTURE_X86_64))
1159 process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
1160 #endif
1161 pt += 4;
1162 break;
1163
1164 case 0x0f: /* ESCAPE */
1165 {
1166 pt++;
1167 switch (*pt)
1168 {
1169 case 0xb6: /* movzl */
1170 case 0xb7: // movzw
1171 case 0xbe: // movsx
1172 case 0xbf: // movsx
1173 case 0xc1: /* xaddl */
1174 case 0xae: // ldmxcsr/stmxcsr
1175 case 0xaf: // imul
1176 case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
1177 case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
1178 // cmov
1179 pt++; skipea(addr, &pt, process, false); break;
1180
1181 case 0x80: case 0x81: case 0x82: case 0x83:
1182 case 0x84: case 0x85: case 0x86: case 0x87:
1183 case 0x88: case 0x89: case 0x8a: case 0x8b:
1184 case 0x8c: case 0x8d: case 0x8e: case 0x8f:
1185 /* Conditional branches with 32-bit displacement. */
1186 pt += 5; break;
1187
1188 case 0x90: case 0x91: case 0x92: case 0x93:
1189 case 0x94: case 0x95: case 0x96: case 0x97:
1190 case 0x98: case 0x99: case 0x9a: case 0x9b:
1191 case 0x9c: case 0x9d: case 0x9e: case 0x9f:
1192 /* SetCC. */
1193 pt++; skipea(addr, &pt, process, false); break;
1194
1195 // These are SSE2 instructions
1196 case 0x10: case 0x11: case 0x58: case 0x5c: case 0x59: case 0x5e:
1197 case 0x2e: case 0x2a: case 0x54: case 0x57: case 0x5a: case 0x6e:
1198 case 0x7e: case 0x2c: case 0x2d:
1199 pt++; skipea(addr, &pt, process, false); break;
1200
1201 case 0x73: // PSRLDQ - EA,imm
1202 pt++; skipea(addr, &pt, process, false); pt++; break;
1203
1204 default: Crash("Unknown opcode %d at %p\n", *pt, pt);
1205 }
1206 break;
1207 }
1208
1209 case 0xd8: case 0xd9: case 0xda: case 0xdb:
1210 case 0xdc: case 0xdd: case 0xde: case 0xdf: // Floating point escape instructions
1211 {
1212 pt++;
1213 if ((*pt & 0xe0) == 0xe0) pt++;
1214 else skipea(addr, &pt, process, false);
1215 break;
1216 }
1217
1218 default: Crash("Unknown opcode %d at %p\n", *pt, pt);
1219 }
1220 }
1221 }
1222
1223 // Increment the value contained in the first word of the mutex.
AtomicDecrement(Handle mutexp)1224 Handle X86TaskData::AtomicDecrement(Handle mutexp)
1225 {
1226 PolyObject *p = DEREFHANDLE(mutexp);
1227 POLYUNSIGNED result = X86AsmAtomicDecrement(p);
1228 return this->saveVec.push(PolyWord::FromUnsigned(result));
1229 }
1230
1231 // Release a mutex. Because the atomic increment and decrement
1232 // use the hardware LOCK prefix we can simply set this to zero.
AtomicReset(Handle mutexp)1233 void X86TaskData::AtomicReset(Handle mutexp)
1234 {
1235 DEREFHANDLE(mutexp)->Set(0, TAGGED(0));
1236 }
1237
1238 static X86Dependent x86Dependent;
1239
1240 MachineDependent *machineDependent = &x86Dependent;
1241
1242 extern "C" {
1243 POLYEXTERNALSYMBOL void *PolyX86GetThreadData();
1244 }
1245
1246 // Return the address of assembly data for the current thread. This is normally in
1247 // RBP except if we are in a callback.
PolyX86GetThreadData()1248 void *PolyX86GetThreadData()
1249 {
1250 // We should get the task data for the thread that is running this code.
1251 // If this thread has been created by the foreign code we will have to
1252 // create a new one here.
1253 TaskData* taskData = processes->GetTaskDataForThread();
1254 if (taskData == 0)
1255 {
1256 try {
1257 taskData = processes->CreateNewTaskData(0, 0, 0, TAGGED(0));
1258 }
1259 catch (std::bad_alloc&) {
1260 ::Exit("Unable to create thread data - insufficient memory");
1261 }
1262 catch (MemoryException&) {
1263 ::Exit("Unable to create thread data - insufficient memory");
1264 }
1265 }
1266 return &((X86TaskData*)taskData)->assemblyInterface;
1267 }
1268
1269 struct _entrypts machineSpecificEPT[] =
1270 {
1271 { "PolyX86GetThreadData", (polyRTSFunction)& PolyX86GetThreadData },
1272
1273 { NULL, NULL} // End of list.
1274 };
1275
1276