1 /*
2     Title:  Machine dependent code for i386 and X64 under Windows and Unix
3 
4     Copyright (c) 2000-7
5         Cambridge University Technical Services Limited
6 
7     Further work copyright David C. J. Matthews 2011-20
8 
9     This library is free software; you can redistribute it and/or
10     modify it under the terms of the GNU Lesser General Public
11     License version 2.1 as published by the Free Software Foundation.
12 
13     This library is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16     Lesser General Public License for more details.
17 
18     You should have received a copy of the GNU Lesser General Public
19     License along with this library; if not, write to the Free Software
20     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
21 
22 */
23 
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #elif defined(_WIN32)
27 #include "winconfig.h"
28 #else
29 #error "No configuration file"
30 #endif
31 
32 #ifdef HAVE_STDLIB_H
33 #include <stdlib.h>
34 #endif
35 
36 #include <stdio.h>
37 
38 #ifdef HAVE_SIGNAL_H
39 #include <signal.h>
40 #endif
41 
42 #ifdef HAVE_ASSERT_H
43 #include <assert.h>
44 #define ASSERT(x)   assert(x)
45 #else
46 #define ASSERT(x)
47 #endif
48 
49 #ifdef HAVE_STRING_H
50 #include <string.h>
51 #endif
52 
53 #ifdef HAVE_ERRNO_H
54 #include <errno.h>
55 #endif
56 
57 #if (defined(_WIN32))
58 #include <windows.h>
59 #include <excpt.h>
60 #endif
61 
62 #include "globals.h"
63 #include "run_time.h"
64 #include "diagnostics.h"
65 #include "processes.h"
66 #include "profiling.h"
67 #include "machine_dep.h"
68 #include "scanaddrs.h"
69 #include "memmgr.h"
70 #include "rtsentry.h"
71 
72 #include "sys.h" // Temporary
73 
74 
75 /**********************************************************************
76  *
77  * Register usage:
78  *
79  *  %Reax: First argument to function.  Result of function call.
80  *  %Rebx: Second argument to function.
81  *  %Recx: General register
82  *  %Redx: Closure pointer in call.
83  *  %Rebp: Points to memory used for extra registers
84  *  %Resi: General register.
85  *  %Redi: General register.
86  *  %Resp: Stack pointer.
87  *  The following apply only on the X64
88  *  %R8:   Third argument to function
89  *  %R9:   Fourth argument to function
90  *  %R10:  Fifth argument to function
91  *  %R11:  General register
92  *  %R12:  General register
93  *  %R13:  General register
94  *  %R14:  General register
95  *  %R15:  Memory allocation pointer
96 
97  *
98  **********************************************************************/
99 
// Floating-point save area.  An instance is embedded in AssemblyArgs as p_fp
// and is zeroed by InitStackFrame.  The layout differs between the 64-bit and
// 32-bit hosts.
100 #ifdef HOSTARCHITECTURE_X86_64
101 struct fpSaveArea {
102     double fpregister[7]; // Save area for xmm0-6
103 };
104 #else
105 // Structure of floating point save area.
106 // This is dictated by the hardware.
// NOTE(review): the field sequence looks like the 32-bit x87 FSAVE/FNSAVE
// memory image - confirm against the processor manual before changing it.
// One floating point register image occupies ten bytes.
107 typedef byte fpregister[10];
108 
109 struct fpSaveArea {
110     unsigned short cw;          // Control word.  InitStackFrame sets this to 0x027f.
111     unsigned short _unused0;
112     unsigned short sw;          // Presumably the status word - TODO confirm.
113     unsigned short _unused1;
114     unsigned short tw;          // Tag word.  InitStackFrame sets this to 0xffff (all registers unused).
115     unsigned short _unused2;
116     unsigned fip;
117     unsigned short fcs0;
118     unsigned short _unused3;
119     unsigned foo;
120     unsigned short fcs1;
121     unsigned short _unused4;
122     fpregister registers[8];    // Eight register images of ten bytes each.
123 };
124 #endif
125 
/* Amount of ML stack space, in native words, kept in reserve for registers,
   C exception handling and so on.  The compiler requires two stack frames'
   worth (2 * 20 words); slightly more than that is reserved here.

   Where signals cannot be delivered on a separate stack (not Windows and no
   sigaltstack) the handler runs on the Poly stack, so extra room is added.
*/
#if defined(_WIN32) || defined(HAVE_SIGALTSTACK)
#define OVERFLOW_STACK_SIZE 50
#else
#define OVERFLOW_STACK_SIZE (50+1024)
#endif
138 
139 class X86TaskData;
140 
141 // This is passed as the argument vector to X86AsmSwitchToPoly.
142 // The offsets are built into the assembly code and the code-generator.
143 // localMpointer and stackPtr are updated before control returns to C.
// Because the offsets are fixed externally, fields must not be reordered,
// resized, inserted or removed here without changing those other places too;
// that is why the "no longer used" fields are still present.
144 typedef struct _AssemblyArgs {
145 public:
146     PolyWord        *localMpointer;     // Allocation ptr + 1 word
147     stackItem       *handlerRegister;   // Current exception handler
148     PolyWord        *localMbottom;      // Base of memory + 1 word
149     stackItem       *stackLimit;        // Lower limit of stack
150     stackItem       exceptionPacket;    // Set if there is an exception
151     byte            unusedRequestCode;  // No longer used.
152     byte            unusedFlag;         // No longer used
153     byte            returnReason;       // Reason for returning from ML.  One of the RETURN_REASON values.
154     byte            unusedRestore;      // No longer used.
155     uintptr_t       saveCStack;         // Saved C stack frame.
156     PolyWord        threadId;           // My thread id.  Saves having to call into RTS for it.
157     stackItem       *stackPtr;          // Current stack pointer
158     byte            *noLongerUsed;      // Now removed
159     byte            *heapOverFlowCall;  // These are filled in with the functions.
160     byte            *stackOverFlowCall;     // (The X86TaskData constructor stores the
161     byte            *stackOverFlowCallEx;   //  assembly entry points and X86TrapHandler
162     byte            *trapHandlerEntry;      //  in these four fields.)
163     // Saved registers, where applicable.
164     stackItem       p_rax;
165     stackItem       p_rbx;
166     stackItem       p_rcx;
167     stackItem       p_rdx;
168     stackItem       p_rsi;
169     stackItem       p_rdi;
170 #ifdef HOSTARCHITECTURE_X86_64
171     stackItem       p_r8;
172     stackItem       p_r9;
173     stackItem       p_r10;
174     stackItem       p_r11;
175     stackItem       p_r12;
176     stackItem       p_r13;
177     stackItem       p_r14;
178 #endif
179     struct fpSaveArea p_fp;             // Floating point save area, zeroed by InitStackFrame.
180 } AssemblyArgs;
181 
182 // These next few are temporarily added for the interpreter
183 // This duplicates some code in reals.cpp but is now updated.
// DOUBLESIZE is the number of POLYUNSIGNED units that make up one double.
184 #define DOUBLESIZE (sizeof(double)/sizeof(POLYUNSIGNED))
185 
// Overlays a double with an array of DOUBLESIZE POLYUNSIGNED values so the
// same storage can be accessed either way.
186 union realdb { double dble; POLYUNSIGNED puns[DOUBLESIZE]; };
187 
// LGWORDSIZE is the number of PolyWords that make up one native (uintptr_t) word.
188 #define LGWORDSIZE (sizeof(uintptr_t) / sizeof(PolyWord))
189 
190 class X86TaskData: public TaskData {
191 public:
192     X86TaskData();
193     unsigned allocReg; // The register to take the allocated space.
194     POLYUNSIGNED allocWords; // The words to allocate.
195     AssemblyArgs assemblyInterface;
196     int saveRegisterMask; // Registers that need to be updated by a GC.
197 
198     virtual void GarbageCollect(ScanAddress *process);
199     void ScanStackAddress(ScanAddress *process, stackItem &val, StackSpace *stack);
200     virtual void EnterPolyCode(); // Start running ML
201     virtual void InterruptCode();
202     virtual bool AddTimeProfileCount(SIGNALCONTEXT *context);
203     virtual void InitStackFrame(TaskData *parentTask, Handle proc, Handle arg);
204     virtual void SetException(poly_exn *exc);
205 
206     // Release a mutex in exactly the same way as compiler code
207     virtual Handle AtomicDecrement(Handle mutexp);
208     virtual void AtomicReset(Handle mutexp);
209 
210     // Return the minimum space occupied by the stack.  Used when setting a limit.
211     // N.B. This is PolyWords not native words.
currentStackSpace(void) const212     virtual uintptr_t currentStackSpace(void) const
213         { return (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) +
214             OVERFLOW_STACK_SIZE*sizeof(uintptr_t)/sizeof(PolyWord); }
215 
216     // Increment the profile count for an allocation.  Also now used for mutex contention.
addProfileCount(POLYUNSIGNED words)217     virtual void addProfileCount(POLYUNSIGNED words)
218     { addSynchronousCount(assemblyInterface.stackPtr[0].codeAddr, words); }
219 
220     // PreRTSCall: After calling from ML to the RTS we need to save the current heap pointer
PreRTSCall(void)221     virtual void PreRTSCall(void) { TaskData::PreRTSCall();  SaveMemRegisters(); }
222     // PostRTSCall: Before returning we need to restore the heap pointer.
223     // If there has been a GC in the RTS call we need to create a new heap area.
PostRTSCall(void)224     virtual void PostRTSCall(void) { SetMemRegisters(); TaskData::PostRTSCall();  }
225 
226     virtual void CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length);
227 
228     void HeapOverflowTrap(byte *pcPtr);
229 
230     void SetMemRegisters();
231     void SaveMemRegisters();
232     void SetRegisterMask();
233 
234     void HandleTrap();
235 
236     PLock interruptLock;
237 
238     stackItem *get_reg(int n);
239 
regSP()240     stackItem *&regSP() { return assemblyInterface.stackPtr; }
241 
regAX()242     stackItem &regAX() { return assemblyInterface.p_rax; }
regBX()243     stackItem &regBX() { return assemblyInterface.p_rbx; }
regCX()244     stackItem &regCX() { return assemblyInterface.p_rcx; }
regDX()245     stackItem &regDX() { return assemblyInterface.p_rdx; }
regSI()246     stackItem &regSI() { return assemblyInterface.p_rsi; }
regDI()247     stackItem &regDI() { return assemblyInterface.p_rdi; }
248 #ifdef HOSTARCHITECTURE_X86_64
reg8()249     stackItem &reg8() { return assemblyInterface.p_r8; }
reg9()250     stackItem &reg9() { return assemblyInterface.p_r9; }
reg10()251     stackItem &reg10() { return assemblyInterface.p_r10; }
reg11()252     stackItem &reg11() { return assemblyInterface.p_r11; }
reg12()253     stackItem &reg12() { return assemblyInterface.p_r12; }
reg13()254     stackItem &reg13() { return assemblyInterface.p_r13; }
reg14()255     stackItem &reg14() { return assemblyInterface.p_r14; }
256 #endif
257 
258 #if (defined(_WIN32))
259     DWORD savedErrno;
260 #else
261     int savedErrno;
262 #endif
263 };
264 
265 class X86Dependent: public MachineDependent {
266 public:
X86Dependent()267     X86Dependent() {}
268 
269     // Create a task data object.
CreateTaskData(void)270     virtual TaskData *CreateTaskData(void) { return new X86TaskData(); }
271 
272     // Initial size of stack in PolyWords
InitialStackSize(void)273     virtual unsigned InitialStackSize(void) { return (128+OVERFLOW_STACK_SIZE) * sizeof(uintptr_t) / sizeof(PolyWord); }
274     virtual void ScanConstantsWithinCode(PolyObject *addr, PolyObject *oldAddr, POLYUNSIGNED length, ScanAddress *process);
275 
MachineArchitecture(void)276     virtual Architectures MachineArchitecture(void)
277 #ifndef HOSTARCHITECTURE_X86_64
278          { return MA_I386; }
279 #elif defined(POLYML32IN64)
280         { return MA_X86_64_32; }
281 #else
282          { return MA_X86_64; }
283 #endif
284 };
285 
// Codes stored in the returnReason byte of AssemblyArgs.  HandleTrap
// dispatches on them.
enum RETURN_REASON {
    RETURN_HEAP_OVERFLOW    = 1,    // The heap has overflowed.
    RETURN_STACK_OVERFLOW   = 2,    // Stack check failed against the current stack pointer.
    RETURN_STACK_OVERFLOWEX = 3     // Stack check failed against a limit the code computed.
};
292 
293 extern "C" {
294 
295     // These are declared in the assembly code segment.
296     void X86AsmSwitchToPoly(void *);
297     extern int X86AsmCallExtraRETURN_HEAP_OVERFLOW(void);
298     extern int X86AsmCallExtraRETURN_STACK_OVERFLOW(void);
299     extern int X86AsmCallExtraRETURN_STACK_OVERFLOWEX(void);
300 
301     POLYUNSIGNED X86AsmAtomicDecrement(PolyObject*);
302 
303     void X86TrapHandler(PolyWord threadId);
304 };
305 
X86TaskData()306 X86TaskData::X86TaskData(): allocReg(0), allocWords(0), saveRegisterMask(0)
307 {
308     assemblyInterface.heapOverFlowCall = (byte*)X86AsmCallExtraRETURN_HEAP_OVERFLOW;
309     assemblyInterface.stackOverFlowCall = (byte*)X86AsmCallExtraRETURN_STACK_OVERFLOW;
310     assemblyInterface.stackOverFlowCallEx = (byte*)X86AsmCallExtraRETURN_STACK_OVERFLOWEX;
311     assemblyInterface.trapHandlerEntry = (byte*)X86TrapHandler;
312     savedErrno = 0;
313 }
314 
GarbageCollect(ScanAddress * process)315 void X86TaskData::GarbageCollect(ScanAddress *process)
316 {
317     TaskData::GarbageCollect(process); // Process the parent first
318     assemblyInterface.threadId = threadObject;
319 
320     if (stack != 0)
321     {
322         ASSERT(assemblyInterface.stackPtr >= (stackItem*)stack->bottom && assemblyInterface.stackPtr <= (stackItem*)stack->top);
323         // Now the values on the stack.
324         for (stackItem *q = assemblyInterface.stackPtr; q < (stackItem*)stack->top; q++)
325             ScanStackAddress(process, *q, stack);
326     }
327     // Register mask
328     for (int i = 0; i < 16; i++)
329     {
330         if (saveRegisterMask & (1 << i))
331             ScanStackAddress(process, *get_reg(i), stack);
332     }
333 }
334 
335 // Process a value within the stack.
ScanStackAddress(ScanAddress * process,stackItem & stackItem,StackSpace * stack)336 void X86TaskData::ScanStackAddress(ScanAddress *process, stackItem &stackItem, StackSpace *stack)
337 {
338     // We may have return addresses on the stack which could look like
339     // tagged values.  Check whether the value is in the code area before
340     // checking whether it is untagged.
341 #ifdef POLYML32IN64
342     // In 32-in-64 return addresses always have the top 32 bits non-zero.
343     if (stackItem.argValue < ((uintptr_t)1 << 32))
344     {
345         // It's either a tagged integer or an object pointer.
346         if (stackItem.w().IsDataPtr())
347         {
348             PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr());
349             stackItem = val;
350         }
351     }
352     else
353     {
354         // Could be a code address or a stack address.
355         MemSpace *space = gMem.SpaceForAddress(stackItem.codeAddr - 1);
356         if (space == 0 || space->spaceType != ST_CODE) return;
357         PolyObject *obj = gMem.FindCodeObject(stackItem.codeAddr);
358         ASSERT(obj != 0);
359         // Process the address of the start.  Don't update anything.
360         process->ScanObjectAddress(obj);
361     }
362 #else
363     // The -1 here is because we may have a zero-sized cell in the last
364     // word of a space.
365     MemSpace *space = gMem.SpaceForAddress(stackItem.codeAddr-1);
366     if (space == 0) return; // In particular we may have one of the assembly code addresses.
367     if (space->spaceType == ST_CODE)
368     {
369         PolyObject *obj = gMem.FindCodeObject(stackItem.codeAddr);
370         // If it is actually an integer it might be outside a valid code object.
371         if (obj == 0)
372         {
373             ASSERT(stackItem.w().IsTagged()); // It must be an integer
374         }
375         else // Process the address of the start.  Don't update anything.
376             process->ScanObjectAddress(obj);
377     }
378     else if (space->spaceType == ST_LOCAL && stackItem.w().IsDataPtr())
379         // Local values must be word addresses.
380     {
381         PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr());
382         stackItem = val;
383     }
384 #endif
385 }
386 
387 
388 // Copy a stack
CopyStackFrame(StackObject * old_stack,uintptr_t old_length,StackObject * new_stack,uintptr_t new_length)389 void X86TaskData::CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length)
390 {
391     /* Moves a stack, updating all references within the stack */
392 #ifdef POLYML32IN64
393     old_length = old_length / 2;
394     new_length = new_length / 2;
395 #endif
396 
397     stackItem *old_base  = (stackItem *)old_stack;
398     stackItem *new_base  = (stackItem*)new_stack;
399     stackItem *old_top   = old_base + old_length;
400 
401     /* Calculate the offset of the new stack from the old. If the frame is
402        being extended objects in the new frame will be further up the stack
403        than in the old one. */
404 
405     uintptr_t offset = new_base - old_base + new_length - old_length;
406 
407     stackItem *oldStackPtr = assemblyInterface.stackPtr;
408 
409     // Adjust the stack pointer and handler pointer since these point into the stack.
410     assemblyInterface.stackPtr = assemblyInterface.stackPtr + offset;
411     assemblyInterface.handlerRegister = assemblyInterface.handlerRegister + offset;
412 
413     // We need to adjust any values on the stack that are pointers within the stack.
414     // Skip the unused part of the stack.
415 
416     size_t i = oldStackPtr - old_base;
417 
418     ASSERT (i <= old_length);
419 
420     i = old_length - i;
421 
422     stackItem *old = oldStackPtr;
423     stackItem *newp = assemblyInterface.stackPtr;
424 
425     while (i--)
426     {
427         stackItem old_word = *old++;
428         if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top)
429             old_word.stackAddr = old_word.stackAddr + offset;
430         else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr))
431         {
432             stackItem *addr = (stackItem*)old_word.w().AsStackAddr();
433             if (addr >= old_base && addr <= old_top)
434             {
435                 addr += offset;
436                 old_word = PolyWord::FromStackAddr((PolyWord*)addr);
437             }
438         }
439         *newp++ = old_word;
440     }
441     ASSERT(old == ((stackItem*)old_stack)+old_length);
442     ASSERT(newp == ((stackItem*)new_stack)+new_length);
443     // And change any registers that pointed into the old stack
444     for (int j = 0; j < 16; j++)
445     {
446         if (saveRegisterMask & (1 << j))
447         {
448             stackItem *regAddr = get_reg(j);
449             stackItem old_word = *regAddr;
450             if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top)
451                 old_word.stackAddr = old_word.stackAddr + offset;
452             else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr))
453             {
454                 stackItem *addr = (stackItem*)old_word.w().AsStackAddr();
455                 if (addr >= old_base && addr <= old_top)
456                 {
457                     addr += offset;
458                     old_word = PolyWord::FromStackAddr((PolyWord*)addr);
459                 }
460             }
461             *regAddr = old_word;
462        }
463     }
464 }
465 
EnterPolyCode()466 void X86TaskData::EnterPolyCode()
467 /* Called from "main" to enter the code. */
468 {
469     SetMemRegisters();
470     // Enter the ML code.
471     X86AsmSwitchToPoly(&this->assemblyInterface);
472     // This should never return
473     ASSERT(0);
474  }
475 
476 // Called from the assembly code as a result of a trap i.e. a request for
477 // a GC or to extend the stack.
X86TrapHandler(PolyWord threadId)478 void X86TrapHandler(PolyWord threadId)
479 {
480     X86TaskData* taskData = (X86TaskData*)TaskData::FindTaskForId(threadId);
481     taskData->HandleTrap();
482 }
483 
HandleTrap()484 void X86TaskData::HandleTrap()
485 {
486     SaveMemRegisters(); // Update globals from the memory registers.
487 
488     switch (this->assemblyInterface.returnReason)
489     {
490 
491     case RETURN_HEAP_OVERFLOW:
492         // The heap has overflowed.
493         SetRegisterMask();
494         this->HeapOverflowTrap(assemblyInterface.stackPtr[0].codeAddr); // Computes a value for allocWords only
495         break;
496 
497     case RETURN_STACK_OVERFLOW:
498     case RETURN_STACK_OVERFLOWEX:
499     {
500         SetRegisterMask();
501         uintptr_t min_size; // Size in PolyWords
502         if (assemblyInterface.returnReason == RETURN_STACK_OVERFLOW)
503         {
504             min_size = (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) +
505                 OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord);
506         }
507         else
508         {
509             // Stack limit overflow.  If the required stack space is larger than
510             // the fixed overflow size the code will calculate the limit in %EDI.
511             stackItem* stackP = regDI().stackAddr;
512             min_size = (this->stack->top - (PolyWord*)stackP) +
513                 OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord);
514         }
515         try {
516             // The stack check has failed.  This may either be because we really have
517             // overflowed the stack or because the stack limit value has been adjusted
518             // to result in a call here.
519             CheckAndGrowStack(this, min_size);
520         }
521         catch (IOException&) {
522             // We may get an exception while handling this if we run out of store
523         }
524         {
525             PLocker l(&interruptLock);
526             // Set the stack limit.  This clears any interrupt and also sets the
527             // correct value if we've grown the stack.
528             this->assemblyInterface.stackLimit = (stackItem*)this->stack->bottom + OVERFLOW_STACK_SIZE;
529         }
530         // We're in a safe state to handle any interrupts.
531         try {
532             // Process any asynchronous events i.e. interrupts or kill
533             processes->ProcessAsynchRequests(this);
534             // Release and re-acquire use of the ML memory to allow another thread to GC.
535             processes->ThreadReleaseMLMemory(this);
536             processes->ThreadUseMLMemory(this);
537         }
538         catch (IOException&) {
539             // If this resulted in an ML exception it will also raise a C++ exception.
540         }
541         catch (KillException&) {
542             processes->ThreadExit(this);
543         }
544         break;
545     }
546 
547     default:
548         Crash("Unknown return reason code %u", this->assemblyInterface.returnReason);
549     }
550     SetMemRegisters();
551 }
552 
InitStackFrame(TaskData * parentTaskData,Handle proc,Handle arg)553 void X86TaskData::InitStackFrame(TaskData *parentTaskData, Handle proc, Handle arg)
554 /* Initialise stack frame. */
555 {
556     StackSpace *space = this->stack;
557     StackObject * newStack = space->stack();
558     uintptr_t stack_size     = space->spaceSize() * sizeof(PolyWord) / sizeof(stackItem);
559     // Set the top of the stack inside the stack rather than at the end.  This wastes
560     // a word but if sp is actually at the end OpenBSD segfaults because it isn't in
561     // a MAP_STACK area.
562     uintptr_t topStack = stack_size - 1;
563     stackItem* stackTop = (stackItem*)newStack + topStack;
564     *stackTop = TAGGED(0); // Set it to non-zero.
565     assemblyInterface.stackPtr = stackTop;
566     assemblyInterface.stackLimit = (stackItem*)space->bottom + OVERFLOW_STACK_SIZE;
567     assemblyInterface.handlerRegister = stackTop;
568 
569     // Floating point save area.
570     memset(&assemblyInterface.p_fp, 0, sizeof(struct fpSaveArea));
571 #ifndef HOSTARCHITECTURE_X86_64
572     // Set the control word for 64-bit precision otherwise we get inconsistent results.
573     assemblyInterface.p_fp.cw = 0x027f ; // Control word
574     assemblyInterface.p_fp.tw = 0xffff; // Tag registers - all unused
575 #endif
576     // Store the argument and the closure.
577     assemblyInterface.p_rdx = proc->Word(); // Closure
578     assemblyInterface.p_rax = (arg == 0) ? TAGGED(0) : DEREFWORD(arg); // Argument
579     // Have to set the register mask in case we get a GC before the thread starts.
580     saveRegisterMask = (1 << 2) | 1; // Rdx and rax
581 
582 #ifdef POLYML32IN64
583     // In 32-in-64 RBX always contains the heap base address.
584     assemblyInterface.p_rbx.stackAddr = (stackItem*)globalHeapBase;
585 #endif
586 }
587 
// Solaris-x86 calls these registers EIP and ESP.  Provide the REG_ names used
// below when only the short names exist.
#if defined(EIP) && !defined(REG_EIP)
#define REG_EIP EIP
#endif
#if defined(ESP) && !defined(REG_ESP)
#define REG_ESP ESP
#endif
595 
596 
597 // Get the PC and SP(stack) from a signal context.  This is needed for profiling.
598 // This version gets the actual sp and pc if we are in ML.
599 // N.B. This must not call malloc since we're in a signal handler.
AddTimeProfileCount(SIGNALCONTEXT * context)600 bool X86TaskData::AddTimeProfileCount(SIGNALCONTEXT *context)
601 {
602     stackItem * sp = 0;
603     POLYCODEPTR pc = 0;
604     if (context != 0)
605     {
606         // The tests for HAVE_UCONTEXT_T, HAVE_STRUCT_SIGCONTEXT and HAVE_WINDOWS_H need
607         // to follow the tests in processes.h.
608 #if defined(HAVE_WINDOWS_H)
609 #ifdef _WIN64
610         sp = (stackItem *)context->Rsp;
611         pc = (POLYCODEPTR)context->Rip;
612 #else
613         // Windows 32 including cygwin.
614         sp = (stackItem *)context->Esp;
615         pc = (POLYCODEPTR)context->Eip;
616 #endif
617 #elif defined(HAVE_UCONTEXT_T)
618 #ifdef HAVE_MCONTEXT_T_GREGS
619         // Linux
620 #ifndef HOSTARCHITECTURE_X86_64
621         pc = (byte*)context->uc_mcontext.gregs[REG_EIP];
622         sp = (stackItem*)context->uc_mcontext.gregs[REG_ESP];
623 #else /* HOSTARCHITECTURE_X86_64 */
624         pc = (byte*)context->uc_mcontext.gregs[REG_RIP];
625         sp = (stackItem*)context->uc_mcontext.gregs[REG_RSP];
626 #endif /* HOSTARCHITECTURE_X86_64 */
627 #elif defined(HAVE_MCONTEXT_T_MC_ESP)
628        // FreeBSD
629 #ifndef HOSTARCHITECTURE_X86_64
630         pc = (byte*)context->uc_mcontext.mc_eip;
631         sp = (stackItem*)context->uc_mcontext.mc_esp;
632 #else /* HOSTARCHITECTURE_X86_64 */
633         pc = (byte*)context->uc_mcontext.mc_rip;
634         sp = (stackItem*)context->uc_mcontext.mc_rsp;
635 #endif /* HOSTARCHITECTURE_X86_64 */
636 #else
637        // Mac OS X
638 #ifndef HOSTARCHITECTURE_X86_64
639 #if(defined(HAVE_STRUCT_MCONTEXT_SS)||defined(HAVE_STRUCT___DARWIN_MCONTEXT32_SS))
640         pc = (byte*)context->uc_mcontext->ss.eip;
641         sp = (stackItem*)context->uc_mcontext->ss.esp;
642 #elif(defined(HAVE_STRUCT___DARWIN_MCONTEXT32___SS))
643         pc = (byte*)context->uc_mcontext->__ss.__eip;
644         sp = (stackItem*)context->uc_mcontext->__ss.__esp;
645 #endif
646 #else /* HOSTARCHITECTURE_X86_64 */
647 #if(defined(HAVE_STRUCT_MCONTEXT_SS)||defined(HAVE_STRUCT___DARWIN_MCONTEXT64_SS))
648         pc = (byte*)context->uc_mcontext->ss.rip;
649         sp = (stackItem*)context->uc_mcontext->ss.rsp;
650 #elif(defined(HAVE_STRUCT___DARWIN_MCONTEXT64___SS))
651         pc = (byte*)context->uc_mcontext->__ss.__rip;
652         sp = (stackItem*)context->uc_mcontext->__ss.__rsp;
653 #endif
654 #endif /* HOSTARCHITECTURE_X86_64 */
655 #endif
656 #elif defined(HAVE_STRUCT_SIGCONTEXT)
657 #if defined(HOSTARCHITECTURE_X86_64) && defined(__OpenBSD__)
658         // CPP defines missing in amd64/signal.h in OpenBSD
659         pc = (byte*)context->sc_rip;
660         sp = (stackItem*)context->sc_rsp;
661 #else // !HOSTARCHITEXTURE_X86_64 || !defined(__OpenBSD__)
662         pc = (byte*)context->sc_pc;
663         sp = (stackItem*)context->sc_sp;
664 #endif
665 #endif
666     }
667     if (pc != 0)
668     {
669         // See if the PC we've got is an ML code address.
670         MemSpace *space = gMem.SpaceForAddress(pc);
671         if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
672         {
673             incrementCountAsynch(pc);
674             return true;
675         }
676     }
677     // See if the sp value is in the current stack.
678     if (sp >= (stackItem*)this->stack->bottom && sp < (stackItem*)this->stack->top)
679     {
680         // We may be in the assembly code.  The top of the stack will be a return address.
681         pc = sp[0].w().AsCodePtr();
682         MemSpace *space = gMem.SpaceForAddress(pc);
683         if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
684         {
685             incrementCountAsynch(pc);
686             return true;
687         }
688     }
689     // See if the value of regSP is a valid stack pointer.
690     // This works if we happen to be in an RTS call using a "Full" call.
691     // It doesn't work if we've used a "Fast" call because that doesn't save the SP.
692     sp = assemblyInterface.stackPtr;
693     if (sp >= (stackItem*)this->stack->bottom && sp < (stackItem*)this->stack->top)
694     {
695         // We may be in the run-time system.
696         pc = sp[0].w().AsCodePtr();
697         MemSpace *space = gMem.SpaceForAddress(pc);
698         if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
699         {
700             incrementCountAsynch(pc);
701             return true;
702         }
703     }
704     // None of those worked
705     return false;
706 }
707 
708 // This is called from a different thread so we have to be careful.
InterruptCode()709 void X86TaskData::InterruptCode()
710 {
711     PLocker l(&interruptLock);
712     // Set the stack limit pointer to the top of the stack to cause
713     // a trap when we next check for stack overflow.
714     // We use a lock here to ensure that we always use the current value of the
715     // stack.  The thread we're interrupting could be growing the stack at this point.
716     if (this->stack != 0)
717         this->assemblyInterface.stackLimit = (stackItem*)(this->stack->top-1);
718 }
719 
720 // This is called from SwitchToPoly before we enter the ML code.
SetMemRegisters()721 void X86TaskData::SetMemRegisters()
722 {
723     // Copy the current store limits into variables before we go into the assembly code.
724 
725     // If we haven't yet set the allocation area or we don't have enough we need
726     // to create one (or a new one).
727     if (this->allocPointer <= this->allocLimit + this->allocWords)
728     {
729         if (this->allocPointer < this->allocLimit)
730             Crash ("Bad length in heap overflow trap");
731 
732         // Find some space to allocate in.  Updates taskData->allocPointer and
733         // returns a pointer to the newly allocated space (if allocWords != 0)
734         PolyWord *space =
735             processes->FindAllocationSpace(this, this->allocWords, true);
736         if (space == 0)
737         {
738             // We will now raise an exception instead of returning.
739             // Set allocWords to zero so we don't set the allocation register
740             // since that could be holding the exception packet.
741             this->allocWords = 0;
742         }
743         // Undo the allocation just now.
744         this->allocPointer += this->allocWords;
745     }
746 
747     if (this->allocWords != 0)
748     {
749         // If we have had a heap trap we actually do the allocation here.
750         // We will have already garbage collected and recovered sufficient space.
751         // This also happens if we have just trapped because of store profiling.
752         this->allocPointer -= this->allocWords; // Now allocate
753         // Set the allocation register to this area. N.B.  This is an absolute address.
754         if (this->allocReg < 15)
755             get_reg(this->allocReg)[0].codeAddr = (POLYCODEPTR)(this->allocPointer + 1); /* remember: it's off-by-one */
756         this->allocWords = 0;
757     }
758 
759     // If we have run out of store, either just above or while allocating in the RTS,
760     // allocPointer and allocLimit will have been set to zero as part of the GC.  We will
761     // now be raising an exception which may free some store but we need to come back here
762     // before we allocate anything.  The compiled code uses unsigned arithmetic to check for
763     // heap overflow but only after subtracting the space required.  We need to make sure
764     // that the values are still non-negative after substracting any object size.
765     if (this->allocPointer == 0) this->allocPointer += MAX_OBJECT_SIZE;
766     if (this->allocLimit == 0) this->allocLimit += MAX_OBJECT_SIZE;
767 
768     this->assemblyInterface.localMbottom = this->allocLimit + 1;
769     this->assemblyInterface.localMpointer = this->allocPointer + 1;
770     // If we are profiling store allocation we set mem_hl so that a trap
771     // will be generated.
772     if (profileMode == kProfileStoreAllocation)
773         this->assemblyInterface.localMbottom = this->assemblyInterface.localMpointer;
774 
775     this->assemblyInterface.threadId = this->threadObject;
776 }
777 
778 // This is called whenever we have returned from ML to C.
SaveMemRegisters()779 void X86TaskData::SaveMemRegisters()
780 {
781     this->allocPointer = this->assemblyInterface.localMpointer - 1;
782     this->allocWords = 0;
783     this->assemblyInterface.exceptionPacket = TAGGED(0);
784     this->saveRegisterMask = 0;
785 }
786 
787 // Called on a GC or stack overflow trap.  The register mask
788 // is in the bytes after the trap call.
SetRegisterMask()789 void X86TaskData::SetRegisterMask()
790 {
791     byte *pc = assemblyInterface.stackPtr[0].codeAddr;
792     if (*pc == 0xcd) // CD - INT n is used for a single byte
793     {
794         pc++;
795         saveRegisterMask = *pc++;
796     }
797     else if (*pc == 0xca) // CA - FAR RETURN is used for a two byte mask
798     {
799         pc++;
800         saveRegisterMask = pc[0] | (pc[1] << 8);
801         pc += 2;
802     }
803     assemblyInterface.stackPtr[0].codeAddr = pc;
804 }
805 
get_reg(int n)806 stackItem *X86TaskData::get_reg(int n)
807 /* Returns a pointer to the register given by n. */
808 {
809     switch (n)
810     {
811     case 0: return &assemblyInterface.p_rax;
812     case 1: return &assemblyInterface.p_rcx;
813     case 2: return &assemblyInterface.p_rdx;
814     case 3: return &assemblyInterface.p_rbx;
815         // Should not have rsp or rbp.
816     case 6: return &assemblyInterface.p_rsi;
817     case 7: return &assemblyInterface.p_rdi;
818 #ifdef HOSTARCHITECTURE_X86_64
819     case 8: return &assemblyInterface.p_r8;
820     case 9: return &assemblyInterface.p_r9;
821     case 10: return &assemblyInterface.p_r10;
822     case 11: return &assemblyInterface.p_r11;
823     case 12: return &assemblyInterface.p_r12;
824     case 13: return &assemblyInterface.p_r13;
825     case 14: return &assemblyInterface.p_r14;
826     // R15 is the heap pointer so shouldn't occur here.
827 #endif /* HOSTARCHITECTURE_X86_64 */
828     default: Crash("Unknown register %d\n", n);
829     }
830 }
831 
832 // Called as a result of a heap overflow trap
HeapOverflowTrap(byte * pcPtr)833 void X86TaskData::HeapOverflowTrap(byte *pcPtr)
834 {
835     X86TaskData *mdTask = this;
836     POLYUNSIGNED wordsNeeded = 0;
837     // The next instruction, after any branches round forwarding pointers or pop
838     // instructions, will be a store of register containing the adjusted heap pointer.
839     // We need to find that register and the value in it in order to find out how big
840     // the area we actually wanted is.  N.B.  The code-generator and assembly code
841     // must generate the correct instruction sequence.
842 //    byte *pcPtr = assemblyInterface.programCtr;
843     while (true)
844     {
845         if (pcPtr[0] == 0xeb)
846         {
847             // Forwarding pointer
848             if (pcPtr[1] >= 128) pcPtr += 256 - pcPtr[1] + 2;
849             else pcPtr += pcPtr[1] + 2;
850         }
851         else if ((pcPtr[0] & 0xf8) == 0x58) // Pop instruction.
852             pcPtr++;
853         else if (pcPtr[0] == 0x41 && ((pcPtr[1] & 0xf8) == 0x58)) // Pop with Rex prefix
854             pcPtr += 2;
855         else break;
856     }
857 #ifndef HOSTARCHITECTURE_X86_64
858     // This should be movl REG,0[%ebp].
859     ASSERT(pcPtr[0] == 0x89);
860     mdTask->allocReg = (pcPtr[1] >> 3) & 7; // Remember this until we allocate the memory
861     stackItem *reg = get_reg(mdTask->allocReg);
862     stackItem reg_val = *reg;
863     // The space we need is the difference between this register
864     // and the current value of newptr.
865     // The +1 here is because assemblyInterface.localMpointer is A.M.pointer +1.  The reason
866     // is that after the allocation we have the register pointing at the address we will
867     // actually use.
868     wordsNeeded = (this->allocPointer - (PolyWord*)reg_val.stackAddr) + 1;
869     *reg = TAGGED(0); // Clear this - it's not a valid address.
870     /* length in words, including length word */
871 
872     ASSERT (wordsNeeded <= (1<<24)); /* Max object size including length/flag word is 2^24 words.  */
873 #else /* HOSTARCHITECTURE_X86_64 */
874     ASSERT(pcPtr[1] == 0x89 || pcPtr[1] == 0x8b);
875     if (pcPtr[1] == 0x89)
876     {
877         // New (5.4) format.  This should be movq REG,%r15
878         ASSERT(pcPtr[0] == 0x49 || pcPtr[0] == 0x4d);
879         mdTask->allocReg = (pcPtr[2] >> 3) & 7; // Remember this until we allocate the memory
880         if (pcPtr[0] & 0x4) mdTask->allocReg += 8;
881     }
882     else
883     {
884         // Alternative form of movq REG,%r15
885         ASSERT(pcPtr[0] == 0x4c || pcPtr[0] == 0x4d);
886         mdTask->allocReg = pcPtr[2] & 7; // Remember this until we allocate the memory
887         if (pcPtr[0] & 0x1) mdTask->allocReg += 8;
888     }
889     stackItem *reg = get_reg(this->allocReg);
890     stackItem reg_val = *reg;
891     wordsNeeded = (POLYUNSIGNED)((this->allocPointer - (PolyWord*)reg_val.stackAddr) + 1);
892     *reg = TAGGED(0); // Clear this - it's not a valid address.
893  #endif /* HOSTARCHITECTURE_X86_64 */
894     if (profileMode == kProfileStoreAllocation)
895         addProfileCount(wordsNeeded);
896 
897     mdTask->allocWords = wordsNeeded; // The actual allocation is done in SetMemRegisters.
898 }
899 
SetException(poly_exn * exc)900 void X86TaskData::SetException(poly_exn *exc)
901 // The RTS wants to raise an exception packet.  Normally this is as the
902 // result of an RTS call in which case the caller will check this.  It can
903 // also happen in a trap.
904 {
905     assemblyInterface.exceptionPacket = (PolyWord)exc; // Set for direct calls.
906 }
907 
908 // Decode and process an effective address.  There may
909 // be a constant address in here but in any case we need
910 // to decode it to work out where the next instruction starts.
911 // If this is an lea instruction any addresses are just constants
912 // so must not be treated as addresses.
skipea(PolyObject * base,byte ** pt,ScanAddress * process,bool lea)913 static void skipea(PolyObject *base, byte **pt, ScanAddress *process, bool lea)
914 {
915     unsigned int modrm = *((*pt)++);
916     unsigned int md = modrm >> 6;
917     unsigned int rm = modrm & 7;
918 
919     if (md == 3) { } /* Register. */
920     else if (rm == 4)
921     {
922         /* s-i-b present. */
923         unsigned int sib = *((*pt)++);
924 
925         if (md == 0)
926         {
927             if ((sib & 7) == 5)
928             {
929                 if (! lea) {
930 #ifndef HOSTARCHITECTURE_X86_64
931                     process->ScanConstant(base, *pt, PROCESS_RELOC_DIRECT);
932 #endif /* HOSTARCHITECTURE_X86_64 */
933                 }
934                 (*pt) += 4;
935             }
936         }
937         else if (md == 1) (*pt)++;
938         else if (md == 2) (*pt) += 4;
939     }
940     else if (md == 0 && rm == 5)
941     {
942         if (!lea) {
943 #ifndef HOSTARCHITECTURE_X86_64
944             /* Absolute address. */
945             process->ScanConstant(base, *pt, PROCESS_RELOC_DIRECT);
946 #endif /* HOSTARCHITECTURE_X86_64 */
947         }
948         *pt += 4;
949     }
950     else
951     {
952         if (md == 1) *pt += 1;
953         else if (md == 2) *pt += 4;
954     }
955 }
956 
957 /* Added to deal with constants within the
958    code rather than in the constant area.  The constant
959    area is still needed for the function name.
960    DCJM 2/1/2001
961 */
// Walk the machine code of a code object, one instruction per loop iteration,
// and hand every embedded constant that may be an address to
// process->ScanConstant.
//   addr    - the code object as it is now; decoding starts at its first byte.
//   old     - where the code was before it was copied (equal to addr if it has
//             not moved).  Only used to re-base call/jump displacements whose
//             target lies outside this piece of code.
//   length  - only used to compute "end" (addr->Offset(length - 1)), the upper
//             bound of the "target is inside this code" test.
//             NOTE(review): presumably a length in words - confirm.
//   process - the scanner that receives the ScanConstant calls.
// The loop finishes when it meets opcode 0x00 or 0xf4, which act as end
// markers.  Each case must advance pt by exactly the length of the
// instruction; an opcode that is not listed reaches a default case and Crash.
ScanConstantsWithinCode(PolyObject * addr,PolyObject * old,POLYUNSIGNED length,ScanAddress * process)962 void X86Dependent::ScanConstantsWithinCode(PolyObject *addr, PolyObject *old, POLYUNSIGNED length, ScanAddress *process)
963 {
964     byte *pt = (byte*)addr;
965     PolyWord *end = addr->Offset(length - 1);
966 #ifdef POLYML32IN64
967     // If this begins with enter-int it's interpreted code - ignore
968     if (pt[0] == 0xff && pt[1] == 0x55 && pt[2] == 0x48) return;
969 #endif
970 
971     while (true)
972     {
973         // Escape prefixes come before any Rex byte
        // Note that at most one such prefix byte is skipped here.
974         if (*pt == 0xf2 || *pt == 0xf3 || *pt == 0x66)
975             pt++;
976 #ifdef HOSTARCHITECTURE_X86_64
977         // REX prefixes.  Set this first.
        // lastRex is zero when the instruction has no REX prefix.
978         byte lastRex;
979         if (*pt >= 0x40 && *pt <= 0x4f)
980             lastRex = *pt++;
981         else
982             lastRex = 0;
983 
984         //printf("pt=%p *pt=%x\n", pt, *pt);
985 
986 #endif /* HOSTARCHITECTURE_X86_64 */
987         switch (*pt)
988         {
989         case 0x00: return; // This is actually the first byte of the old "marker" word.
990         case 0xf4: return; // Halt - now used as a marker.
        // Single-byte instructions with no operand bytes.
991         case 0x50: case 0x51: case 0x52: case 0x53:
992         case 0x54: case 0x55: case 0x56: case 0x57: /* Push */
993         case 0x58: case 0x59: case 0x5a: case 0x5b:
994         case 0x5c: case 0x5d: case 0x5e: case 0x5f: /* Pop */
995         case 0x90: /* nop */ case 0xc3: /* ret */
996         case 0xf9: /* stc */ case 0xce: /* into */
997         case 0xf0: /* lock. */ case 0xf3: /* rep/repe */
998         case 0xa4: case 0xa5: case 0xaa: case 0xab: /* movs/stos */
999         case 0xa6: /* cmpsb */ case 0x9e: /* sahf */ case 0x99: /* cqo/cdq */
1000             pt++; break;
1001 
        // Opcode followed by a single byte (displacement, mask or immediate).
1002         case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
1003         case 0x78: case 0x79: case 0x7a: case 0x7b: case 0x7c: case 0x7d: case 0x7e: case 0x7f:
1004         case 0xeb:
1005             /* short jumps. */
1006         case 0xcd: /* INT - now used for a register mask */
1007         case 0xa8: /* TEST_ACC8 */
1008         case 0x6a: /* PUSH_8 */
1009             pt += 2; break;
1010 
        // Opcode followed by a two-byte operand.
1011         case 0xc2: /* RET_16 */
1012         case 0xca: /* FAR RET 16 - used for a register mask */
1013             pt += 3; break;
1014 
        // lea: the effective address is skipped with lea=true so that it is
        // not scanned as an address.
1015         case 0x8d: /* leal. */
1016             pt++; skipea(addr, &pt, process, true); break;
1017 
        // Opcode followed by an effective address and nothing else.
1018         case 0x03: case 0x0b: case 0x13: case 0x1b:
1019         case 0x23: case 0x2b: case 0x33: case 0x3b: /* Add r,ea etc. */
1020         case 0x88: /* MOVB_R_A */ case 0x89: /* MOVL_R_A */
1021         case 0x8b: /* MOVL_A_R */
1022         case 0x62: /* BOUNDL */
1023         case 0xff: /* Group5 */
1024         case 0xd1: /* Group2_1_A */
1025         case 0x8f: /* POP_A */
1026         case 0xd3: /* Group2_CL_A */
1027         case 0x87: // XCHNG
1028         case 0x63: // MOVSXD
1029             pt++; skipea(addr, &pt, process, false); break;
1030 
1031         case 0xf6: /* Group3_a */
1032             {
1033                 int isTest = 0;
1034                 pt++;
1035                 /* The test instruction has an immediate operand. */
                // It is recognised by a zero reg field in the ModRM byte and
                // here the immediate is one byte.
1036                 if ((*pt & 0x38) == 0) isTest = 1;
1037                 skipea(addr, &pt, process, false);
1038                 if (isTest) pt++;
1039                 break;
1040             }
1041 
1042         case 0xf7: /* Group3_A */
1043             {
1044                 int isTest = 0;
1045                 pt++;
1046                 /* The test instruction has an immediate operand. */
                // As above but the immediate is four bytes.
1047                 if ((*pt & 0x38) == 0) isTest = 1;
1048                 skipea(addr, &pt, process, false);
1049                 if (isTest) pt += 4;
1050                 break;
1051             }
1052 
        // Effective address followed by a one-byte immediate.
1053         case 0xc1: /* Group2_8_A */
1054         case 0xc6: /* MOVB_8_A */
1055         case 0x83: /* Group1_8_A */
1056         case 0x80: /* Group1_8_a */
1057         case 0x6b: // IMUL Ev,Ib
1058             pt++; skipea(addr, &pt, process, false); pt++; break;
1059 
        // Effective address followed by a four-byte immediate that is never scanned.
1060         case 0x69: // IMUL Ev,Iv
1061             pt++; skipea(addr, &pt, process, false); pt += 4; break;
1062 
1063         case 0x81: /* Group1_32_A */
1064             {
1065                 pt ++;
1066 #ifndef HOSTARCHITECTURE_X86_64
                // Remember the ModRM byte: its reg field selects the operation.
1067                 unsigned opCode = *pt;
1068 #endif
1069                 skipea(addr, &pt, process, false);
1070                 // Only check the 32 bit constant if this is a comparison.
1071                 // For other operations this may be untagged and shouldn't be an address.
1072 #ifndef HOSTARCHITECTURE_X86_64
1073                 if ((opCode & 0x38) == 0x38)
1074                     process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
1075 #endif
1076                 pt += 4;
1077                 break;
1078             }
1079 
1080         case 0xe8: case 0xe9:
1081             // Long jump and call.  These are used to call constant (known) functions
1082             // and also long jumps within the function.
1083             {
1084                 pt++;
                // Assemble the four little-endian displacement bytes, most
                // significant first, starting from an all-ones or all-zeros
                // value so that the result is sign-extended.
1085                 POLYSIGNED disp = (pt[3] & 0x80) ? -1 : 0; // Set the sign just in case.
1086                 for(unsigned i = 4; i > 0; i--)
1087                     disp = (disp << 8) | pt[i-1];
1088                 byte *absAddr = pt + disp + 4; // The address is relative to AFTER the constant
1089 
1090                 // If the new address is within the current piece of code we don't do anything
1091                 if (absAddr >= (byte*)addr && absAddr < (byte*)end) {}
1092                 else {
1093 #ifdef HOSTARCHITECTURE_X86_64
1094                     ASSERT(sizeof(PolyWord) == 4); // Should only be used internally on x64
1095 #endif /* HOSTARCHITECTURE_X86_64 */
1096                     if (addr != old)
1097                     {
1098                         // The old value of the displacement was relative to the old address before
1099                         // we copied this code segment.
1100                         // We have to correct it back to the original address.
1101                         absAddr = absAddr - (byte*)addr + (byte*)old;
1102                         // We have to correct the displacement for the new location and store
1103                         // that away before we call ScanConstant.
1104                         size_t newDisp = absAddr - pt - 4;
                        // The bytes are written through the address returned by
                        // writeAble for the space containing pt, low byte first.
1105                         byte* wr = gMem.SpaceForAddress(pt)->writeAble(pt);
1106                         for (unsigned i = 0; i < 4; i++)
1107                         {
1108                             wr[i] = (byte)(newDisp & 0xff);
1109                             newDisp >>= 8;
1110                         }
1111                     }
1112                     process->ScanConstant(addr, pt, PROCESS_RELOC_I386RELATIVE);
1113                 }
1114                 pt += 4;
1115                 break;
1116             }
1117 
1118         case 0xc7:/* MOVL_32_A */
1119             {
1120                 pt++;
                // A move to a byte offset of -sizeof(PolyWord) from a register
                // (mod == 01, no SIB, displacement byte == 256-sizeof(PolyWord))
                // is taken to be the store of a length word.
1121                 if ((*pt & 0xc0) == 0x40 /* Byte offset or sib present */ &&
1122                     ((*pt & 7) != 4) /* But not sib present */ && pt[1] == 256-sizeof(PolyWord))
1123                 {
1124                     /* We may use a move instruction to set the length
1125                        word on a new segment.  We mustn't try to treat this as a constant.  */
1126                     pt += 6; /* Skip the modrm byte, the offset and the constant. */
1127                 }
1128                 else
1129                 {
1130                     skipea(addr, &pt, process, false);
1131 #ifndef HOSTARCHITECTURE_X86_64
1132                     // This isn't used for addresses even in 32-in-64
1133                     process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
1134 #endif /* HOSTARCHITECTURE_X86_64 */
1135                     pt += 4;
1136                 }
1137                 break;
1138             }
1139 
1140         case 0xb8: case 0xb9: case 0xba: case 0xbb:
1141         case 0xbc: case 0xbd: case 0xbe: case 0xbf: /* MOVL_32_64_R */
1142             pt ++;
1143 #ifdef HOSTARCHITECTURE_X86_64
            // Bit 8 of the REX prefix selects the 64-bit form.  Without it the
            // immediate is four bytes and is not scanned.
1144             if ((lastRex & 8) == 0)
1145                 pt += 4; // 32-bit mode on 64-bits
1146             else
1147 #endif /* HOSTARCHITECTURE_X86_64 */
1148             {
1149                 // This is used in native 32-bit for constants and in
1150                 // 32-in-64 for the special case of an absolute address.
1151                 process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
1152                 pt += sizeof(uintptr_t);
1153             }
1154             break;
1155 
1156         case 0x68: /* PUSH_32 */
1157             pt ++;
            // The pushed value is only scanned as a constant on 32-bit builds.
1158 #if (!defined(HOSTARCHITECTURE_X86_64))
1159             process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
1160 #endif
1161             pt += 4;
1162             break;
1163 
1164         case 0x0f: /* ESCAPE */
1165             {
                // Two-byte opcodes: dispatch on the byte after the 0x0f.
1166                 pt++;
1167                 switch (*pt)
1168                 {
1169                 case 0xb6: /* movzl */
1170                 case 0xb7: // movzw
1171                 case 0xbe: // movsx
1172                 case 0xbf: // movsx
1173                 case 0xc1: /* xaddl */
1174                 case 0xae: // ldmxcsr/stmxcsr
1175                 case 0xaf: // imul
1176                 case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
1177                 case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
1178                     // cmov
1179                     pt++; skipea(addr, &pt, process, false); break;
1180 
1181                 case 0x80: case 0x81: case 0x82: case 0x83:
1182                 case 0x84: case 0x85: case 0x86: case 0x87:
1183                 case 0x88: case 0x89: case 0x8a: case 0x8b:
1184                 case 0x8c: case 0x8d: case 0x8e: case 0x8f:
1185                     /* Conditional branches with 32-bit displacement. */
                    // Second opcode byte plus the four displacement bytes.
1186                     pt += 5; break;
1187 
1188                 case 0x90: case 0x91: case 0x92: case 0x93:
1189                 case 0x94: case 0x95: case 0x96: case 0x97:
1190                 case 0x98: case 0x99: case 0x9a: case 0x9b:
1191                 case 0x9c: case 0x9d: case 0x9e: case 0x9f:
1192                     /* SetCC. */
1193                     pt++; skipea(addr, &pt, process, false); break;
1194 
1195                 // These are SSE2 instructions
1196                 case 0x10: case 0x11: case 0x58: case 0x5c: case 0x59: case 0x5e:
1197                 case 0x2e: case 0x2a: case 0x54: case 0x57: case 0x5a: case 0x6e:
1198                 case 0x7e: case 0x2c: case 0x2d:
1199                     pt++; skipea(addr, &pt, process, false); break;
1200 
1201                 case 0x73: // PSRLDQ - EA,imm
1202                     pt++; skipea(addr, &pt, process, false); pt++;  break;
1203 
1204                 default: Crash("Unknown opcode %d at %p\n", *pt, pt);
1205                 }
1206                 break;
1207             }
1208 
1209         case 0xd8: case 0xd9: case 0xda: case 0xdb:
1210         case 0xdc: case 0xdd: case 0xde: case 0xdf: // Floating point escape instructions
1211             {
1212                 pt++;
                // A second byte with its top three bits set is treated as a
                // complete operand; anything else is decoded as an effective address.
1213                 if ((*pt & 0xe0) == 0xe0) pt++;
1214                 else skipea(addr, &pt, process, false);
1215                 break;
1216             }
1217 
1218         default: Crash("Unknown opcode %d at %p\n", *pt, pt);
1219         }
1220     }
1221 }
1222 
1223 // Increment the value contained in the first word of the mutex.
AtomicDecrement(Handle mutexp)1224 Handle X86TaskData::AtomicDecrement(Handle mutexp)
1225 {
1226     PolyObject *p = DEREFHANDLE(mutexp);
1227     POLYUNSIGNED result = X86AsmAtomicDecrement(p);
1228     return this->saveVec.push(PolyWord::FromUnsigned(result));
1229 }
1230 
1231 // Release a mutex.  Because the atomic increment and decrement
1232 // use the hardware LOCK prefix we can simply set this to zero.
AtomicReset(Handle mutexp)1233 void X86TaskData::AtomicReset(Handle mutexp)
1234 {
1235     DEREFHANDLE(mutexp)->Set(0, TAGGED(0));
1236 }
1237 
1238 static X86Dependent x86Dependent;
1239 
1240 MachineDependent *machineDependent = &x86Dependent;
1241 
1242 extern "C" {
1243     POLYEXTERNALSYMBOL void *PolyX86GetThreadData();
1244 }
1245 
1246 // Return the address of assembly data for the current thread.  This is normally in
1247 // RBP except if we are in a callback.
PolyX86GetThreadData()1248 void *PolyX86GetThreadData()
1249 {
1250     // We should get the task data for the thread that is running this code.
1251     // If this thread has been created by the foreign code we will have to
1252     // create a new one here.
1253     TaskData* taskData = processes->GetTaskDataForThread();
1254     if (taskData == 0)
1255     {
1256         try {
1257             taskData = processes->CreateNewTaskData(0, 0, 0, TAGGED(0));
1258         }
1259         catch (std::bad_alloc&) {
1260             ::Exit("Unable to create thread data - insufficient memory");
1261         }
1262         catch (MemoryException&) {
1263             ::Exit("Unable to create thread data - insufficient memory");
1264         }
1265     }
1266     return &((X86TaskData*)taskData)->assemblyInterface;
1267 }
1268 
1269 struct _entrypts machineSpecificEPT[] =
1270 {
1271     { "PolyX86GetThreadData",           (polyRTSFunction)& PolyX86GetThreadData },
1272 
1273     { NULL, NULL} // End of list.
1274 };
1275 
1276