1;; Licensed to the .NET Foundation under one or more agreements.
2;; The .NET Foundation licenses this file to you under the MIT license.
3;; See the LICENSE file in the project root for more information.
4
5include AsmMacros.inc
6
7PROBE_SAVE_FLAGS_EVERYTHING     equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH
8PROBE_SAVE_FLAGS_RAX_IS_GCREF   equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF
9
10;;
11;; See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX and accepts the register
12;; bitmask in RCX
13;;
14;; On entry:
15;;  - BITMASK: bitmask describing pushes, may be volatile register or constant value
16;;  - RAX: managed function return value, may be an object or byref
17;;  - preserved regs: need to stay preserved, may contain objects or byrefs
18;;  - extraStack bytes of stack have already been allocated
19;;
20;; INVARIANTS
21;; - The macro assumes it is called from a prolog, prior to a frame pointer being setup.
22;; - All preserved registers remain unchanged from their values in managed code.
23;;
24PUSH_PROBE_FRAME macro threadReg, trashReg, extraStack, BITMASK
25
26    push_vol_reg    rax                         ; save RAX, it might contain an objectref
27    lea             trashReg, [rsp + 10h + extraStack]
28    push_vol_reg    trashReg                    ; save caller's RSP
29    push_nonvol_reg r15                         ; save preserved registers
30    push_nonvol_reg r14                         ;   ..
31    push_nonvol_reg r13                         ;   ..
32    push_nonvol_reg r12                         ;   ..
33    push_nonvol_reg rdi                         ;   ..
34    push_nonvol_reg rsi                         ;   ..
35    push_nonvol_reg rbx                         ;   ..
36    push_vol_reg    BITMASK                     ; save the register bitmask passed in by caller
37    push_vol_reg    threadReg                   ; Thread * (unused by stackwalker)
38    push_nonvol_reg rbp                         ; save caller's RBP
39    mov             trashReg, [rsp + 12*8 + extraStack]  ; Find the return address
40    push_vol_reg    trashReg                    ; save m_RIP
41    lea             trashReg, [rsp + 0]         ; trashReg == address of frame
42
43    ;; allocate scratch space and any required alignment
44    alloc_stack     20h + 10h + (extraStack AND (10h-1))
45
46    ;; save xmm0 in case it's being used as a return value
47    movdqa          [rsp + 20h], xmm0
48
49    ; link the frame into the Thread
50    mov             [threadReg + OFFSETOF__Thread__m_pHackPInvokeTunnel], trashReg
51endm
52
53;;
54;; Remove the frame from a previous call to PUSH_PROBE_FRAME from the top of the stack and restore preserved
55;; registers and return value to their values from before the probe was called (while also updating any
56;; object refs or byrefs).
57;;
58;; NOTE: does NOT deallocate the 'extraStack' portion of the stack, the user of this macro must do that.
59;;
60POP_PROBE_FRAME macro extraStack
61    movdqa      xmm0, [rsp + 20h]
62    add         rsp, 20h + 10h + (extraStack AND (10h-1)) + 8
63    pop         rbp
64    pop         rax     ; discard Thread*
65    pop         rax     ; discard BITMASK
66    pop         rbx
67    pop         rsi
68    pop         rdi
69    pop         r12
70    pop         r13
71    pop         r14
72    pop         r15
73    pop         rax     ; discard caller RSP
74    pop         rax
75endm
76
77;;
78;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this
79;; thread if it finds it at an IP that isn't managed code.
80;;
81;; Register state on entry:
82;;  RDX: thread pointer
83;;
84;; Register state on exit:
85;;  RCX: trashed
86;;
87ClearHijackState macro
88        xor         ecx, ecx
89        mov         [rdx + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], rcx
90        mov         [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress], rcx
91endm
92
93
94;;
95;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and
96;; clears the hijack state.
97;;
98;; Register state on entry:
99;;  All registers correct for return to the original return address.
100;;
101;; Register state on exit:
102;;  RCX: trashed
103;;  RDX: thread pointer
104;;
105FixupHijackedCallstack macro
106
107        ;; rdx <- GetThread(), TRASHES rcx
108        INLINE_GETTHREAD rdx, rcx
109
110        ;;
111        ;; Fix the stack by pushing the original return address
112        ;;
113        mov         rcx, [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress]
114        push        rcx
115
116        ClearHijackState
117endm
118
119;;
120;; Set the Thread state and wait for a GC to complete.
121;;
122;; Register state on entry:
123;;  RBX: thread pointer
124;;
125;; Register state on exit:
126;;  RBX: thread pointer
127;;  All other registers trashed
128;;
129
130EXTERN RhpWaitForGCNoAbort : PROC
131
132WaitForGCCompletion macro
133        test        dword ptr [rbx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc
134        jnz         @F
135
136        mov         rcx, [rbx + OFFSETOF__Thread__m_pHackPInvokeTunnel]
137        call        RhpWaitForGCNoAbort
138@@:
139
140endm
141
142
143EXTERN RhpPInvokeExceptionGuard : PROC
144
145;;
146;;
147;;
148;; GC Probe Hijack targets
149;;
150;;
151NESTED_ENTRY RhpGcProbeHijackScalar, _TEXT, RhpPInvokeExceptionGuard
152        END_PROLOGUE
153        FixupHijackedCallstack
154        mov         ecx, DEFAULT_FRAME_SAVE_FLAGS
155        jmp         RhpGcProbe
156NESTED_END RhpGcProbeHijackScalar, _TEXT
157
158NESTED_ENTRY RhpGcProbeHijackObject, _TEXT, RhpPInvokeExceptionGuard
159        END_PROLOGUE
160        FixupHijackedCallstack
161        mov         ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF
162        jmp         RhpGcProbe
163NESTED_END RhpGcProbeHijackObject, _TEXT
164
165NESTED_ENTRY RhpGcProbeHijackByref, _TEXT, RhpPInvokeExceptionGuard
166        END_PROLOGUE
167        FixupHijackedCallstack
168        mov         ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF
169        jmp         RhpGcProbe
170NESTED_END RhpGcProbeHijackByref, _TEXT
171
172ifdef FEATURE_GC_STRESS
173;;
174;;
175;; GC Stress Hijack targets
176;;
177;;
178LEAF_ENTRY RhpGcStressHijackScalar, _TEXT
179        FixupHijackedCallstack
180        mov         ecx, DEFAULT_FRAME_SAVE_FLAGS
181        jmp         RhpGcStressProbe
182LEAF_END RhpGcStressHijackScalar, _TEXT
183
184LEAF_ENTRY RhpGcStressHijackObject, _TEXT
185        FixupHijackedCallstack
186        mov         ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_GCREF
187        jmp         RhpGcStressProbe
188LEAF_END RhpGcStressHijackObject, _TEXT
189
190LEAF_ENTRY RhpGcStressHijackByref, _TEXT
191        FixupHijackedCallstack
192        mov         ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_RAX_IS_BYREF
193        jmp         RhpGcStressProbe
194LEAF_END RhpGcStressHijackByref, _TEXT
195
196;;
197;; Worker for our GC stress probes.  Do not call directly!!
198;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}.
199;; This worker performs the GC Stress work and returns to the original return address.
200;;
201;; Register state on entry:
202;;  RDX: thread pointer
203;;  RCX: register bitmask
204;;
205;; Register state on exit:
206;;  Scratch registers, except for RAX, have been trashed
207;;  All other registers restored as they were when the hijack was first reached.
208;;
209NESTED_ENTRY RhpGcStressProbe, _TEXT
210        PUSH_PROBE_FRAME rdx, rax, 0, rcx
211        END_PROLOGUE
212
213        call        REDHAWKGCINTERFACE__STRESSGC
214
215        POP_PROBE_FRAME 0
216        ret
217NESTED_END RhpGcStressProbe, _TEXT
218
219endif ;; FEATURE_GC_STRESS
220
221EXTERN RhpThrowHwEx : PROC
222
223NESTED_ENTRY RhpGcProbe, _TEXT
224        test        [RhpTrapThreads], TrapThreadsFlags_TrapThreads
225        jnz         @f
226        ret
227@@:
228        PUSH_PROBE_FRAME rdx, rax, 0, rcx
229        END_PROLOGUE
230
231        mov         rbx, rdx
232        WaitForGCCompletion
233
234        mov         rax, [rbx + OFFSETOF__Thread__m_pHackPInvokeTunnel]
235        test        dword ptr [rax + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT
236        jnz         Abort
237        POP_PROBE_FRAME 0
238        ret
239Abort:
240        POP_PROBE_FRAME 0
241        mov         rcx, STATUS_REDHAWK_THREAD_ABORT
242        pop         rdx         ;; return address as exception RIP
243        jmp         RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception
244
245NESTED_END RhpGcProbe, _TEXT
246
247
248LEAF_ENTRY RhpGcPoll, _TEXT
249        ;
250        ; loop hijacking is used instead
251        ;
252        int 3
253
254LEAF_END RhpGcPoll, _TEXT
255
256LEAF_ENTRY RhpGcPollStress, _TEXT
257        ;
258        ; loop hijacking is used instead
259        ;
260        int 3
261
262LEAF_END RhpGcPollStress, _TEXT
263
264ifdef FEATURE_GC_STRESS
265;; PAL_LIMITED_CONTEXT, 6 xmm regs to save, 2 scratch regs to save, plus 20h bytes for scratch space
266RhpHijackForGcStress_FrameSize equ SIZEOF__PAL_LIMITED_CONTEXT + 6*10h + 2*8h + 20h
267
268; -----------------------------------------------------------------------------------------------------------
269; RhpHijackForGcStress
270;
271; Called at the beginning of the epilog when a method is bound with /gcstress
272;
273; N.B. -- Leaf frames may not have aligned the stack or reserved any scratch space on the stack.  Also, in
274;         order to have a resonable stacktrace in the debugger, we must use the .pushframe unwind directive.
275;
276; N.B. #2 -- The "EH jump epilog" codegen depends on rcx/rdx being preserved across this call.  We currently
277;            will trash R8-R11, but we can do better, if necessary.
278;
279NESTED_ENTRY RhpHijackForGcStress, _TEXT
280
281        lea         r10, [rsp+8]        ;; save the original RSP (prior to call)
282        mov         r11, [rsp]          ;; get the return address
283
284        ;; Align the stack
285        and         rsp, -16
286
287        ;; Push the expected "machine frame" for the unwinder to see.  All that it looks at is the RSP and
288        ;; RIP, so we push zero for the others.
289        xor     r8, r8
290        push    r8              ;; just aligning the stack
291        push    r8              ;; SS
292        push    r10             ;; original RSP
293        push    r8              ;; EFLAGS
294        push    r8              ;; CS
295        push    r11             ;; return address
296
297        ; Tell the unwinder that the frame is there now
298        .pushframe
299
300        alloc_stack     RhpHijackForGcStress_FrameSize
301        END_PROLOGUE
302
303        ;; Save xmm scratch regs -- this is probably overkill, only the return value reg is
304        ;; likely to be interesting at this point, but it's a bit ambiguous.
305        movdqa      [rsp + 20h + 0*10h], xmm0
306        movdqa      [rsp + 20h + 1*10h], xmm1
307        movdqa      [rsp + 20h + 2*10h], xmm2
308        movdqa      [rsp + 20h + 3*10h], xmm3
309        movdqa      [rsp + 20h + 4*10h], xmm4
310        movdqa      [rsp + 20h + 5*10h], xmm5
311
312        mov         [rsp + 20h + 6*10h + 0*8h], rcx
313        mov         [rsp + 20h + 6*10h + 1*8h], rdx
314
315        ;;
316        ;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the
317        ;; IP after the call to this helper.
318        ;;
319        ;; This is very likely overkill since the calculation of the return address should only need RSP and
320        ;; RBP, but this is test code, so I'm not too worried about efficiency.
321        ;;
322        mov         [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__IP],  r11     ; rip at callsite
323        mov         [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rsp], r10     ; rsp at callsite
324        mov         [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rbp], rbp
325        mov         [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rdi], rdi
326        mov         [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rsi], rsi
327        mov         [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rax], rax
328        mov         [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rbx], rbx
329
330        mov         [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__R12], r12
331        mov         [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__R13], r13
332        mov         [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__R14], r14
333        mov         [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__R15], r15
334
335        lea         rcx, [rsp + 20h + 6*10h + 2*8h]   ;; address of PAL_LIMITED_CONTEXT
336        call        THREAD__HIJACKFORGCSTRESS
337
338        ;; Note: we only restore the scratch registers here. No GC has occured, so restoring
339        ;; the callee saved ones is unnecessary.
340        mov         rax, [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rax]
341        mov         rcx, [rsp + 20h + 6*10h + 0*8h]
342        mov         rdx, [rsp + 20h + 6*10h + 1*8h]
343
344        ;; Restore xmm scratch regs
345        movdqa      xmm0, [rsp + 20h + 0*10h]
346        movdqa      xmm1, [rsp + 20h + 1*10h]
347        movdqa      xmm2, [rsp + 20h + 2*10h]
348        movdqa      xmm3, [rsp + 20h + 3*10h]
349        movdqa      xmm4, [rsp + 20h + 4*10h]
350        movdqa      xmm5, [rsp + 20h + 5*10h]
351
352        ;; epilog
353        mov         r10, [rsp + 20h + 6*10h + 2*8h + OFFSETOF__PAL_LIMITED_CONTEXT__Rsp]
354        lea         rsp, [r10 - 8]              ;; adjust RSP to point back at the return address
355        ret
356NESTED_END RhpHijackForGcStress, _TEXT
357
358endif ;; FEATURE_GC_STRESS
359
360
361;;
362;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH
363;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing
364;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of
365;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the
366;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be
367;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the
368;; handler in the caller.
369;;
370;; If we are hijacked, then we jump to a routine that will unhijack appropriatley and wait for the GC to
371;; complete. There are also variants for GC stress.
372;;
373;; Note that at this point we are eiher hijacked or we are not, and this will not change until we return to
374;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack
375;; another thread while the target thread is suspended in managed code, and this is _not_ managed code.
376;;
377;; Register state on entry:
378;;  RAX: pointer to this function (i.e., trash)
379;;  RCX: reference to the exception object.
380;;  RDX: handler address we want to jump to.
381;;  RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller.
382;;  The stack still contains the return address.
383;;
384;; Register state on exit:
385;;  RSP: what it would be after a complete return to the caler.
386;;  RDX: TRASHED
387;;
388RTU_EH_JUMP_HELPER macro funcName, hijackFuncName, isStress, stressFuncName
389LEAF_ENTRY funcName, _TEXT
390        lea         rax, [hijackFuncName]
391        cmp         [rsp], rax
392        je          RhpGCProbeForEHJump
393
394IF isStress EQ 1
395        lea         rax, [stressFuncName]
396        cmp         [rsp], rax
397        je          RhpGCStressProbeForEHJump
398ENDIF
399
400        ;; We are not hijacked, so we can return to the handler.
401        ;; We return to keep the call/return prediction balanced.
402        mov         [rsp], rdx  ; Update the return address
403        ret
404
405LEAF_END funcName, _TEXT
406endm
407
408;; We need an instance of the helper for each possible hijack function. The binder has enough
409;; information to determine which one we need to use for any function.
410RTU_EH_JUMP_HELPER RhpEHJumpScalar,         RhpGcProbeHijackScalar, 0, 0
411RTU_EH_JUMP_HELPER RhpEHJumpObject,         RhpGcProbeHijackObject, 0, 0
412RTU_EH_JUMP_HELPER RhpEHJumpByref,          RhpGcProbeHijackByref,  0, 0
413ifdef FEATURE_GC_STRESS
414RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, RhpGcProbeHijackScalar, 1, RhpGcStressHijackScalar
415RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, RhpGcProbeHijackObject, 1, RhpGcStressHijackObject
416RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress,  RhpGcProbeHijackByref,  1, RhpGcStressHijackByref
417endif
418
419;;
420;; Macro to setup our frame and adjust the location of the EH object reference for EH jump probe funcs.
421;;
422;; Register state on entry:
423;;  RAX: scratch
424;;  RCX: reference to the exception object.
425;;  RDX: handler address we want to jump to.
426;;  RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller.
427;;  The stack is as if we are just about to returned from the call
428;;
429;; Register state on exit:
430;;  RAX: reference to the exception object
431;;  RCX: scratch
432;;  RDX: thread pointer
433;;
434EHJumpProbeProlog_extraStack = 1*8
435EHJumpProbeProlog macro
436        push_nonvol_reg rdx         ; save the handler address so we can jump to it later
437        mov             rax, rcx    ; move the ex object reference into rax so we can report it
438
439        ;; rdx <- GetThread(), TRASHES rcx
440        INLINE_GETTHREAD rdx, rcx
441
442        ;; Fix the stack by patching the original return address
443        mov         rcx, [rdx + OFFSETOF__Thread__m_pvHijackedReturnAddress]
444        mov         [rsp + EHJumpProbeProlog_extraStack], rcx
445
446        ClearHijackState
447
448        ; TRASHES r10
449        PUSH_PROBE_FRAME rdx, r10, EHJumpProbeProlog_extraStack, PROBE_SAVE_FLAGS_RAX_IS_GCREF
450
451        END_PROLOGUE
452endm
453
454;;
455;; Macro to re-adjust the location of the EH object reference, cleanup the frame, and make the
456;; final jump to the handler for EH jump probe funcs.
457;;
458;; Register state on entry:
459;;  RAX: reference to the exception object
460;;  RCX: scratch
461;;  RDX: scratch
462;;
463;; Register state on exit:
464;;  RSP: correct for return to the caller
465;;  RCX: reference to the exception object
466;;  RDX: trashed
467;;
468EHJumpProbeEpilog macro
469        POP_PROBE_FRAME EHJumpProbeProlog_extraStack
470        mov         rcx, rax    ; Put the EX obj ref back into rcx for the handler.
471
472        pop         rax         ; Recover the handler address.
473        mov         [rsp], rax  ; Update the return address
474        ret
475endm
476
477;;
478;; We are hijacked for a normal GC (not GC stress), so we need to unhijcak and wait for the GC to complete.
479;;
480;; Register state on entry:
481;;  RAX: scratch
482;;  RCX: reference to the exception object.
483;;  RDX: handler address we want to jump to.
484;;  RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller.
485;;  The stack is as if we have tail called to this function (rsp points to return address).
486;;
487;; Register state on exit:
488;;  RSP: correct for return to the caller
489;;  RBP: previous ebp frame
490;;  RCX: reference to the exception object
491;;
492NESTED_ENTRY RhpGCProbeForEHJump, _TEXT
493        EHJumpProbeProlog
494
495ifdef _DEBUG
496        ;;
497        ;; If we get here, then we have been hijacked for a real GC, and our SyncState must
498        ;; reflect that we've been requested to synchronize.
499
500        test        [RhpTrapThreads], TrapThreadsFlags_TrapThreads
501        jnz         @F
502
503        call        RhDebugBreak
504@@:
505endif ;; _DEBUG
506
507        mov         rbx, rdx
508        WaitForGCCompletion
509
510        EHJumpProbeEpilog
511
512NESTED_END RhpGCProbeForEHJump, _TEXT
513
514ifdef FEATURE_GC_STRESS
515;;
516;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper.
517;;
518;; Register state on entry:
519;;  RAX: scratch
520;;  RCX: reference to the exception object.
521;;  RDX: handler address we want to jump to.
522;;  RBX, RSI, RDI, RBP, and R12-R15 are all already correct for return to the caller.
523;;  The stack is as if we have tail called to this function (rsp points to return address).
524;;
525;; Register state on exit:
526;;  RSP: correct for return to the caller
527;;  RBP: previous ebp frame
528;;  RCX: reference to the exception object
529;;
530NESTED_ENTRY RhpGCStressProbeForEHJump, _TEXT
531        EHJumpProbeProlog
532
533        call        REDHAWKGCINTERFACE__STRESSGC
534
535        EHJumpProbeEpilog
536
537NESTED_END RhpGCStressProbeForEHJump, _TEXT
538
539g_pTheRuntimeInstance equ ?g_pTheRuntimeInstance@@3PEAVRuntimeInstance@@EA
540EXTERN g_pTheRuntimeInstance : QWORD
541RuntimeInstance__ShouldHijackLoopForGcStress equ ?ShouldHijackLoopForGcStress@RuntimeInstance@@QEAA_N_K@Z
542EXTERN RuntimeInstance__ShouldHijackLoopForGcStress : PROC
543
544endif ;; FEATURE_GC_STRESS
545
546EXTERN RecoverLoopHijackTarget : PROC
547EXTERN g_fGcStressStarted : DWORD
548EXTERN g_fHasFastFxsave : BYTE
549
550FXSAVE_SIZE             equ 512
551
552;; Trap a loop to GC.
553;; Set up the P/Invoke transition frame with the original loop target as the safe point.
554;; All registers, both volatile and non-volatile, are preserved.
555;; Input: ModuleHeader, chunk starting index and chunk sub-index which are used to get the original loop target
556;; The function is not called but jumped directly
557NESTED_ENTRY RhpLoopHijack, _TEXT
558
559    sizeof_OutgoingScratchSpace equ 20h
560    sizeof_PInvokeFrame         equ OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs + 15*8
561    sizeof_XmmAlignPad          equ 8
562    sizeof_XmmSave              equ FXSAVE_SIZE
563    sizeof_MachineFrame         equ 6*8
564    sizeof_ThunkPushedArgs      equ 4*8             ;; eflags, ModuleHeader *, chunk starting index, chunk sub-index
565    sizeof_FixedFrame           equ sizeof_OutgoingScratchSpace + sizeof_PInvokeFrame + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame
566
567        ;; On the stack on entry:
568        ;;   [rsp     ]  -> ModuleHeader *
569        ;;   [rsp +  8]  -> chunk starting index
570        ;;   [rsp + 10]  -> chunk sub-index (0-256)  BEWARE: this has been sign-extended, but it is unsigned
571
572        ;; save eflags before we trash them
573        pushfq
574
575        ;; What we want to get to:
576        ;;
577        ;;   [rsp     ]  -> outgoing scratch area
578        ;;
579        ;;   [rsp + 20]  -> m_RIP                           -------|
580        ;;   [rsp + 28]  -> m_FramePointer                         |
581        ;;   [rsp + 30]  -> m_pThread                              |
582        ;;   [rsp + 38]  -> m_Flags / m_dwAlignPad2                |
583        ;;   [rsp + 40]  -> rbx save                               |
584        ;;   [rsp + 48]  -> rsi save                               |
585        ;;   [rsp + 50]  -> rdi save                               |
586        ;;   [rsp + 58]  -> r12 save                               |
587        ;;   [rsp + 60]  -> r13 save                               |
588        ;;   [rsp + 68]  -> r14 save                               | PInvokeTransitionFrame
589        ;;   [rsp + 70]  -> r15 save                               |
590        ;;   [rsp + 78]  -> rsp save                               |
591        ;;   [rsp + 80]  -> rax save                               |
592        ;;   [rsp + 88]  -> rcx save                               |
593        ;;   [rsp + 90]  -> rdx save                               |
594        ;;   [rsp + 98]  -> r8 save                                |
595        ;;   [rsp + a0]  -> r9 save                                |
596        ;;   [rsp + a8]  -> r10 save                               |
597        ;;   [rsp + b0]  -> r11 save                        -------|
598        ;;
599        ;;   [rsp + b8]  -> [XmmAlignPad]
600        ;;
601        ;;   [rsp + c0]  -> FXSAVE area
602        ;;
603        ;;   [rsp +2c0]  | RIP      |
604        ;;   [rsp +2c8]  | CS       |
605        ;;   [rsp +2d0]  | EFLAGS   | <-- 'machine frame'
606        ;;   [rsp +2d8]  | RSP      |
607        ;;   [rsp +2e0]  | SS       |
608        ;;   [rsp +2e8]  | padding  |
609        ;;
610        ;;   [rsp +2f0]  [optional stack alignment]
611        ;;
612        ;;   [PSP - 20] -> eflags save
613        ;;   [PSP - 18] -> ModuleHeader *
614        ;;   [PSP - 10] -> chunk starting index
615        ;;   [PSP -  8] -> chunk sub-index (0-256)  BEWARE: this has been sign-extended, but it is unsigned
616        ;;   [PSP]      -> caller's frame
617
618        test        rsp, 0Fh
619        jz          AlreadyAligned
620
621        sub         rsp, sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 8  ; +8 to align RSP
622        push        r11                         ; save incoming R11 into save location
623        lea         r11, [rsp + 8 + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 8 + sizeof_ThunkPushedArgs]
624        jmp         PspCalculated
625
626    AlreadyAligned:
627        sub         rsp, sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame
628        push        r11                         ; save incoming R11 into save location
629        lea         r11, [rsp + 8 + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + sizeof_ThunkPushedArgs]
630
631    PspCalculated:
632        push        r10                         ; save incoming R10 into save location
633        xor         r10d, r10d
634
635        ;;
636        ;; Populate the 'machine frame' in the diagram above.  We have only pushed up to the 'r10 save', so we have not
637        ;; yet pushed 0xA8 bytes of that diagram.
638        ;;
639        ;; [rsp + {offset-in-target-frame-layout-diagram} - {as-yet-unpushed-stack-size}]
640        mov         [rsp + 2c0h - 0a8h], r10           ; init RIP to zero
641        mov         [rsp + 2c8h - 0a8h], r10           ; init CS to zero
642        mov         [rsp + 2d0h - 0a8h], r10           ; init EFLAGS to zero
643        mov         [rsp + 2d8h - 0a8h], r11           ; save PSP in the 'machine frame'
644        mov         [rsp + 2e0h - 0a8h], r10           ; init SS to zero
645
646        .pushframe
647        .allocstack sizeof_XmmAlignPad + sizeof_XmmSave + 2*8    ;; only 2 of the regs from the PInvokeTransitionFrame are on the stack
648
649        push_vol_reg    r9
650        push_vol_reg    r8
651        push_vol_reg    rdx
652        push_vol_reg    rcx
653        push_vol_reg    rax
654        push_vol_reg    r11         ; PSP gets saved into the PInvokeTransitionFrame
655        push_nonvol_reg r15
656        push_nonvol_reg r14
657        push_nonvol_reg r13
658        push_nonvol_reg r12
659        push_nonvol_reg rdi
660        push_nonvol_reg rsi
661        push_nonvol_reg rbx
662        push_vol_reg    PROBE_SAVE_FLAGS_EVERYTHING     ; m_Flags / m_dwAlignPad2
663
664        ;; rdx <- GetThread(), TRASHES rcx
665        INLINE_GETTHREAD rdx, rcx
666
667        push_vol_reg    rdx                             ; m_pThread
668        push_nonvol_reg rbp                             ; m_FramePointer
669        push_vol_reg    r10                             ; m_RIP
670
671        alloc_stack     sizeof_OutgoingScratchSpace
672        END_PROLOGUE
673
674        mov         rbx, r11    ; put PSP into RBX
675        mov         rsi, rdx    ; put Thread* into RSI
676
677        ; RBX is PSP
678        ; RSI is Thread*
679
680        fxsave      [rsp + 0c0h]
681
682        cmp         [g_fHasFastFxsave], 0   ; fast fxsave won't save the xmm registers, so we must do it
683        jz          DontSaveXmmAgain
684
685        ;; 0C0h -> offset of FXSAVE area
686        ;; 0A0h -> offset of xmm0 save area within the FXSAVE area
687        movdqa      [rsp + 0c0h + 0a0h +  0*10h], xmm0
688        movdqa      [rsp + 0c0h + 0a0h +  1*10h], xmm1
689        movdqa      [rsp + 0c0h + 0a0h +  2*10h], xmm2
690        movdqa      [rsp + 0c0h + 0a0h +  3*10h], xmm3
691        movdqa      [rsp + 0c0h + 0a0h +  4*10h], xmm4
692        movdqa      [rsp + 0c0h + 0a0h +  5*10h], xmm5
693        movdqa      [rsp + 0c0h + 0a0h +  6*10h], xmm6
694        movdqa      [rsp + 0c0h + 0a0h +  7*10h], xmm7
695        movdqa      [rsp + 0c0h + 0a0h +  8*10h], xmm8
696        movdqa      [rsp + 0c0h + 0a0h +  9*10h], xmm9
697        movdqa      [rsp + 0c0h + 0a0h + 10*10h], xmm10
698        movdqa      [rsp + 0c0h + 0a0h + 11*10h], xmm11
699        movdqa      [rsp + 0c0h + 0a0h + 12*10h], xmm12
700        movdqa      [rsp + 0c0h + 0a0h + 13*10h], xmm13
701        movdqa      [rsp + 0c0h + 0a0h + 14*10h], xmm14
702        movdqa      [rsp + 0c0h + 0a0h + 15*10h], xmm15
703
704DontSaveXmmAgain:
705        xor         ecx, ecx                ; Combine the two indexes
706        mov         cl,  [r11 -  8h]        ; ...
707        add         ecx, [r11 - 10h]        ; ...
708        mov         rdx, [r11 - 18h]        ; Load the ModuleHeader*
709
710        ;; RCX contains full index now
711        ;; RDX contains the ModuleHeader*
712
713        call        RecoverLoopHijackTarget
714        mov         [rsp + 2c0h], rax       ; save original target address into 'machine frame'
715        mov         [rsp +  20h], rax       ; save original target address into PInvokeTransitionFrame
716
717        mov         [rbx - 8], rax          ; store original target address for our 'return'
718
719        ; Early out if GC stress is currently suppressed. Do this after we have computed the real address to
720        ; return to but before we link the transition frame onto m_pHackPInvokeTunnel (because hitting this
721        ; condition implies we're running restricted callouts during a GC itself and we could end up
722        ; overwriting a co-op frame set by the code that caused the GC in the first place, e.g. a GC.Collect
723        ; call).
724        test        dword ptr [rsi + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc
725        jnz         DoneWaitingForGc
726
727        ; link the frame into the Thread
728        lea         rcx, [rsp + sizeof_OutgoingScratchSpace]    ; rcx <- PInvokeTransitionFrame*
729        mov         [rsi + OFFSETOF__Thread__m_pHackPInvokeTunnel], rcx
730
731        ;;
732        ;; Unhijack this thread, if necessary.
733        ;;
734        INLINE_THREAD_UNHIJACK  rsi, rax, rcx       ;; trashes RAX, RCX
735
736ifdef FEATURE_GC_STRESS
737        xor         eax, eax
738        cmp         [g_fGcStressStarted], eax
739        jz          @F
740
741        mov         rdx, [rsp + 2c0h]
742        mov         rcx, [g_pTheRuntimeInstance]
743        call        RuntimeInstance__ShouldHijackLoopForGcStress
744        cmp         al, 0
745        je          @F
746
747        call        REDHAWKGCINTERFACE__STRESSGC
748@@:
749endif ;; FEATURE_GC_STRESS
750
751        lea         rcx, [rsp + sizeof_OutgoingScratchSpace]    ; calculate PInvokeTransitionFrame pointer
752        call        RhpWaitForGCNoAbort
753
754    DoneWaitingForGc:
755        ;; Prepare for our return by stashing a scratch register where we can pop it just before returning
756        mov         rcx, [rsp + 88h]        ; get RCX save value
757        mov         [rbx - 10h], rcx        ; store RCX save value into location for popping
758        mov         rcx, rbx                ; RCX <- PSP
759
760        fxrstor     [rsp + 0c0h]
761
762        cmp         [g_fHasFastFxsave], 0
763        jz          DontRestoreXmmAgain
764
765        movdqa      xmm0 , [rsp + 0c0h + 0a0h +  0*10h]
766        movdqa      xmm1 , [rsp + 0c0h + 0a0h +  1*10h]
767        movdqa      xmm2 , [rsp + 0c0h + 0a0h +  2*10h]
768        movdqa      xmm3 , [rsp + 0c0h + 0a0h +  3*10h]
769        movdqa      xmm4 , [rsp + 0c0h + 0a0h +  4*10h]
770        movdqa      xmm5 , [rsp + 0c0h + 0a0h +  5*10h]
771        movdqa      xmm6 , [rsp + 0c0h + 0a0h +  6*10h]
772        movdqa      xmm7 , [rsp + 0c0h + 0a0h +  7*10h]
773        movdqa      xmm8 , [rsp + 0c0h + 0a0h +  8*10h]
774        movdqa      xmm9 , [rsp + 0c0h + 0a0h +  9*10h]
775        movdqa      xmm10, [rsp + 0c0h + 0a0h + 10*10h]
776        movdqa      xmm11, [rsp + 0c0h + 0a0h + 11*10h]
777        movdqa      xmm12, [rsp + 0c0h + 0a0h + 12*10h]
778        movdqa      xmm13, [rsp + 0c0h + 0a0h + 13*10h]
779        movdqa      xmm14, [rsp + 0c0h + 0a0h + 14*10h]
780        movdqa      xmm15, [rsp + 0c0h + 0a0h + 15*10h]
781
782DontRestoreXmmAgain:
783        add         rsp, sizeof_OutgoingScratchSpace
784        mov         eax, [rsp + OFFSETOF__PInvokeTransitionFrame__m_Flags]
785        test        eax, PTFF_THREAD_ABORT
786        pop         rax                     ; m_RIP
787        pop         rbp                     ; m_FramePointer
788        pop         rax                     ; m_pThread
789        pop         rax                     ; m_Flags / m_dwAlign2
790        pop         rbx
791        pop         rsi
792        pop         rdi
793        pop         r12
794        pop         r13
795        pop         r14
796        pop         r15
797        pop         rax                     ; RSP
798        pop         rax                     ; RAX save
799        pop         rdx                     ; RCX save (intentionally discarding it)
800        pop         rdx
801        pop         r8
802        pop         r9
803        pop         r10
804        pop         r11
805
806
807        ;; RCX is PSP at this point and the stack looks like this:
808        ;;   [PSP - 20] -> eflags save
809        ;;   [PSP - 18] -> ModuleHeader *
810        ;;   [PSP - 10] -> rcx save
811        ;;   [PSP -  8] -> return address
812        ;;   [PSP]      -> caller's frame
813        ;;
814        ;; The final step is to restore eflags, rcx, and return back to the loop target location.
815
816        lea         rsp, [rcx - 20h]
817        jz          @f          ;; result of the test instruction before the pops above
818        popfq                   ;; restore flags
819        pop         rcx         ;; discard ModuleHeader*
820        pop         rcx         ;; restore rcx
821        mov         rcx, STATUS_REDHAWK_THREAD_ABORT
822        pop         rdx         ;; return address as exception RIP
823        jmp         RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception
824
825@@:
826        popfq               ;; restore flags
827        pop         rcx     ;; discard ModuleHeader*
828        pop         rcx     ;; restore rcx
829        ret
830
831NESTED_END RhpLoopHijack, _TEXT
832
833;; Trap to GC.
834;; Set up the P/Invoke transition frame with the return address as the safe point.
835;; All registers, both volatile and non-volatile, are preserved.
836;; The function should be called not jumped because it's expecting the return address
837NESTED_ENTRY RhpTrapToGC, _TEXT
838
839    sizeof_OutgoingScratchSpace equ 20h
840    sizeof_PInvokeFrame         equ OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs + 15*8
841    sizeof_XmmAlignPad          equ 8
842    sizeof_XmmSave              equ FXSAVE_SIZE
843    sizeof_MachineFrame         equ 6*8
844    sizeof_InitialPushedArgs    equ 3*8             ;; eflags, rcx, return value
845    sizeof_FixedFrame           equ sizeof_OutgoingScratchSpace + sizeof_PInvokeFrame + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame
846
847        ;; On the stack on entry:
848        ;;   [rsp     ]  -> Return address
849
850        ;; Prepare for our return by stashing a scratch register where we can pop it just before returning
851        ;; The scratch register will be used as PSP in the epilog
852        push        rcx
853
854        ;; save eflags before we trash them
855        pushfq
856
857        ;; What we want to get to:
858        ;;
859        ;;   [rsp     ]  -> outgoing scratch area
860        ;;
861        ;;   [rsp + 20]  -> m_RIP                           -------|
862        ;;   [rsp + 28]  -> m_FramePointer                         |
863        ;;   [rsp + 30]  -> m_pThread                              |
864        ;;   [rsp + 38]  -> m_Flags / m_dwAlignPad2                |
865        ;;   [rsp + 40]  -> rbx save                               |
866        ;;   [rsp + 48]  -> rsi save                               |
867        ;;   [rsp + 50]  -> rdi save                               |
868        ;;   [rsp + 58]  -> r12 save                               |
869        ;;   [rsp + 60]  -> r13 save                               |
870        ;;   [rsp + 68]  -> r14 save                               | PInvokeTransitionFrame
871        ;;   [rsp + 70]  -> r15 save                               |
872        ;;   [rsp + 78]  -> rsp save                               |
873        ;;   [rsp + 80]  -> rax save                               |
874        ;;   [rsp + 88]  -> rcx save                               |
875        ;;   [rsp + 90]  -> rdx save                               |
876        ;;   [rsp + 98]  -> r8 save                                |
877        ;;   [rsp + a0]  -> r9 save                                |
878        ;;   [rsp + a8]  -> r10 save                               |
879        ;;   [rsp + b0]  -> r11 save                        -------|
880        ;;
881        ;;   [rsp + b8]  -> [XmmAlignPad]
882        ;;
883        ;;   [rsp + c0]  -> FXSAVE area
884        ;;
885        ;;   [rsp +2c0]  | RIP      |
886        ;;   [rsp +2c8]  | CS       |
887        ;;   [rsp +2d0]  | EFLAGS   | <-- 'machine frame'
888        ;;   [rsp +2d8]  | RSP      |
889        ;;   [rsp +2e0]  | SS       |
890        ;;   [rsp +2e8]  | padding  |
891        ;;
892        ;;   [rsp +2f0]  [optional stack alignment]
893        ;;
894        ;;   [PSP - 18] -> eflags save
895        ;;   [PSP - 10] -> rcx save
896        ;;   [PSP -  8] -> Return address
897        ;;   [PSP]      -> caller's frame
898
899        test        rsp, 0Fh
900        jz          AlreadyAligned
901
902        sub         rsp, sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 8  ; +8 to align RSP
903        push        r11                         ; save incoming R11 into save location
904        lea         r11, [rsp + 8 + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 8 + sizeof_InitialPushedArgs]
905        jmp         PspCalculated
906
907    AlreadyAligned:
908        sub         rsp, sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame
909        push        r11                         ; save incoming R11 into save location
910        lea         r11, [rsp + 8 + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + sizeof_InitialPushedArgs]
911
912    PspCalculated:
913        push        r10                         ; save incoming R10 into save location
914        xor         r10d, r10d
915
916        ;;
917        ;; Populate the 'machine frame' in the diagram above.  We have only pushed up to the 'r10 save', so we have not
918        ;; yet pushed 0xA8 bytes of that diagram.
919        ;;
920        ;; [rsp + {offset-in-target-frame-layout-diagram} - {as-yet-unpushed-stack-size}]
921        mov         [rsp + 2c0h - 0a8h], r10           ; init RIP to zero
922        mov         [rsp + 2c8h - 0a8h], r10           ; init CS to zero
923        mov         [rsp + 2d0h - 0a8h], r10           ; init EFLAGS to zero
924        mov         [rsp + 2d8h - 0a8h], r11           ; save PSP in the 'machine frame'
925        mov         [rsp + 2e0h - 0a8h], r10           ; init SS to zero
926
927        .pushframe
928        .allocstack sizeof_XmmAlignPad + sizeof_XmmSave + 2*8    ;; only 2 of the regs from the PInvokeTransitionFrame are on the stack
929
930        push_vol_reg    r9
931        push_vol_reg    r8
932        push_vol_reg    rdx
933        push_vol_reg    rcx
934        push_vol_reg    rax
935        push_vol_reg    r11         ; PSP gets saved into the PInvokeTransitionFrame
936        push_nonvol_reg r15
937        push_nonvol_reg r14
938        push_nonvol_reg r13
939        push_nonvol_reg r12
940        push_nonvol_reg rdi
941        push_nonvol_reg rsi
942        push_nonvol_reg rbx
943        push_vol_reg    PROBE_SAVE_FLAGS_EVERYTHING     ; m_Flags / m_dwAlignPad2
944
945        ;; rdx <- GetThread(), TRASHES rcx
946        INLINE_GETTHREAD rdx, rcx
947
948        push_vol_reg    rdx                             ; m_pThread
949        push_nonvol_reg rbp                             ; m_FramePointer
950        push_vol_reg    r10                             ; m_RIP
951
952        alloc_stack     sizeof_OutgoingScratchSpace
953        END_PROLOGUE
954
955        mov         rbx, r11    ; put PSP into RBX
956        mov         rsi, rdx    ; put Thread* into RSI
957
958        ; RBX is PSP
959        ; RSI is Thread*
960
961        fxsave      [rsp + 0c0h]
962
963        cmp         [g_fHasFastFxsave], 0   ; fast fxsave won't save the xmm registers, so we must do it
964        jz          DontSaveXmmAgain
965
966        ;; 0C0h -> offset of FXSAVE area
967        ;; 0A0h -> offset of xmm0 save area within the FXSAVE area
968        movdqa      [rsp + 0c0h + 0a0h +  0*10h], xmm0
969        movdqa      [rsp + 0c0h + 0a0h +  1*10h], xmm1
970        movdqa      [rsp + 0c0h + 0a0h +  2*10h], xmm2
971        movdqa      [rsp + 0c0h + 0a0h +  3*10h], xmm3
972        movdqa      [rsp + 0c0h + 0a0h +  4*10h], xmm4
973        movdqa      [rsp + 0c0h + 0a0h +  5*10h], xmm5
974        movdqa      [rsp + 0c0h + 0a0h +  6*10h], xmm6
975        movdqa      [rsp + 0c0h + 0a0h +  7*10h], xmm7
976        movdqa      [rsp + 0c0h + 0a0h +  8*10h], xmm8
977        movdqa      [rsp + 0c0h + 0a0h +  9*10h], xmm9
978        movdqa      [rsp + 0c0h + 0a0h + 10*10h], xmm10
979        movdqa      [rsp + 0c0h + 0a0h + 11*10h], xmm11
980        movdqa      [rsp + 0c0h + 0a0h + 12*10h], xmm12
981        movdqa      [rsp + 0c0h + 0a0h + 13*10h], xmm13
982        movdqa      [rsp + 0c0h + 0a0h + 14*10h], xmm14
983        movdqa      [rsp + 0c0h + 0a0h + 15*10h], xmm15
984
985DontSaveXmmAgain:
986        mov         rax, [rbx - 8]
987        mov         [rsp + 2c0h], rax       ; save return address into 'machine frame'
988        mov         [rsp +  20h], rax       ; save return address into PInvokeTransitionFrame
989
990        ; Early out if GC stress is currently suppressed. Do this after we have computed the real address to
991        ; return to but before we link the transition frame onto m_pHackPInvokeTunnel (because hitting this
992        ; condition implies we're running restricted callouts during a GC itself and we could end up
993        ; overwriting a co-op frame set by the code that caused the GC in the first place, e.g. a GC.Collect
994        ; call).
995        test        dword ptr [rsi + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc
996        jnz         DoneWaitingForGc
997
998        ; link the frame into the Thread
999        lea         rcx, [rsp + sizeof_OutgoingScratchSpace]    ; rcx <- PInvokeTransitionFrame*
1000        mov         [rsi + OFFSETOF__Thread__m_pHackPInvokeTunnel], rcx
1001
1002        ;;
1003        ;; Unhijack this thread, if necessary.
1004        ;;
1005        INLINE_THREAD_UNHIJACK  rsi, rax, rcx       ;; trashes RAX, RCX
1006
1007ifdef FEATURE_GC_STRESS
1008        xor         eax, eax
1009        cmp         [g_fGcStressStarted], eax
1010        jz          @F
1011
1012        mov         rdx, [rsp + 2c0h]
1013        mov         rcx, [g_pTheRuntimeInstance]
1014        call        RuntimeInstance__ShouldHijackLoopForGcStress
1015        cmp         al, 0
1016        je          @F
1017
1018        call        REDHAWKGCINTERFACE__STRESSGC
1019@@:
1020endif ;; FEATURE_GC_STRESS
1021
1022        lea         rcx, [rsp + sizeof_OutgoingScratchSpace]    ; calculate PInvokeTransitionFrame pointer
1023        call        RhpWaitForGCNoAbort
1024
1025    DoneWaitingForGc:
1026        mov         rcx, rbx                ; RCX <- PSP
1027
1028        fxrstor     [rsp + 0c0h]
1029
1030        cmp         [g_fHasFastFxsave], 0
1031        jz          DontRestoreXmmAgain
1032
1033        movdqa      xmm0 , [rsp + 0c0h + 0a0h +  0*10h]
1034        movdqa      xmm1 , [rsp + 0c0h + 0a0h +  1*10h]
1035        movdqa      xmm2 , [rsp + 0c0h + 0a0h +  2*10h]
1036        movdqa      xmm3 , [rsp + 0c0h + 0a0h +  3*10h]
1037        movdqa      xmm4 , [rsp + 0c0h + 0a0h +  4*10h]
1038        movdqa      xmm5 , [rsp + 0c0h + 0a0h +  5*10h]
1039        movdqa      xmm6 , [rsp + 0c0h + 0a0h +  6*10h]
1040        movdqa      xmm7 , [rsp + 0c0h + 0a0h +  7*10h]
1041        movdqa      xmm8 , [rsp + 0c0h + 0a0h +  8*10h]
1042        movdqa      xmm9 , [rsp + 0c0h + 0a0h +  9*10h]
1043        movdqa      xmm10, [rsp + 0c0h + 0a0h + 10*10h]
1044        movdqa      xmm11, [rsp + 0c0h + 0a0h + 11*10h]
1045        movdqa      xmm12, [rsp + 0c0h + 0a0h + 12*10h]
1046        movdqa      xmm13, [rsp + 0c0h + 0a0h + 13*10h]
1047        movdqa      xmm14, [rsp + 0c0h + 0a0h + 14*10h]
1048        movdqa      xmm15, [rsp + 0c0h + 0a0h + 15*10h]
1049
1050DontRestoreXmmAgain:
1051        add         rsp, sizeof_OutgoingScratchSpace
1052        mov         eax, [rsp + OFFSETOF__PInvokeTransitionFrame__m_Flags]
1053        test        eax, PTFF_THREAD_ABORT
1054        pop         rax                     ; m_RIP
1055        pop         rbp                     ; m_FramePointer
1056        pop         rax                     ; m_pThread
1057        pop         rax                     ; m_Flags / m_dwAlign2
1058        pop         rbx
1059        pop         rsi
1060        pop         rdi
1061        pop         r12
1062        pop         r13
1063        pop         r14
1064        pop         r15
1065        pop         rax                     ; RSP
1066        pop         rax                     ; RAX save
1067        pop         rdx                     ; RCX save (intentionally discarding it)
1068        pop         rdx
1069        pop         r8
1070        pop         r9
1071        pop         r10
1072        pop         r11
1073
1074
1075        ;; RCX is PSP at this point and the stack looks like this:
1076        ;;   [PSP - 18] -> eflags save
1077        ;;   [PSP - 10] -> rcx save
1078        ;;   [PSP -  8] -> return address
1079        ;;   [PSP]      -> caller's frame
1080        ;;
1081        ;; The final step is to restore eflags, rcx, and return back to the loop target location.
1082
1083        lea         rsp, [rcx - 18h]
1084        jz          @f          ;; result of the test instruction before the pops above
1085        popfq                   ;; restore flags
1086        pop         rcx         ;; restore rcx
1087        mov         rcx, STATUS_REDHAWK_THREAD_ABORT
1088        pop         rdx         ;; return address as exception RIP
1089        jmp         RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception
1090
1091@@:
1092        popfq               ;; restore flags
1093        pop         rcx     ;; restore rcx
1094        ret
1095
1096NESTED_END RhpTrapToGC, _TEXT
1097
1098ifdef FEATURE_GC_STRESS
1099;;
1100;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this.
1101;;
1102LEAF_ENTRY RhpSuppressGcStress, _TEXT
1103
1104        INLINE_GETTHREAD    rax, r10
1105   lock or          dword ptr [rax + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress
1106        ret
1107
1108LEAF_END RhpSuppressGcStress, _TEXT
1109endif ;; FEATURE_GC_STRESS
1110
1111        end
1112