-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module GHC.StgToCmm.Heap (
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset,

        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
        heapStackCheckGen,
        entryHeapCheck',

        mkStaticClosureFields, mkStaticClosure,

        allocDynClosure, allocDynClosureCmm, allocHeapClosure,
        emitSetDynHdr
    ) where

import GhcPrelude hiding ((<*>))

import StgSyn
import CLabel
import GHC.StgToCmm.Layout
import GHC.StgToCmm.Utils
import GHC.StgToCmm.Monad
import GHC.StgToCmm.Prof (profDynAlloc, dynProfHdr, staticProfHdr)
import GHC.StgToCmm.Ticky
import GHC.StgToCmm.Closure
import GHC.StgToCmm.Env

import MkGraph

import Hoopl.Label
import SMRep
import BlockId
import Cmm
import CmmUtils
import CostCentre
import IdInfo( CafInfo(..), mayHaveCafRefs )
import Id ( Id )
import Module
import DynFlags
import FastString( mkFastString, fsLit )
import Panic( sorry )

import Control.Monad (when)
import Data.Maybe (isJust)

-----------------------------------------------------------
--              Initialise dynamic heap objects
-----------------------------------------------------------

allocDynClosure
        :: Maybe Id
        -> CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
        -> FCode CmmExpr -- returns Hp+n

allocDynClosureCmm
        :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, ByteOff)]
        -> FCode CmmExpr -- returns Hp+n

-- allocDynClosure allocates the object in the heap, emits the code to
-- initialise it, and modifies the virtual Hp to account for the
-- allocation.  (There is no separate graph result; the initialising
-- stores go straight into the code stream.)

-- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is
-- only valid until Hp is changed.  The caller should assign the
-- result to a LocalReg if it is required to remain live.
--
-- The reason we don't assign it to a LocalReg here is that the caller
-- is often about to call regIdInfo, which immediately assigns the
-- result of allocDynClosure to a new temp in order to add the tag.
-- So by not generating a LocalReg here we avoid a common source of
-- new temporaries and save some compile time.  This can be quite
-- significant - see test T4801.
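-- For illustration only (this is not the exact Cmm GHC emits): with
-- profiling off, allocating a two-word constructor produces stores
-- relative to the real Hp (which the heap check has already bumped),
-- roughly
--
--      I64[Hp - 16] = Foo_con_info   -- info pointer of the new object
--      I64[Hp - 8]  = x              -- first payload word
--      I64[Hp]      = y              -- second payload word
--
-- and the returned CmmExpr is (Hp - 16), the address of the new object.
-- (Foo_con_info, x and y are made-up names.)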

allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
  let (args, offsets) = unzip args_w_offsets
  cmm_args <- mapM getArgAmode args     -- No void args
  allocDynClosureCmm mb_id info_tbl lf_info
                     use_cc _blame_cc (zip cmm_args offsets)


allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
  -- SAY WHAT WE ARE ABOUT TO DO
  let rep = cit_rep info_tbl
  tickyDynAlloc mb_id rep lf_info
  let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
  allocHeapClosure rep info_ptr use_cc amodes_w_offsets


-- | Low-level heap object allocation.
allocHeapClosure
  :: SMRep                            -- ^ representation of the object
  -> CmmExpr                          -- ^ info pointer
  -> CmmExpr                          -- ^ cost centre
  -> [(CmmExpr,ByteOff)]              -- ^ payload
  -> FCode CmmExpr                    -- ^ returns the address of the object
allocHeapClosure rep info_ptr use_cc payload = do
  profDynAlloc rep use_cc

  virt_hp <- getVirtHp

  -- Find the offset of the info-ptr word
  let info_offset = virt_hp + 1
            -- info_offset is the VirtualHpOffset of the first
            -- word of the new object
            -- Remember, virtHp points to last allocated word,
            -- ie 1 *before* the info-ptr word of new object.
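            -- For example (illustrative): if virt_hp is 3, the new
            -- object's info pointer sits at virtual offset 4; after
            -- allocating a 3-word closure, setVirtHp below leaves
            -- virt_hp at 6, again pointing at the last allocated word.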

  base <- getHpRelOffset info_offset
  emitComment $ mkFastString "allocHeapClosure"
  emitSetDynHdr base info_ptr use_cc

  -- Fill in the fields
  hpStore base payload

  -- Bump the virtual heap pointer
  dflags <- getDynFlags
  setVirtHp (virt_hp + heapClosureSizeW dflags rep)

  return base


emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs
  = do dflags <- getDynFlags
       hpStore base (zip (header dflags) [0, wORD_SIZE dflags ..])
  where
    header :: DynFlags -> [CmmExpr]
    header dflags = [info_ptr] ++ dynProfHdr dflags ccs
        -- ToDo: Parallel stuff
        -- No ticky header
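        -- For example (illustrative): without profiling the header is
        -- just the info pointer; with profiling, dynProfHdr adds the
        -- profiling word(s), in particular the cost-centre stack.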

-- Store the item (expr,off) in base[off]
hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
hpStore base vals = do
  dflags <- getDynFlags
  sequence_ $
    [ emitStore (cmmOffsetB dflags base off) val | (val,off) <- vals ]

-----------------------------------------------------------
--              Layout of static closures
-----------------------------------------------------------

-- Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.

mkStaticClosureFields
        :: DynFlags
        -> CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields dflags info_tbl ccs caf_refs payload
  = mkStaticClosure dflags info_lbl ccs payload padding
        static_link_field saved_info_field
  where
    info_lbl = cit_lbl info_tbl

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:

    is_caf = isThunkRep (cit_rep info_tbl)

    padding
        | is_caf && null payload = [mkIntCLit dflags 0]
        | otherwise = []

    static_link_field
        | is_caf || staticClosureNeedsLink (mayHaveCafRefs caf_refs) info_tbl
        = [static_link_value]
        | otherwise
        = []

    saved_info_field
        | is_caf     = [mkIntCLit dflags 0]
        | otherwise  = []

        -- For a static constructor which has NoCafRefs, we set the
        -- static link field to a non-zero value so the garbage
        -- collector will ignore it.
    static_link_value
        | mayHaveCafRefs caf_refs  = mkIntCLit dflags 0
        | otherwise                = mkIntCLit dflags 3  -- No CAF refs
                                      -- See Note [STATIC_LINK fields]
                                      -- in rts/sm/Storage.h

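-- For example (illustrative): with profiling off, a top-level CAF with
-- no free variables is laid out by mkStaticClosureFields as
--
--      [ info ptr, 0 (padding; becomes the indirectee),
--        0 (static link), 0 (saved info) ]
--
-- When a closure gets a static link field but has no CAF refs, the field
-- is initialised to 3 so that the garbage collector ignores it; see
-- Note [STATIC_LINK fields] in rts/sm/Storage.h.
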
mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
  =  [CmmLabel info_lbl]
  ++ staticProfHdr dflags ccs
  ++ payload
  ++ padding
  ++ static_link_field
  ++ saved_info_field

-----------------------------------------------------------
--              Heap overflow checking
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
        r = gc_1p( r )
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.

  * A series of canned entry points like
        gcfun_2p( f, x, y )
    where f is a function closure of arity 2
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
     Note the tail call to the garbage collector;
     it should do no register shuffling

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             call gc()  -- Needs an info table
             goto T }

* altHeapCheck: Immediately following an eval
  Started as
        case f x y of r { (p,q) -> rhs }
  (a) With a canned sequence for the results of f
       (which is the very common case since
       all boxed cases return just one pointer
           ...
           r = f( x, y )
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...

        L: r = gc_1p( r )
           goto K }

        Here, the info table needed by the call
        to gc_1p should be the *same* as the
        one for the call to f; the C-- optimiser
        spots this sharing opportunity)

   (b) No canned sequence for results of f
       Note second info table
           ...
           (r1,r2,r3) = call f( x, y )
        K:
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...

        L: call gc()    -- Extra info table here
           goto K

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
       case branch *not* following eval,
       or let-no-escape
  Exactly the same as the previous case:

        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...

        L: call gc()
           goto K
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.

entryHeapCheck :: ClosureInfo
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()

entryHeapCheck cl_info nodeSet arity args code
  = entryHeapCheck' is_fastf node arity args code
  where
    node = case nodeSet of
              Just r  -> CmmReg (CmmLocal r)
              Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info)

    is_fastf = case closureFunInfo cl_info of
                 Just (_, ArgGen _) -> False
                 _otherwise         -> True

-- | lower-level version for CmmParse
entryHeapCheck' :: Bool           -- is a known function pattern
                -> CmmExpr        -- expression for the closure pointer
                -> Int            -- Arity -- not same as len args b/c of voids
                -> [LocalReg]     -- Non-void args (empty for thunk)
                -> FCode ()
                -> FCode ()
entryHeapCheck' is_fastf node arity args code
  = do dflags <- getDynFlags
       let is_thunk = arity == 0

           args' = map (CmmReg . CmmLocal) args
           stg_gc_fun    = CmmReg (CmmGlobal GCFun)
           stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)

           {- Thunks:          jump stg_gc_enter_1

              Function (fast): call (NativeNode) stg_gc_fun(fun, args)

              Function (slow): call (slow) stg_gc_fun(fun, args)
           -}
           gc_call upd
               | is_thunk
                 = mkJump dflags NativeNodeCall stg_gc_enter1 [node] upd

               | is_fastf
                 = mkJump dflags NativeNodeCall stg_gc_fun (node : args') upd

               | otherwise
                 = mkJump dflags Slow stg_gc_fun (node : args') upd

       updfr_sz <- getUpdFrameOff

       loop_id <- newBlockId
       emitLabel loop_id
       heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code

-- ------------------------------------------------------------
-- A heap/stack check in a case alternative


-- If there are multiple alts and we need to GC, but don't have a
-- continuation already (the scrut was simple), then we should
-- pre-generate the continuation.  (if there are multiple alts it is
-- always a canned GC point).

-- altHeapCheck:
-- If we have a return continuation,
--   then if it is a canned GC pattern,
--           then we do mkJumpReturnsTo
--           else we do a normal call to stg_gc_noregs
--   else if it is a canned GC pattern,
--           then generate the continuation and do mkCallReturnsTo
--           else we do a normal call to stg_gc_noregs

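-- For illustration only (not the exact Cmm): a case alternative binding
-- a single boxed result r, where the scrutinee was simple and there is
-- no return continuation yet, ends up roughly as
--
--      Lcont:
--         Hp = Hp + <n>
--         if (Hp > HpLim) { HpAlloc = <n>; goto Lgc }
--         ...code for the alternative...
--      Lgc:
--         R1 = r; call stg_gc_unpt_r1(R1) returns to Lret
--      Lret:
--         r = R1; goto Lcont
--
-- The label names are made up; the out-of-line pieces are produced by
-- emitOutOfLine in altOrNoEscapeHeapCheck and by do_checks below.
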
altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code

altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
altOrNoEscapeHeapCheck checkYield regs code = do
    dflags <- getDynFlags
    case cannedGCEntryPoint dflags regs of
      Nothing -> genericGC checkYield code
      Just gc -> do
        lret <- newBlockId
        let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) regs []
        lcont <- newBlockId
        tscope <- getTickScope
        emitOutOfLine lret (copyin <*> mkBranch lcont, tscope)
        emitLabel lcont
        cannedGCReturnsTo checkYield False gc regs lret off code

altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code
  = do dflags <- getDynFlags
       case cannedGCEntryPoint dflags regs of
           Nothing -> genericGC False code
           Just gc -> cannedGCReturnsTo False True gc regs lret off code

-- noEscapeHeapCheck is implemented identically to altHeapCheck (which
-- is more efficient), but cannot be optimized away in the non-allocating
-- case because it may occur in a loop
noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code

cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
                  -> FCode a
                  -> FCode a
cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code
  = do dflags <- getDynFlags
       updfr_sz <- getUpdFrameOff
       heapCheck False checkYield (gc_call dflags gc updfr_sz) code
  where
    reg_exprs = map (CmmReg . CmmLocal) regs
      -- Note [stg_gc arguments]

      -- NB. we use the NativeReturn convention for passing arguments
      -- to the canned heap-check routines, because we are in a case
      -- alternative and hence the [LocalReg] was passed to us in the
      -- NativeReturn convention.
    gc_call dflags label sp
      | cont_on_stack
      = mkJumpReturnsTo dflags label NativeReturn reg_exprs lret off sp
      | otherwise
      = mkCallReturnsTo dflags label NativeReturn reg_exprs lret off sp []

genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code
  = do updfr_sz <- getUpdFrameOff
       lretry <- newBlockId
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       heapCheck False checkYield (call <*> mkBranch lretry) code

cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint dflags regs
  = case map localRegType regs of
      []  -> Just (mkGcLabel "stg_gc_noregs")
      [ty]
          | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
          | isFloatType ty -> case width of
                                  W32       -> Just (mkGcLabel "stg_gc_f1")
                                  W64       -> Just (mkGcLabel "stg_gc_d1")
                                  _         -> Nothing

          | width == wordWidth dflags -> Just (mkGcLabel "stg_gc_unbx_r1")
          | width == W64              -> Just (mkGcLabel "stg_gc_l1")
          | otherwise                 -> Nothing
          where
              width = typeWidth ty
      [ty1,ty2]
          |  isGcPtrType ty1
          && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp")
      [ty1,ty2,ty3]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp")
      [ty1,ty2,ty3,ty4]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3
          && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp")
      _otherwise -> Nothing
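
-- For example (illustrative): a single boxed result goes through
-- stg_gc_unpt_r1, a single Double# through stg_gc_d1, a word-sized
-- unboxed result through stg_gc_unbx_r1, and two boxed results through
-- stg_gc_pp; anything without a canned entry point falls back to
-- genericGC and stg_gc_noregs.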

-- Note [stg_gc arguments]
-- ~~~~~~~~~~~~~~~~~~~~~~~
-- It might seem that we could avoid passing the arguments to the
-- stg_gc function, because they are already in the right registers.
-- While this is usually the case, it isn't always.  Sometimes the
-- code generator has cleverly avoided the eval in a case, e.g. in
-- ffi/should_run/4221.hs we found
--
--   case a_r1mb of z
--     FunPtr x y -> ...
--
-- where a_r1mb is bound to a top-level constructor, and is known to be
-- evaluated.  The codegen just assigns x, y and z, and continues;
-- R1 is never assigned.
--
-- So we'll have to rely on optimisations to eliminate these
-- assignments where possible.


-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
generic_gc = mkGcLabel "stg_gc_noregs"

-- | Create a CLabel for calling a garbage collector entry point
mkGcLabel :: String -> CmmExpr
mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit s)))

-------------------------------
heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack checkYield do_gc code
  = getHeapUsage $ \ hpHw ->
    -- Emit heap checks, but be sure to do it lazily so
    -- that the conditionals on hpHw don't cause a black hole
    do  { dflags <- getDynFlags
        ; let mb_alloc_bytes
                 | hpHw > mBLOCK_SIZE = sorry $ unlines
                    [" Trying to allocate more than "++show mBLOCK_SIZE++" bytes.",
                     "",
                     "This is currently not possible due to a limitation of GHC's code generator.",
                     "See https://gitlab.haskell.org/ghc/ghc/issues/4505 for details.",
                     "Suggestion: read data from a file instead of having large static data",
                     "structures in code."]
                 | hpHw > 0  = Just (mkIntExpr dflags (hpHw * (wORD_SIZE dflags)))
                 | otherwise = Nothing
                 where mBLOCK_SIZE = bLOCKS_PER_MBLOCK dflags * bLOCK_SIZE_W dflags
              stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
                      | otherwise  = Nothing
        ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
        ; tickyAllocHeap True hpHw
        ; setRealHp hpHw
        ; code }
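
-- With typical 64-bit settings the limit above works out to roughly one
-- megablock (about 1MB) of allocation from a single heap check; larger
-- single allocations trip the sorry above.  (Figures are illustrative;
-- the real values come from the DynFlags block-size constants.)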

heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
heapStackCheckGen stk_hwm mb_bytes
  = do updfr_sz <- getUpdFrameOff
       lretry <- newBlockId
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)

-- Note [Single stack check]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~
-- When compiling a function we can determine how much stack space it
-- will use. We therefore need to perform only a single stack check at
-- the beginning of a function to see if we have enough stack space.
--
-- The check boils down to comparing Sp-N with SpLim, where N is the
-- amount of stack space needed (see Note [Stack usage] below).  *BUT*
-- at this stage of the pipeline we are not supposed to refer to Sp
-- itself, because the stack is not yet manifest, so we don't quite
-- know where Sp is pointing.

-- So instead of referring directly to Sp - as we used to do in the
-- past - the code generator uses (old + 0) in the stack check. That
-- is the address of the first word of the old area, so if we add N
-- we'll get the address of the highest used word.
--
-- This makes the check robust.  For example, while we need to perform
-- only one stack check for each function, we could in theory place
-- more stack checks later in the function. They would be redundant,
-- but not incorrect (in the sense that they should not change program
-- behaviour). We need to make sure however that a stack check
-- inserted after incrementing the stack pointer checks for a
-- correspondingly smaller stack space. This would not be the case if the
-- code generator produced direct references to Sp. By referencing
-- (old + 0) we make sure that we always check for the correct amount of
-- stack: when converting (old + 0) to Sp the stack layout phase takes
-- into account changes already made to the stack pointer. The idea for
-- this change came from observations made while debugging #8275.
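--
-- As a concrete (illustrative) shape, the single check emitted at the
-- start of a function looks roughly like
--
--      if ((old + 0) - <bytes of stack needed> < SpLim) goto gc;
--
-- where <bytes of stack needed> is CmmHighStackMark (see Note [Stack
-- usage] below), and the stack layout phase later rewrites (old + 0)
-- into the proper Sp-relative address.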

-- Note [Stack usage]
-- ~~~~~~~~~~~~~~~~~~
-- At the moment we convert from STG to Cmm we don't know N, the
-- number of bytes of stack that the function will use, so we use a
-- special late-bound CmmLit, namely
--       CmmHighStackMark
-- to stand for the number of bytes needed. When the stack is made
-- manifest, the number of bytes needed is calculated, and used to
-- replace occurrences of CmmHighStackMark.
--
-- The (Maybe CmmExpr) passed to do_checks is usually
--     Just (CmmLit CmmHighStackMark)
-- but can also be (in certain hand-written RTS functions)
--     Just (CmmLit 8)  or some other fixed value
-- If it is Nothing, we don't generate a stack check at all.

do_checks :: Maybe CmmExpr    -- Should we check the stack?
                              -- See Note [Stack usage]
          -> Bool             -- Should we check for preemption?
          -> Maybe CmmExpr    -- Heap headroom (bytes)
          -> CmmAGraph        -- What to do on failure
          -> FCode ()
do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do
  dflags <- getDynFlags
  gc_id <- newBlockId

  let
    Just alloc_lit = mb_alloc_lit

    bump_hp   = cmmOffsetExprB dflags hpExpr alloc_lit

    -- Sp overflow if ((old + 0) - CmmHighStackMark < SpLim)
    -- At the beginning of a function old + 0 = Sp
    -- See Note [Single stack check]
    sp_oflo sp_hwm =
         CmmMachOp (mo_wordULt dflags)
                  [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg)))
                             [CmmStackSlot Old 0, sp_hwm],
                   CmmReg spLimReg]

    -- Hp overflow if (Hp > HpLim)
    -- (Hp has been incremented by now)
    -- HpLim points to the LAST WORD of valid allocation space.
    hp_oflo = CmmMachOp (mo_wordUGt dflags) [hpExpr, hpLimExpr]

    alloc_n = mkAssign hpAllocReg alloc_lit

  case mb_stk_hwm of
    Nothing -> return ()
    Just stk_hwm -> tickyStackCheck
      >> (emit =<< mkCmmIfGoto' (sp_oflo stk_hwm) gc_id (Just False) )

  -- Emit new label that might potentially be a header
  -- of a self-recursive tail call.
  -- See Note [Self-recursive loop header].
  self_loop_info <- getSelfLoop
  case self_loop_info of
    Just (_, loop_header_id, _)
        | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id
    _otherwise -> return ()

  if (isJust mb_alloc_lit)
    then do
     tickyHeapCheck
     emitAssign hpReg bump_hp
     emit =<< mkCmmIfThen' hp_oflo (alloc_n <*> mkBranch gc_id) (Just False)
    else do
      when (checkYield && not (gopt Opt_OmitYields dflags)) $ do
         -- Yielding if HpLim == 0
         let yielding = CmmMachOp (mo_wordEq dflags)
                                  [CmmReg hpLimReg,
                                   CmmLit (zeroCLit dflags)]
         emit =<< mkCmmIfGoto' yielding gc_id (Just False)

  tscope <- getTickScope
  emitOutOfLine gc_id
   (do_gc, tscope) -- this is expected to jump back somewhere

                -- Test for stack pointer exhaustion, then
                -- bump heap pointer, and test for heap exhaustion
                -- Note that we don't move the heap pointer unless the
                -- stack check succeeds.  Otherwise we might end up
                -- with slop at the end of the current block, which can
                -- confuse the LDV profiler.
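
-- Schematically (illustrative, not the exact output), when both a stack
-- and a heap check are requested do_checks emits
--
--      if ((old + 0) - CmmHighStackMark < SpLim) goto gc;
--      Hp = Hp + <bytes>;
--      if (Hp > HpLim) { HpAlloc = <bytes>; goto gc; }
--      ...
--   gc:
--      <do_gc code, which is expected to jump back>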

-- Note [Self-recursive loop header]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- A self-recursive loop header is required by the loopification optimization
-- (see Note [Self-recursive tail calls] in GHC.StgToCmm.Expr). We emit it if:
--
--  1. There is information about the self-loop in the FCode environment. We
--     don't check the binder (first component of the self_loop_info) because
--     we are certain that if the self-loop info is present then we are
--     compiling the binder body. Reason: the only possible way to get here
--     with the self_loop_info present is from closureCodeBody.
--
--  2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible
--     to preempt the heap check (see #367 for motivation behind this check). It
--     is True for heap checks placed at the entry to a function and
--     let-no-escape heap checks but False for other heap checks (eg. in case
--     alternatives or created from hand-written high-level Cmm). The second
--     check (isJust mb_stk_hwm) is true for heap checks at the entry to a
--     function and for some heap checks created in hand-written Cmm; for all
--     other heap checks mb_stk_hwm is Nothing. In other words the only
--     situation when both conditions are true is when compiling stack and heap
--     checks at the entry to a function. This is the only situation when we
--     want to emit a self-loop label.