-----------------------------------------------------------------------------
--
-- Stg to C--: heap management functions
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module GHC.StgToCmm.Heap (
        getVirtHp, setVirtHp, setRealHp,
        getHpRelOffset,

        entryHeapCheck, altHeapCheck, noEscapeHeapCheck, altHeapCheckReturnsTo,
        heapStackCheckGen,
        entryHeapCheck',

        mkStaticClosureFields, mkStaticClosure,

        allocDynClosure, allocDynClosureCmm, allocHeapClosure,
        emitSetDynHdr
    ) where

-- NB. (<*>) is hidden from the prelude because this module uses the
-- CmmAGraph-sequencing (<*>) from MkGraph instead.
import GhcPrelude hiding ((<*>))

import StgSyn
import CLabel
import GHC.StgToCmm.Layout
import GHC.StgToCmm.Utils
import GHC.StgToCmm.Monad
import GHC.StgToCmm.Prof (profDynAlloc, dynProfHdr, staticProfHdr)
import GHC.StgToCmm.Ticky
import GHC.StgToCmm.Closure
import GHC.StgToCmm.Env

import MkGraph

import Hoopl.Label
import SMRep
import BlockId
import Cmm
import CmmUtils
import CostCentre
import IdInfo( CafInfo(..), mayHaveCafRefs )
import Id ( Id )
import Module
import DynFlags
import FastString( mkFastString, fsLit )
import Panic( sorry )

import Control.Monad (when)
import Data.Maybe (isJust)

-----------------------------------------------------------
-- Initialise dynamic heap objects
-----------------------------------------------------------

allocDynClosure
        :: Maybe Id
        -> CmmInfoTable
        -> LambdaFormInfo
        -> CmmExpr              -- Cost Centre to stick in the object
        -> CmmExpr              -- Cost Centre to blame for this alloc
                                -- (usually the same; sometimes "OVERHEAD")

        -> [(NonVoid StgArg, VirtualHpOffset)]  -- Offsets from start of object
                                                -- ie Info ptr has offset zero.
                                                -- No void args in here
        -> FCode CmmExpr        -- returns Hp+n

allocDynClosureCmm
        :: Maybe Id -> CmmInfoTable -> LambdaFormInfo -> CmmExpr -> CmmExpr
        -> [(CmmExpr, ByteOff)]
        -> FCode CmmExpr        -- returns Hp+n

-- allocDynClosure allocates the thing in the heap,
-- and modifies the virtual Hp to account for this.
-- The second return value is the graph that sets the value of the
-- returned LocalReg, which should point to the closure after executing
-- the graph.

-- allocDynClosure returns an (Hp+8) CmmExpr, and hence the result is
-- only valid until Hp is changed.  The caller should assign the
-- result to a LocalReg if it is required to remain live.
--
-- The reason we don't assign it to a LocalReg here is that the caller
-- is often about to call regIdInfo, which immediately assigns the
-- result of allocDynClosure to a new temp in order to add the tag.
-- So by not generating a LocalReg here we avoid a common source of
-- new temporaries and save some compile time.  This can be quite
-- significant - see test T4801.


-- Translate each StgArg payload item to a Cmm amode, then defer to
-- the Cmm-level allocator below.
allocDynClosure mb_id info_tbl lf_info use_cc _blame_cc args_w_offsets = do
  let (args, offsets) = unzip args_w_offsets
  cmm_args <- mapM getArgAmode args     -- No void args
  allocDynClosureCmm mb_id info_tbl lf_info
                     use_cc _blame_cc (zip cmm_args offsets)


allocDynClosureCmm mb_id info_tbl lf_info use_cc _blame_cc amodes_w_offsets = do
  -- SAY WHAT WE ARE ABOUT TO DO
  -- (ticky allocation counters are bumped before the actual allocation)
  let rep = cit_rep info_tbl
  tickyDynAlloc mb_id rep lf_info
  let info_ptr = CmmLit (CmmLabel (cit_lbl info_tbl))
  allocHeapClosure rep info_ptr use_cc amodes_w_offsets


-- | Low-level heap object allocation.
allocHeapClosure
  :: SMRep                            -- ^ representation of the object
  -> CmmExpr                          -- ^ info pointer
  -> CmmExpr                          -- ^ cost centre
  -> [(CmmExpr,ByteOff)]              -- ^ payload
  -> FCode CmmExpr                    -- ^ returns the address of the object
allocHeapClosure rep info_ptr use_cc payload = do
  -- Profiling bookkeeping for this allocation.
  profDynAlloc rep use_cc

  vHp <- getVirtHp

  -- vHp points at the last allocated word, i.e. one word *before*
  -- the info-pointer word of the new object, so the new object's
  -- first (info-ptr) word lives at virtual offset vHp + 1.
  let obj_off = vHp + 1

  obj_addr <- getHpRelOffset obj_off
  emitComment $ mkFastString "allocHeapClosure"
  emitSetDynHdr obj_addr info_ptr use_cc

  -- Write the payload words after the header.
  hpStore obj_addr payload

  -- Account for the allocation by bumping the virtual heap pointer.
  dflags <- getDynFlags
  setVirtHp (vHp + heapClosureSizeW dflags rep)

  return obj_addr


-- | Write the dynamic-closure header at @base@: the info pointer first,
-- followed by the profiling header words (when profiling is enabled).
emitSetDynHdr :: CmmExpr -> CmmExpr -> CmmExpr -> FCode ()
emitSetDynHdr base info_ptr ccs = do
  dflags <- getDynFlags
  let hdr_words = info_ptr : dynProfHdr dflags ccs
        -- ToDo: Parallel stuff
        -- No ticky header
  hpStore base (zip hdr_words [0, wORD_SIZE dflags ..])

-- | Store each item @(expr, off)@ in @base[off]@, in list order.
hpStore :: CmmExpr -> [(CmmExpr, ByteOff)] -> FCode ()
hpStore base vals = do
  dflags <- getDynFlags
  mapM_ (\(val, off) -> emitStore (cmmOffsetB dflags base off) val) vals

-----------------------------------------------------------
-- Layout of static closures
-----------------------------------------------------------

-- Make a static closure, adding on any extra padding needed for CAFs,
-- and adding a static link field if necessary.
mkStaticClosureFields
        :: DynFlags
        -> CmmInfoTable
        -> CostCentreStack
        -> CafInfo
        -> [CmmLit]             -- Payload
        -> [CmmLit]             -- The full closure
mkStaticClosureFields dflags info_tbl ccs caf_refs payload
  = mkStaticClosure dflags info_lbl ccs payload padding
                    static_link_field saved_info_field
  where
    info_lbl = cit_lbl info_tbl

    -- CAFs must have consistent layout, regardless of whether they
    -- are actually updatable or not.  The layout of a CAF is:
    --
    --        3 saved_info
    --        2 static_link
    --        1 indirectee
    --        0 info ptr
    --
    -- the static_link and saved_info fields must always be in the
    -- same place.  So we use isThunkRep rather than closureUpdReqd
    -- here:
    is_caf = isThunkRep (cit_rep info_tbl)

    -- A CAF with no payload still reserves the indirectee word.
    padding
      | is_caf, null payload = [mkIntCLit dflags 0]
      | otherwise            = []

    static_link_field
      | is_caf || staticClosureNeedsLink (mayHaveCafRefs caf_refs) info_tbl
                  = [static_link_value]
      | otherwise = []

    saved_info_field
      | is_caf    = [mkIntCLit dflags 0]
      | otherwise = []

    -- For a static constructor which has NoCafRefs, we set the
    -- static link field to a non-zero value so the garbage
    -- collector will ignore it.
    static_link_value
      | mayHaveCafRefs caf_refs = mkIntCLit dflags 0
      | otherwise               = mkIntCLit dflags 3  -- No CAF refs
                                    -- See Note [STATIC_LINK fields]
                                    -- in rts/sm/Storage.h

-- | Assemble the words of a static closure in layout order:
-- info label, profiling header, payload, padding, static link,
-- saved info.
mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
  -> [CmmLit] -> [CmmLit] -> [CmmLit] -> [CmmLit]
mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
  = concat
      [ [CmmLabel info_lbl]
      , staticProfHdr dflags ccs
      , payload
      , padding
      , static_link_field
      , saved_info_field
      ]

-----------------------------------------------------------
-- Heap overflow checking
-----------------------------------------------------------

{- Note [Heap checks]
   ~~~~~~~~~~~~~~~~~~
Heap checks come in various forms.  We provide the following entry
points to the runtime system, all of which use the native C-- entry
convention.

  * gc() performs garbage collection and returns
    nothing to its caller

  * A series of canned entry points like
        r = gc_1p( r )
    where r is a pointer.  This performs gc, and
    then returns its argument r to its caller.

  * A series of canned entry points like
        gcfun_2p( f, x, y )
    where f is a function closure of arity 2
    This performs garbage collection, keeping alive the
    three argument ptrs, and then tail-calls f(x,y)

These are used in the following circumstances

* entryHeapCheck: Function entry
    (a) With a canned GC entry sequence
        f( f_clo, x:ptr, y:ptr ) {
             Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             jump gcfun_2p( f_clo, x, y ) }
     Note the tail call to the garbage collector;
     it should do no register shuffling

    (b) No canned sequence
        f( f_clo, x:ptr, y:ptr, ...etc... ) {
          T: Hp = Hp+8
             if Hp > HpLim goto L
             ...
          L: HpAlloc = 8
             call gc()  -- Needs an info table
             goto T }

* altHeapCheck: Immediately following an eval
  Started as
        case f x y of r { (p,q) -> rhs }
  (a) With a canned sequence for the results of f
       (which is the very common case since
       all boxed cases return just one pointer
           ...
           r = f( x, y )
        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...

        L: r = gc_1p( r )
           goto K }

        Here, the info table needed by the call
        to gc_1p should be the *same* as the
        one for the call to f; the C-- optimiser
        spots this sharing opportunity)

   (b) No canned sequence for results of f
       Note second info table
           ...
           (r1,r2,r3) = call f( x, y )
        K:
           Hp = Hp+8
           if Hp > HpLim goto L
           ...code for rhs...

        L: call gc()    -- Extra info table here
           goto K

* generalHeapCheck: Anywhere else
  e.g. entry to thunk
       case branch *not* following eval,
       or let-no-escape
  Exactly the same as the previous case:

        K:      -- K needs an info table
           Hp = Hp+8
           if Hp > HpLim goto L
           ...

        L: call gc()
           goto K
-}

--------------------------------------------------------------
-- A heap/stack check at a function or thunk entry point.
entryHeapCheck :: ClosureInfo
               -> Maybe LocalReg -- Function (closure environment)
               -> Int            -- Arity -- not same as len args b/c of voids
               -> [LocalReg]     -- Non-void args (empty for thunk)
               -> FCode ()
               -> FCode ()

entryHeapCheck cl_info nodeSet arity args code
  = entryHeapCheck' is_fastf node arity args code
  where
    -- node: the closure pointer; a register if we were given one,
    -- otherwise the static closure label.
    node = case nodeSet of
              Just r  -> CmmReg (CmmLocal r)
              Nothing -> CmmLit (CmmLabel $ staticClosureLabel cl_info)

    -- ArgGen (general argument pattern) means the slow calling
    -- convention; every other pattern is "fast".
    is_fastf = case closureFunInfo cl_info of
                 Just (_, ArgGen _) -> False
                 _otherwise         -> True

-- | lower-level version for CmmParse
entryHeapCheck' :: Bool           -- is a known function pattern
                -> CmmExpr        -- expression for the closure pointer
                -> Int            -- Arity -- not same as len args b/c of voids
                -> [LocalReg]     -- Non-void args (empty for thunk)
                -> FCode ()
                -> FCode ()
entryHeapCheck' is_fastf node arity args code
  = do dflags <- getDynFlags
       let is_thunk = arity == 0

           args' = map (CmmReg . CmmLocal) args
           stg_gc_fun    = CmmReg (CmmGlobal GCFun)
           stg_gc_enter1 = CmmReg (CmmGlobal GCEnter1)

           {- Thunks:          jump stg_gc_enter_1

              Function (fast): call (NativeNode) stg_gc_fun(fun, args)

              Function (slow): call (slow) stg_gc_fun(fun, args)
           -}
           gc_call upd
               | is_thunk
                 = mkJump dflags NativeNodeCall stg_gc_enter1 [node] upd

               | is_fastf
                 = mkJump dflags NativeNodeCall stg_gc_fun (node : args') upd

               | otherwise
                 = mkJump dflags Slow stg_gc_fun (node : args') upd

       updfr_sz <- getUpdFrameOff

       -- loop_id labels the start of the check so the GC call can
       -- branch back and re-test after collecting.
       loop_id <- newBlockId
       emitLabel loop_id
       heapCheck True True (gc_call updfr_sz <*> mkBranch loop_id) code

-- ------------------------------------------------------------
-- A heap/stack check in a case alternative


-- If there are multiple alts and we need to GC, but don't have a
-- continuation already (the scrut was simple), then we should
-- pre-generate the continuation.  (if there are multiple alts it is
-- always a canned GC point).

-- altHeapCheck:
-- If we have a return continuation,
--   then if it is a canned GC pattern,
--           then we do mkJumpReturnsTo
--           else we do a normal call to stg_gc_noregs
--   else if it is a canned GC pattern,
--           then generate the continuation and do mkCallReturnsTo
--           else we do a normal call to stg_gc_noregs

altHeapCheck :: [LocalReg] -> FCode a -> FCode a
altHeapCheck regs code = altOrNoEscapeHeapCheck False regs code

altOrNoEscapeHeapCheck :: Bool -> [LocalReg] -> FCode a -> FCode a
altOrNoEscapeHeapCheck checkYield regs code = do
    dflags <- getDynFlags
    case cannedGCEntryPoint dflags regs of
      Nothing -> genericGC checkYield code
      Just gc -> do
        -- Pre-generate a return continuation (lret) that copies the
        -- returned values in and branches to the alternative's code.
        lret <- newBlockId
        let (off, _, copyin) = copyInOflow dflags NativeReturn (Young lret) regs []
        lcont <- newBlockId
        tscope <- getTickScope
        emitOutOfLine lret (copyin <*> mkBranch lcont, tscope)
        emitLabel lcont
        cannedGCReturnsTo checkYield False gc regs lret off code

altHeapCheckReturnsTo :: [LocalReg] -> Label -> ByteOff -> FCode a -> FCode a
altHeapCheckReturnsTo regs lret off code
  = do dflags <- getDynFlags
       case cannedGCEntryPoint dflags regs of
           Nothing -> genericGC False code
           Just gc -> cannedGCReturnsTo False True gc regs lret off code

-- noEscapeHeapCheck is implemented identically to altHeapCheck (which
-- is more efficient), but cannot be optimized away in the non-allocating
-- case because it may occur in a loop
noEscapeHeapCheck :: [LocalReg] -> FCode a -> FCode a
noEscapeHeapCheck regs code = altOrNoEscapeHeapCheck True regs code

cannedGCReturnsTo :: Bool -> Bool -> CmmExpr -> [LocalReg] -> Label -> ByteOff
                  -> FCode a
                  -> FCode a
cannedGCReturnsTo checkYield cont_on_stack gc regs lret off code
  = do dflags <- getDynFlags
       updfr_sz <- getUpdFrameOff
       heapCheck False checkYield (gc_call dflags gc updfr_sz) code
  where
    reg_exprs = map (CmmReg . CmmLocal) regs
      -- Note [stg_gc arguments]

      -- NB. we use the NativeReturn convention for passing arguments
      -- to the canned heap-check routines, because we are in a case
      -- alternative and hence the [LocalReg] was passed to us in the
      -- NativeReturn convention.
    gc_call dflags label sp
      | cont_on_stack
      = mkJumpReturnsTo dflags label NativeReturn reg_exprs lret off sp
      | otherwise
      = mkCallReturnsTo dflags label NativeReturn reg_exprs lret off sp []

-- Fallback when no canned GC entry point fits: call the generic GC
-- (stg_gc_noregs) and loop back to re-run the check.
genericGC :: Bool -> FCode a -> FCode a
genericGC checkYield code
  = do updfr_sz <- getUpdFrameOff
       lretry <- newBlockId
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       heapCheck False checkYield (call <*> mkBranch lretry) code

-- Pick a canned GC entry point matching the shape of the live
-- result registers, or Nothing if no canned routine fits.
cannedGCEntryPoint :: DynFlags -> [LocalReg] -> Maybe CmmExpr
cannedGCEntryPoint dflags regs
  = case map localRegType regs of
      []  -> Just (mkGcLabel "stg_gc_noregs")
      [ty]
          | isGcPtrType ty -> Just (mkGcLabel "stg_gc_unpt_r1")
          | isFloatType ty -> case width of
                                  W32 -> Just (mkGcLabel "stg_gc_f1")
                                  W64 -> Just (mkGcLabel "stg_gc_d1")
                                  _   -> Nothing

          | width == wordWidth dflags -> Just (mkGcLabel "stg_gc_unbx_r1")
          | width == W64              -> Just (mkGcLabel "stg_gc_l1")
          | otherwise                 -> Nothing
          where
              width = typeWidth ty
      [ty1,ty2]
          |  isGcPtrType ty1
          && isGcPtrType ty2 -> Just (mkGcLabel "stg_gc_pp")
      [ty1,ty2,ty3]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3 -> Just (mkGcLabel "stg_gc_ppp")
      [ty1,ty2,ty3,ty4]
          |  isGcPtrType ty1
          && isGcPtrType ty2
          && isGcPtrType ty3
          && isGcPtrType ty4 -> Just (mkGcLabel "stg_gc_pppp")
      _otherwise -> Nothing

-- Note [stg_gc arguments]
-- It might seem that we could avoid passing the arguments to the
-- stg_gc function, because they are already in the right registers.
-- While this is usually the case, it isn't always.  Sometimes the
-- code generator has cleverly avoided the eval in a case, e.g. in
-- ffi/should_run/4221.hs we found
--
--   case a_r1mb of z
--     FunPtr x y -> ...
--
-- where a_r1mb is bound a top-level constructor, and is known to be
-- evaluated.  The codegen just assigns x, y and z, and continues;
-- R1 is never assigned.
--
-- So we'll have to rely on optimisations to eliminate these
-- assignments where possible.


-- | The generic GC procedure; no params, no results
generic_gc :: CmmExpr
generic_gc = mkGcLabel "stg_gc_noregs"

-- | Create a CLabel for calling a garbage collector entry point
mkGcLabel :: String -> CmmExpr
mkGcLabel s = CmmLit (CmmLabel (mkCmmCodeLabel rtsUnitId (fsLit s)))

-------------------------------
-- | Emit heap (and optionally stack) checks around @code@.
-- @do_gc@ is the graph to run when a check fails.  The heap
-- requirement hpHw comes lazily from getHeapUsage over @code@.
heapCheck :: Bool -> Bool -> CmmAGraph -> FCode a -> FCode a
heapCheck checkStack checkYield do_gc code
  = getHeapUsage $ \ hpHw ->
    -- Emit heap checks, but be sure to do it lazily so
    -- that the conditionals on hpHw don't cause a black hole
    do  { dflags <- getDynFlags
        ; let mb_alloc_bytes
                 -- Single allocations larger than a megablock are not
                 -- supported; bail out with a compiler "sorry".
                 | hpHw > mBLOCK_SIZE = sorry $ unlines
                    [" Trying to allocate more than "++show mBLOCK_SIZE++" bytes.",
                     "",
                     "This is currently not possible due to a limitation of GHC's code generator.",
                     "See https://gitlab.haskell.org/ghc/ghc/issues/4505 for details.",
                     "Suggestion: read data from a file instead of having large static data",
                     "structures in code."]
                 | hpHw > 0  = Just (mkIntExpr dflags (hpHw * (wORD_SIZE dflags)))
                 | otherwise = Nothing
                 where mBLOCK_SIZE = bLOCKS_PER_MBLOCK dflags * bLOCK_SIZE_W dflags
              stk_hwm | checkStack = Just (CmmLit CmmHighStackMark)
                      | otherwise  = Nothing
        ; codeOnly $ do_checks stk_hwm checkYield mb_alloc_bytes do_gc
        ; tickyAllocHeap True hpHw
        ; setRealHp hpHw
        ; code }

-- | Emit a stack and/or heap check with the generic GC as the
-- failure continuation; used for hand-written Cmm.
-- See Note [Stack usage] for the meaning of the first argument.
heapStackCheckGen :: Maybe CmmExpr -> Maybe CmmExpr -> FCode ()
heapStackCheckGen stk_hwm mb_bytes
  = do updfr_sz <- getUpdFrameOff
       lretry <- newBlockId
       emitLabel lretry
       call <- mkCall generic_gc (GC, GC) [] [] updfr_sz []
       do_checks stk_hwm False mb_bytes (call <*> mkBranch lretry)

-- Note [Single stack check]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~
-- When compiling a function we can determine how much stack space it
-- will use. We therefore need to perform only a single stack check at
-- the beginning of a function to see if we have enough stack space.
--
-- The check boils down to comparing Sp-N with SpLim, where N is the
-- amount of stack space needed (see Note [Stack usage] below). *BUT*
-- at this stage of the pipeline we are not supposed to refer to Sp
-- itself, because the stack is not yet manifest, so we don't quite
-- know where Sp is pointing.
--
-- So instead of referring directly to Sp - as we used to do in the
-- past - the code generator uses (old + 0) in the stack check. That
-- is the address of the first word of the old area, so if we add N
-- we'll get the address of highest used word.
--
-- This makes the check robust. For example, while we need to perform
-- only one stack check for each function, we could in theory place
-- more stack checks later in the function. They would be redundant,
-- but not incorrect (in a sense that they should not change program
-- behaviour). We need to make sure however that a stack check
-- inserted after incrementing the stack pointer checks for a
-- respectively smaller stack space. This would not be the case if the
-- code generator produced direct references to Sp. By referencing
-- (old + 0) we make sure that we always check for a correct amount of
-- stack: when converting (old + 0) to Sp the stack layout phase takes
-- into account changes already made to stack pointer. The idea for
-- this change came from observations made while debugging #8275.
572 573-- Note [Stack usage] 574-- ~~~~~~~~~~~~~~~~~~ 575-- At the moment we convert from STG to Cmm we don't know N, the 576-- number of bytes of stack that the function will use, so we use a 577-- special late-bound CmmLit, namely 578-- CmmHighStackMark 579-- to stand for the number of bytes needed. When the stack is made 580-- manifest, the number of bytes needed is calculated, and used to 581-- replace occurrences of CmmHighStackMark 582-- 583-- The (Maybe CmmExpr) passed to do_checks is usually 584-- Just (CmmLit CmmHighStackMark) 585-- but can also (in certain hand-written RTS functions) 586-- Just (CmmLit 8) or some other fixed valuet 587-- If it is Nothing, we don't generate a stack check at all. 588 589do_checks :: Maybe CmmExpr -- Should we check the stack? 590 -- See Note [Stack usage] 591 -> Bool -- Should we check for preemption? 592 -> Maybe CmmExpr -- Heap headroom (bytes) 593 -> CmmAGraph -- What to do on failure 594 -> FCode () 595do_checks mb_stk_hwm checkYield mb_alloc_lit do_gc = do 596 dflags <- getDynFlags 597 gc_id <- newBlockId 598 599 let 600 Just alloc_lit = mb_alloc_lit 601 602 bump_hp = cmmOffsetExprB dflags hpExpr alloc_lit 603 604 -- Sp overflow if ((old + 0) - CmmHighStack < SpLim) 605 -- At the beginning of a function old + 0 = Sp 606 -- See Note [Single stack check] 607 sp_oflo sp_hwm = 608 CmmMachOp (mo_wordULt dflags) 609 [CmmMachOp (MO_Sub (typeWidth (cmmRegType dflags spReg))) 610 [CmmStackSlot Old 0, sp_hwm], 611 CmmReg spLimReg] 612 613 -- Hp overflow if (Hp > HpLim) 614 -- (Hp has been incremented by now) 615 -- HpLim points to the LAST WORD of valid allocation space. 
616 hp_oflo = CmmMachOp (mo_wordUGt dflags) [hpExpr, hpLimExpr] 617 618 alloc_n = mkAssign hpAllocReg alloc_lit 619 620 case mb_stk_hwm of 621 Nothing -> return () 622 Just stk_hwm -> tickyStackCheck 623 >> (emit =<< mkCmmIfGoto' (sp_oflo stk_hwm) gc_id (Just False) ) 624 625 -- Emit new label that might potentially be a header 626 -- of a self-recursive tail call. 627 -- See Note [Self-recursive loop header]. 628 self_loop_info <- getSelfLoop 629 case self_loop_info of 630 Just (_, loop_header_id, _) 631 | checkYield && isJust mb_stk_hwm -> emitLabel loop_header_id 632 _otherwise -> return () 633 634 if (isJust mb_alloc_lit) 635 then do 636 tickyHeapCheck 637 emitAssign hpReg bump_hp 638 emit =<< mkCmmIfThen' hp_oflo (alloc_n <*> mkBranch gc_id) (Just False) 639 else do 640 when (checkYield && not (gopt Opt_OmitYields dflags)) $ do 641 -- Yielding if HpLim == 0 642 let yielding = CmmMachOp (mo_wordEq dflags) 643 [CmmReg hpLimReg, 644 CmmLit (zeroCLit dflags)] 645 emit =<< mkCmmIfGoto' yielding gc_id (Just False) 646 647 tscope <- getTickScope 648 emitOutOfLine gc_id 649 (do_gc, tscope) -- this is expected to jump back somewhere 650 651 -- Test for stack pointer exhaustion, then 652 -- bump heap pointer, and test for heap exhaustion 653 -- Note that we don't move the heap pointer unless the 654 -- stack check succeeds. Otherwise we might end up 655 -- with slop at the end of the current block, which can 656 -- confuse the LDV profiler. 657 658-- Note [Self-recursive loop header] 659-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 660-- 661-- Self-recursive loop header is required by loopification optimization (See 662-- Note [Self-recursive tail calls] in GHC.StgToCmm.Expr). We emit it if: 663-- 664-- 1. There is information about self-loop in the FCode environment. We don't 665-- check the binder (first component of the self_loop_info) because we are 666-- certain that if the self-loop info is present then we are compiling the 667-- binder body. 
Reason: the only possible way to get here with the 668-- self_loop_info present is from closureCodeBody. 669-- 670-- 2. checkYield && isJust mb_stk_hwm. checkYield tells us that it is possible 671-- to preempt the heap check (see #367 for motivation behind this check). It 672-- is True for heap checks placed at the entry to a function and 673-- let-no-escape heap checks but false for other heap checks (eg. in case 674-- alternatives or created from hand-written high-level Cmm). The second 675-- check (isJust mb_stk_hwm) is true for heap checks at the entry to a 676-- function and some heap checks created in hand-written Cmm. Otherwise it 677-- is Nothing. In other words the only situation when both conditions are 678-- true is when compiling stack and heap checks at the entry to a 679-- function. This is the only situation when we want to emit a self-loop 680-- label. 681