{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}

-----------------------------------------------------------------------------
--
-- Building info tables.
--
-- (c) The University of Glasgow 2004-2006
--
-----------------------------------------------------------------------------

module GHC.StgToCmm.Layout (
        mkArgDescr,
        emitCall, emitReturn, adjustHpBackwards,

        emitClosureProcAndInfoTable,
        emitClosureAndInfoTable,

        slowCall, directCall,

        FieldOffOrPadding(..),
        ClosureHeader(..),
        mkVirtHeapOffsets,
        mkVirtHeapOffsetsWithPadding,
        mkVirtConstrOffsets,
        mkVirtConstrSizes,
        getHpRelOffset,

        ArgRep(..), toArgRep, argRepSizeW -- re-exported from GHC.StgToCmm.ArgRep
  ) where


#include "HsVersions.h"

import GhcPrelude hiding ((<*>))

import GHC.StgToCmm.Closure
import GHC.StgToCmm.Env
import GHC.StgToCmm.ArgRep -- notably: ( slowCallPattern )
import GHC.StgToCmm.Ticky
import GHC.StgToCmm.Monad
import GHC.StgToCmm.Utils

import MkGraph
import SMRep
import BlockId
import Cmm
import CmmUtils
import CmmInfo
import CLabel
import StgSyn
import Id
import TyCon             ( PrimRep(..), primRepSizeB )
import BasicTypes        ( RepArity )
import DynFlags
import Module

import Util
import Data.List
import Outputable
import FastString
import Control.Monad

------------------------------------------------------------------------
--                Call and return sequences
------------------------------------------------------------------------

-- | Return multiple values to the sequel
--
-- If the sequel is @Return@
--
-- >     return (x,y)
--
-- If the sequel is @AssignTo [p,q]@
--
-- >    p=x; q=y;
--
emitReturn :: [CmmExpr] -> FCode ReturnKind
emitReturn results
  = do { dflags    <- getDynFlags
       ; sequel    <- getSequel
       ; updfr_off <- getUpdFrameOff
       ; case sequel of
           Return ->
             do { adjustHpBackwards
                -- The return address lives in the update-frame slot on
                -- the old stack area; jump through its entry code.
                ; let e = CmmLoad (CmmStackSlot Old updfr_off)
                                  (gcWord dflags)
                ; emit (mkReturn dflags (entryCode dflags e) results updfr_off)
                }
           AssignTo regs adjust ->
             -- Deliver results straight into the requested registers;
             -- the caller tells us whether Hp needs retracting first.
             do { when adjust adjustHpBackwards
                ; emitMultiAssign regs results }
       ; return AssignedDirectly
       }


-- | @emitCall conv fun args@ makes a call to the entry-code of @fun@,
-- using the call/return convention @conv@, passing @args@, and
-- returning the results to the current sequel.
--
emitCall :: (Convention, Convention) -> CmmExpr -> [CmmExpr] -> FCode ReturnKind
emitCall convs fun args
  = emitCallWithExtraStack convs fun args noExtraStack


-- | @emitCallWithExtraStack conv fun args stack@ makes a call to the
-- entry-code of @fun@, using the call/return convention @conv@,
-- passing @args@, pushing some extra stack frames described by
-- @stack@, and returning the results to the current sequel.
--
emitCallWithExtraStack
   :: (Convention, Convention) -> CmmExpr -> [CmmExpr]
   -> [CmmExpr] -> FCode ReturnKind
emitCallWithExtraStack (callConv, retConv) fun args extra_stack
  = do { dflags <- getDynFlags
       ; adjustHpBackwards
       ; sequel <- getSequel
       ; updfr_off <- getUpdFrameOff
       ; case sequel of
           Return -> do
             -- Tail position: plain jump, nothing comes back here.
             emit $ mkJumpExtra dflags callConv fun args updfr_off extra_stack
             return AssignedDirectly
           AssignTo res_regs _ -> do
             -- Results wanted in registers: make a fresh continuation
             -- block @k@, call with return-to-@k@, and copy the results
             -- in at the continuation.
             k <- newBlockId
             let area = Young k
                 (off, _, copyin) = copyInOflow dflags retConv area res_regs []
                 copyout = mkCallReturnsTo dflags fun callConv args k off updfr_off
                                extra_stack
             tscope <- getTickScope
             emit (copyout <*> mkLabel k tscope <*> copyin)
             return (ReturnedTo k off)
       }


adjustHpBackwards :: FCode ()
-- This function adjusts the heap pointer just before a tail call or
-- return.  At a call or return, the virtual heap pointer may be less
-- than the real Hp, because the latter was advanced to deal with
-- the worst-case branch of the code, and we may be in a better-case
-- branch.  In that case, move the real Hp *back* and retract some
-- ticky allocation count.
--
-- It *does not* deal with high-water-mark adjustment.  That's done by
-- functions which allocate heap.
adjustHpBackwards
  = do { hp_usg <- getHpUsage
       ; let rHp = realHp hp_usg
             vHp = virtHp hp_usg
             adjust_words = vHp -rHp
       ; new_hp <- getHpRelOffset vHp

       ; emit (if adjust_words == 0
               then mkNop
               else mkAssign hpReg new_hp) -- Generates nothing when vHp==rHp

       ; tickyAllocHeap False adjust_words -- ...ditto

       ; setRealHp vHp
       }


-------------------------------------------------------------------------
-- Making calls: directCall and slowCall
-------------------------------------------------------------------------

-- General plan is:
--   - we'll make *one* fast call, either to the function itself
--     (directCall) or to stg_ap_<pat>_fast (slowCall)
--     Any left-over arguments will be pushed on the stack,
--
--     e.g. Sp[old+8]  = arg1
--          Sp[old+16] = arg2
--          Sp[old+32] = stg_ap_pp_info
--          R2 = arg3
--          R3 = arg4
--          call f() return to Nothing updfr_off: 32


directCall :: Convention -> CLabel -> RepArity -> [StgArg] -> FCode ReturnKind
-- (directCall f n args)
-- calls f(arg1, ..., argn), and applies the result to the remaining args
-- The function f has arity n, and there are guaranteed at least n args
-- Both arity and args include void args
directCall conv lbl arity stg_args
  = do { argreps <- getArgRepsAmodes stg_args
       ; direct_call "directCall" conv lbl arity argreps }


slowCall :: CmmExpr -> [StgArg] -> FCode ReturnKind
-- (slowCall fun args) applies fun to args, returning the results to Sequel
slowCall fun stg_args
  = do  dflags <- getDynFlags
        argsreps <- getArgRepsAmodes stg_args
        let (rts_fun, arity) = slowCallPattern (map fst argsreps)

        -- Generate the vanilla slow-call code first; depending on the
        -- optimisation check below we either emit it alone or as the
        -- fallback branch of an arity test.
        (r, slow_code) <- getCodeR $ do
           r <- direct_call "slow_call" NativeNodeCall
                 (mkRtsApFastLabel rts_fun) arity ((P,Just fun):argsreps)
           emitComment $ mkFastString ("slow_call for " ++
                                      showSDoc dflags (ppr fun) ++
                                      " with pat " ++ unpackFS rts_fun)
           return r

        -- Note [avoid intermediate PAPs]
        let n_args = length stg_args
        if n_args > arity && optLevel dflags >= 2
           then do
             funv <- (CmmReg . CmmLocal) `fmap` assignTemp fun
             fun_iptr <- (CmmReg . CmmLocal) `fmap`
                    assignTemp (closureInfoPtr dflags (cmmUntag dflags funv))

             -- ToDo: we could do slightly better here by reusing the
             -- continuation from the slow call, which we have in r.
             -- Also we'd like to push the continuation on the stack
             -- before the branch, so that we only get one copy of the
             -- code that saves all the live variables across the
             -- call, but that might need some improvements to the
             -- special case in the stack layout code to handle this
             -- (see Note [diamond proc point]).

             fast_code <- getCode $
                emitCall (NativeNodeCall, NativeReturn)
                  (entryCode dflags fun_iptr)
                  (nonVArgs ((P,Just funv):argsreps))

             slow_lbl <- newBlockId
             fast_lbl <- newBlockId
             is_tagged_lbl <- newBlockId
             end_lbl <- newBlockId

             let correct_arity = cmmEqWord dflags (funInfoArity dflags fun_iptr)
                                                  (mkIntExpr dflags n_args)

             -- Branch structure (see Note [avoid intermediate PAPs]):
             -- tagged (i.e. evaluated) and arity matches -> fast call,
             -- otherwise fall through to the generic slow call.
             tscope <- getTickScope
             emit (mkCbranch (cmmIsTagged dflags funv)
                             is_tagged_lbl slow_lbl (Just True)
                   <*> mkLabel is_tagged_lbl tscope
                   <*> mkCbranch correct_arity fast_lbl slow_lbl (Just True)
                   <*> mkLabel fast_lbl tscope
                   <*> fast_code
                   <*> mkBranch end_lbl
                   <*> mkLabel slow_lbl tscope
                   <*> slow_code
                   <*> mkLabel end_lbl tscope)
             return r

           else do
             emit slow_code
             return r


-- Note [avoid intermediate PAPs]
--
-- A slow call which needs multiple generic apply patterns will be
-- almost guaranteed to create one or more intermediate PAPs when
-- applied to a function that takes the correct number of arguments.
-- We try to avoid this situation by generating code to test whether
-- we are calling a function with the correct number of arguments
-- first, i.e.:
--
--   if (TAG(f) != 0} {  // f is not a thunk
--      if (f->info.arity == n) {
--         ... make a fast call to f ...
--      }
--   }
--   ... otherwise make the slow call ...
--
-- We *only* do this when the call requires multiple generic apply
-- functions, which requires pushing extra stack frames and probably
-- results in intermediate PAPs.  (I say probably, because it might be
-- that we're over-applying a function, but that seems even less
-- likely).
--
-- This very rarely applies, but if it does happen in an inner loop it
-- can have a severe impact on performance (#6084).


--------------
direct_call :: String
            -> Convention     -- e.g. NativeNodeCall or NativeDirectCall
            -> CLabel -> RepArity
            -> [(ArgRep,Maybe CmmExpr)] -> FCode ReturnKind
direct_call caller call_conv lbl arity args
  | debugIsOn && args `lengthLessThan` real_arity  -- Too few args
  = do -- Caller should ensure that there enough args!
       pprPanic "direct_call" $
            text caller <+> ppr arity <+>
            ppr lbl <+> ppr (length args) <+>
            ppr (map snd args) <+> ppr (map fst args)

  | null rest_args  -- Precisely the right number of arguments
  = emitCall (call_conv, NativeReturn) target (nonVArgs args)

  | otherwise       -- Note [over-saturated calls]
  = do dflags <- getDynFlags
       emitCallWithExtraStack (call_conv, NativeReturn)
                              target
                              (nonVArgs fast_args)
                              (nonVArgs (stack_args dflags))
  where
    target = CmmLit (CmmLabel lbl)
    (fast_args, rest_args) = splitAt real_arity args
    stack_args dflags = slowArgs dflags rest_args
    -- NativeNodeCall passes one extra argument (the node/closure
    -- pointer, cf. the (P,Just fun) prepended in slowCall), so the
    -- fast-call portion of the argument list is one longer.
    real_arity = case call_conv of
                   NativeNodeCall -> arity+1
                   _              -> arity


-- When constructing calls, it is easier to keep the ArgReps and the
-- CmmExprs zipped together.  However, a void argument has no
-- representation, so we need to use Maybe CmmExpr (the alternative of
-- using zeroCLit or even undefined would work, but would be ugly).
--
-- | Pair each argument with its 'ArgRep'.  Void arguments get
-- @Nothing@ since they have no runtime representation; every other
-- argument is looked up with 'getArgAmode'.
getArgRepsAmodes :: [StgArg] -> FCode [(ArgRep, Maybe CmmExpr)]
getArgRepsAmodes = mapM getArgRepAmode
  where getArgRepAmode arg
           | V <- rep  = return (V, Nothing)
           | otherwise = do expr <- getArgAmode (NonVoid arg)
                            return (rep, Just expr)
           where rep = toArgRep (argPrimRep arg)

-- | Drop the void arguments, keeping only the 'CmmExpr's that have a
-- runtime representation.
nonVArgs :: [(ArgRep, Maybe CmmExpr)] -> [CmmExpr]
nonVArgs [] = []
nonVArgs ((_,Nothing)  : args) = nonVArgs args
nonVArgs ((_,Just arg) : args) = arg : nonVArgs args

{-
Note [over-saturated calls]

The natural thing to do for an over-saturated call would be to call
the function with the correct number of arguments, and then apply the
remaining arguments to the value returned, e.g.

  f a b c d   (where f has arity 2)
  -->
  r = call f(a,b)
  call r(c,d)

but this entails
  - saving c and d on the stack
  - making a continuation info table
  - at the continuation, loading c and d off the stack into regs
  - finally, call r

Note that since there are a fixed number of different r's
(e.g.  stg_ap_pp_fast), we can also pre-compile continuations
that correspond to each of them, rather than generating a fresh
one for each over-saturated call.

Not only does this generate much less code, it is faster too.  We will
generate something like:

Sp[old+16] = c
Sp[old+24] = d
Sp[old+32] = stg_ap_pp_info
call f(a,b) -- usual calling convention

For the purposes of the CmmCall node, we count this extra stack as
just more arguments that we are passing on the stack (cml_args).
-}

-- | 'slowArgs' takes a list of function arguments and prepares them for
-- pushing on the stack for "extra" arguments to a function which requires
-- fewer arguments than we currently have.
slowArgs :: DynFlags -> [(ArgRep, Maybe CmmExpr)] -> [(ArgRep, Maybe CmmExpr)]
slowArgs _ [] = []
slowArgs dflags args -- careful: reps contains voids (V), but args does not
  -- With cost-centre profiling we also interleave frames that restore
  -- the current cost-centre stack around each generic-apply chunk.
  | gopt Opt_SccProfilingOn dflags
              = save_cccs ++ this_pat ++ slowArgs dflags rest_args
  | otherwise =              this_pat ++ slowArgs dflags rest_args
  where
    -- Consume as many arguments as the largest matching stg_ap_<pat>
    -- pattern allows, then recurse on the remainder.
    (arg_pat, n)            = slowCallPattern (map fst args)
    (call_args, rest_args)  = splitAt n args

    stg_ap_pat = mkCmmRetInfoLabel rtsUnitId arg_pat
    this_pat   = (N, Just (mkLblExpr stg_ap_pat)) : call_args
    save_cccs  = [(N, Just (mkLblExpr save_cccs_lbl)), (N, Just cccsExpr)]
    save_cccs_lbl = mkCmmRetInfoLabel rtsUnitId (fsLit "stg_restore_cccs")

-------------------------------------------------------------------------
----        Laying out objects on the heap and stack
-------------------------------------------------------------------------

-- The heap always grows upwards, so hpRel is easy to compute
hpRel :: VirtualHpOffset         -- virtual offset of Hp
      -> VirtualHpOffset         -- virtual offset of The Thing
      -> WordOff                 -- integer word offset
hpRel hp off = off - hp

getHpRelOffset :: VirtualHpOffset -> FCode CmmExpr
-- See Note [Virtual and real heap pointers] in GHC.StgToCmm.Monad
getHpRelOffset virtual_offset
  = do dflags <- getDynFlags
       hp_usg <- getHpUsage
       return (cmmRegOffW dflags hpReg (hpRel (realHp hp_usg) virtual_offset))

-- | One element of a heap-object layout: either a field that needs an
-- offset, or a run of padding bytes inserted for alignment.
data FieldOffOrPadding a
    = FieldOff (NonVoid a) -- Something that needs an offset.
               ByteOff     -- Offset in bytes.
    | Padding ByteOff  -- Length of padding in bytes.
              ByteOff  -- Offset in bytes.

-- | Used to tell the various @mkVirtHeapOffsets@ functions what kind
-- of header the object has.  This will be accounted for in the
-- offsets of the fields returned.
data ClosureHeader
  = NoHeader
  | StdHeader
  | ThunkHeader

mkVirtHeapOffsetsWithPadding
  :: DynFlags
  -> ClosureHeader            -- What kind of header to account for
  -> [NonVoid (PrimRep, a)]   -- Things to make offsets for
  -> ( WordOff                -- Total number of words allocated
     , WordOff                -- Number of words allocated for *pointers*
     , [FieldOffOrPadding a]  -- Either an offset or padding.
     )

-- Things with their offsets from start of object in order of
-- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER
-- First in list gets lowest offset, which is initial offset + 1.
--
-- mkVirtHeapOffsetsWithPadding always returns boxed things with smaller offsets
-- than the unboxed things

mkVirtHeapOffsetsWithPadding dflags header things =
    -- Void fields must have been filtered out by the caller.
    ASSERT(not (any (isVoidRep . fst . fromNonVoid) things))
    ( tot_wds
    , bytesToWordsRoundUp dflags bytes_of_ptrs
    , concat (ptrs_w_offsets ++ non_ptrs_w_offsets) ++ final_pad
    )
  where
    hdr_words = case header of
      NoHeader    -> 0
      StdHeader   -> fixedHdrSizeW dflags
      ThunkHeader -> thunkHdrSize dflags
    hdr_bytes = wordsToBytes dflags hdr_words

    -- GC pointers are laid out first, then the non-pointer payload.
    (ptrs, non_ptrs) = partition (isGcPtrRep . fst . fromNonVoid) things

    -- Thread a byte offset through the fields, accumulating
    -- per-field offsets (and any alignment padding) as we go.
    (bytes_of_ptrs, ptrs_w_offsets) =
       mapAccumL computeOffset 0 ptrs
    (tot_bytes, non_ptrs_w_offsets) =
       mapAccumL computeOffset bytes_of_ptrs non_ptrs

    tot_wds = bytesToWordsRoundUp dflags tot_bytes

    -- Pad the object out to a whole number of words.
    final_pad_size = tot_wds * word_size - tot_bytes
    final_pad
        | final_pad_size > 0 = [(Padding final_pad_size
                                         (hdr_bytes + tot_bytes))]
        | otherwise          = []

    word_size = wORD_SIZE dflags

    computeOffset bytes_so_far nv_thing =
        (new_bytes_so_far, with_padding field_off)
      where
        (rep, thing) = fromNonVoid nv_thing

        -- Size of the field in bytes.
        !sizeB = primRepSizeB dflags rep

        -- Align the start offset (eg, 2-byte value should be 2-byte aligned).
        -- But not more than to a word.
        !align = min word_size sizeB
        !start = roundUpTo bytes_so_far align
        !padding = start - bytes_so_far

        -- Final offset is:
        --   size of header + bytes_so_far + padding
        !final_offset = hdr_bytes + bytes_so_far + padding
        !new_bytes_so_far = start + sizeB
        field_off = FieldOff (NonVoid thing) final_offset

        with_padding field_off
            | padding == 0 = [field_off]
            | otherwise    = [ Padding padding (hdr_bytes + bytes_so_far)
                             , field_off
                             ]


mkVirtHeapOffsets
  :: DynFlags
  -> ClosureHeader            -- What kind of header to account for
  -> [NonVoid (PrimRep,a)]    -- Things to make offsets for
  -> (WordOff,                -- _Total_ number of words allocated
      WordOff,                -- Number of words allocated for *pointers*
      [(NonVoid a, ByteOff)])
-- Same as mkVirtHeapOffsetsWithPadding, but discards the padding
-- entries and keeps only the (field, offset) pairs.
mkVirtHeapOffsets dflags header things =
    ( tot_wds
    , ptr_wds
    , [ (field, offset) | (FieldOff field offset) <- things_offsets ]
    )
  where
    (tot_wds, ptr_wds, things_offsets) =
        mkVirtHeapOffsetsWithPadding dflags header things

-- | Just like mkVirtHeapOffsets, but for constructors
mkVirtConstrOffsets
  :: DynFlags -> [NonVoid (PrimRep, a)]
  -> (WordOff, WordOff, [(NonVoid a, ByteOff)])
mkVirtConstrOffsets dflags = mkVirtHeapOffsets dflags StdHeader

-- | Just like mkVirtConstrOffsets, but used when we don't have the actual
-- arguments. Useful when e.g. generating info tables; we just need to know
-- sizes of pointer and non-pointer fields.
mkVirtConstrSizes :: DynFlags -> [NonVoid PrimRep] -> (WordOff, WordOff)
mkVirtConstrSizes dflags field_reps
  = (tot_wds, ptr_wds)
  where
    (tot_wds, ptr_wds, _) =
       mkVirtConstrOffsets dflags
         -- Attach a dummy () payload; only the reps matter for sizing.
         (map (\nv_rep -> NonVoid (fromNonVoid nv_rep, ())) field_reps)

-------------------------------------------------------------------------
--
--        Making argument descriptors
--
--  An argument descriptor describes the layout of args on the stack,
--  both for    * GC (stack-layout) purposes, and
--              * saving/restoring registers when a heap-check fails
--
-- Void arguments aren't important, therefore (contrast constructSlowCall)
--
-------------------------------------------------------------------------

-- bring in ARG_P, ARG_N, etc.
#include "../includes/rts/storage/FunTypes.h"

mkArgDescr :: DynFlags -> [Id] -> ArgDescr
mkArgDescr dflags args
  = let arg_bits = argBits dflags arg_reps
        arg_reps = filter isNonV (map idArgRep args)
           -- Getting rid of voids eases matching of standard patterns
    in case stdPattern arg_reps of
         Just spec_id -> ArgSpec spec_id
         Nothing      -> ArgGen  arg_bits

argBits :: DynFlags -> [ArgRep] -> [Bool]    -- True for non-ptr, False for ptr
argBits _      []           = []
argBits dflags (P   : args) = False : argBits dflags args
-- A non-pointer may span several words; mark each word as non-ptr.
argBits dflags (arg : args) = take (argRepSizeW dflags arg) (repeat True)
                    ++ argBits dflags args

----------------------
-- Map an argument-rep sequence to one of the precompiled ARG_* codes
-- from FunTypes.h, if there is one; otherwise the caller falls back to
-- a generic bitmap descriptor (ArgGen).
stdPattern :: [ArgRep] -> Maybe Int
stdPattern reps
  = case reps of
      []    -> Just ARG_NONE        -- just void args, probably
      [N]   -> Just ARG_N
      [P]   -> Just ARG_P
      [F]   -> Just ARG_F
      [D]   -> Just ARG_D
      [L]   -> Just ARG_L
      [V16] -> Just ARG_V16
      [V32] -> Just ARG_V32
      [V64] -> Just ARG_V64

      [N,N] -> Just ARG_NN
      [N,P] -> Just ARG_NP
      [P,N] -> Just ARG_PN
      [P,P] -> Just ARG_PP

      [N,N,N] -> Just ARG_NNN
      [N,N,P] -> Just ARG_NNP
      [N,P,N] -> Just ARG_NPN
      [N,P,P] -> Just ARG_NPP
      [P,N,N] -> Just ARG_PNN
      [P,N,P] -> Just ARG_PNP
      [P,P,N] -> Just ARG_PPN
      [P,P,P] -> Just ARG_PPP

      [P,P,P,P]     -> Just ARG_PPPP
      [P,P,P,P,P]   -> Just ARG_PPPPP
      [P,P,P,P,P,P] -> Just ARG_PPPPPP

      _ -> Nothing

-------------------------------------------------------------------------
--
--        Generating the info table and code for a closure
--
-------------------------------------------------------------------------

-- Here we make an info table of type 'CmmInfo'.  The concrete
-- representation as a list of 'CmmAddr' is handled later
-- in the pipeline by 'cmmToRawCmm'.
-- When loading the free variables, a function closure pointer may be tagged,
-- so we must take it into account.

emitClosureProcAndInfoTable :: Bool                    -- top-level?
                            -> Id                      -- name of the closure
                            -> LambdaFormInfo
                            -> CmmInfoTable
                            -> [NonVoid Id]            -- incoming arguments
                            -> ((Int, LocalReg, [LocalReg]) -> FCode ()) -- function body
                            -> FCode ()
emitClosureProcAndInfoTable top_lvl bndr lf_info info_tbl args body
 = do  { dflags <- getDynFlags
        -- Bind the binder itself, but only if it's not a top-level
        -- binding. We need non-top let-bindings to refer to the
        -- top-level binding, which this binding would incorrectly shadow.
        ; node <- if top_lvl then return $ idToReg dflags (NonVoid bndr)
                  else bindToReg (NonVoid bndr) lf_info
        ; let node_points = nodeMustPointToIt dflags lf_info
        ; arg_regs <- bindArgsToRegs args
        ; let args' = if node_points then (node : arg_regs) else arg_regs
              conv  = if nodeMustPointToIt dflags lf_info then NativeNodeCall
                                                          else NativeDirectCall
              (offset, _, _) = mkCallEntry dflags conv args' []
        ; emitClosureAndInfoTable info_tbl conv args' $ body (offset, node, arg_regs)
        }

-- Data constructors need closures, but not with all the argument handling
-- needed for functions. The shared part goes here.
-- | Emit @body@ as a procedure carrying the given info table, using
-- calling convention @conv@ with arguments @args@.  Data constructors
-- need closures, but not with all the argument handling needed for
-- functions; this is the part shared between the two.
emitClosureAndInfoTable ::
  CmmInfoTable -> Convention -> [LocalReg] -> FCode () -> FCode ()
emitClosureAndInfoTable info_tbl conv args body
  = do { (_, body_blocks) <- getCodeScoped body
         -- The procedure's label is the entry label derived from the
         -- info table's own label.
       ; let proc_lbl = toEntryLbl (cit_lbl info_tbl)
       ; emitProcWithConvention conv (Just info_tbl) proc_lbl args body_blocks
       }