1{-# LANGUAGE BangPatterns #-} 2{-# LANGUAGE CPP #-} 3{-# LANGUAGE ViewPatterns #-} 4 5{-# OPTIONS_GHC -Wno-incomplete-record-updates #-} 6 7{- 8(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 9 10************************************************************************ 11* * 12\section[OccurAnal]{Occurrence analysis pass} 13* * 14************************************************************************ 15 16The occurrence analyser re-typechecks a core expression, returning a new 17core expression with (hopefully) improved usage information. 18-} 19 20module GHC.Core.Opt.OccurAnal ( occurAnalysePgm, occurAnalyseExpr ) where 21 22#include "GhclibHsVersions.h" 23 24import GHC.Prelude 25 26import GHC.Driver.Ppr 27 28import GHC.Core 29import GHC.Core.FVs 30import GHC.Core.Utils ( exprIsTrivial, isDefaultAlt, isExpandableApp, 31 stripTicksTopE, mkTicks ) 32import GHC.Core.Opt.Arity ( joinRhsArity ) 33import GHC.Types.Id 34import GHC.Types.Id.Info 35import GHC.Types.Basic 36import GHC.Types.Tickish 37import GHC.Unit.Module( Module ) 38import GHC.Core.Coercion 39import GHC.Core.Type 40import GHC.Core.TyCo.FVs( tyCoVarsOfMCo ) 41 42import GHC.Types.Var.Set 43import GHC.Types.Var.Env 44import GHC.Types.Var 45import GHC.Types.Demand ( argOneShots, argsOneShots ) 46import GHC.Data.Graph.Directed ( SCC(..), Node(..) 47 , stronglyConnCompFromEdgedVerticesUniq 48 , stronglyConnCompFromEdgedVerticesUniqR ) 49import GHC.Builtin.Names( runRWKey ) 50import GHC.Types.Unique 51import GHC.Types.Unique.FM 52import GHC.Types.Unique.Set 53import GHC.Utils.Misc 54import GHC.Data.Maybe( isJust ) 55import GHC.Utils.Outputable 56import GHC.Utils.Panic 57import Data.List (mapAccumL, mapAccumR) 58 59{- 60************************************************************************ 61* * 62 occurAnalysePgm, occurAnalyseExpr 63* * 64************************************************************************ 65 66Here's the externally-callable interface: 67-} 68 69occurAnalyseExpr :: CoreExpr -> CoreExpr 70-- Do occurrence analysis, and discard occurrence info returned 71occurAnalyseExpr expr 72 = snd (occAnal initOccEnv expr) 73 74occurAnalysePgm :: Module -- Used only in debug output 75 -> (Id -> Bool) -- Active unfoldings 76 -> (Activation -> Bool) -- Active rules 77 -> [CoreRule] -- Local rules for imported Ids 78 -> CoreProgram -> CoreProgram 79occurAnalysePgm this_mod active_unf active_rule imp_rules binds 80 | isEmptyDetails final_usage 81 = occ_anald_binds 82 83 | otherwise -- See Note [Glomming] 84 = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon) 85 2 (ppr final_usage ) ) 86 occ_anald_glommed_binds 87 where 88 init_env = initOccEnv { occ_rule_act = active_rule 89 , occ_unf_act = active_unf } 90 91 (final_usage, occ_anald_binds) = go init_env binds 92 (_, occ_anald_glommed_binds) = occAnalRecBind init_env TopLevel 93 imp_rule_edges 94 (flattenBinds binds) 95 initial_uds 96 -- It's crucial to re-analyse the glommed-together bindings 97 -- so that we establish the right loop breakers. Otherwise 98 -- we can easily create an infinite loop (#9583 is an example) 99 -- 100 -- Also crucial to re-analyse the /original/ bindings 101 -- in case the first pass accidentally discarded as dead code 102 -- a binding that was actually needed (albeit before its 103 -- definition site). #17724 threw this up. 104 105 initial_uds = addManyOccs emptyDetails (rulesFreeVars imp_rules) 106 -- The RULES declarations keep things alive! 107 108 -- imp_rule_edges maps a top-level local binder 'f' to the 109 -- RHS free vars of any IMP-RULE, a local RULE for an imported function, 110 -- where 'f' appears on the LHS 111 -- e.g. RULE foldr f = blah 112 -- imp_rule_edges contains f :-> fvs(blah) 113 -- We treat such RULES as extra rules for 'f' 114 -- See Note [Preventing loops due to imported functions rules] 115 imp_rule_edges :: ImpRuleEdges 116 imp_rule_edges = foldr (plusVarEnv_C (++)) emptyVarEnv 117 [ mapVarEnv (const [(act,rhs_fvs)]) $ getUniqSet $ 118 exprsFreeIds args `delVarSetList` bndrs 119 | Rule { ru_act = act, ru_bndrs = bndrs 120 , ru_args = args, ru_rhs = rhs } <- imp_rules 121 -- Not BuiltinRules; see Note [Plugin rules] 122 , let rhs_fvs = exprFreeIds rhs `delVarSetList` bndrs ] 123 124 go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind]) 125 go _ [] 126 = (initial_uds, []) 127 go env (bind:binds) 128 = (final_usage, bind' ++ binds') 129 where 130 (bs_usage, binds') = go env binds 131 (final_usage, bind') = occAnalBind env TopLevel imp_rule_edges bind 132 bs_usage 133 134{- ********************************************************************* 135* * 136 IMP-RULES 137 Local rules for imported functions 138* * 139********************************************************************* -} 140 141type ImpRuleEdges = IdEnv [(Activation, VarSet)] 142 -- Mapping from a local Id 'f' to info about its IMP-RULES, 143 -- i.e. /local/ rules for an imported Id that mention 'f' on the LHS 144 -- We record (a) its Activation and (b) the RHS free vars 145 -- See Note [IMP-RULES: local rules for imported functions] 146 147noImpRuleEdges :: ImpRuleEdges 148noImpRuleEdges = emptyVarEnv 149 150lookupImpRules :: ImpRuleEdges -> Id -> [(Activation,VarSet)] 151lookupImpRules imp_rule_edges bndr 152 = case lookupVarEnv imp_rule_edges bndr of 153 Nothing -> [] 154 Just vs -> vs 155 156impRulesScopeUsage :: [(Activation,VarSet)] -> UsageDetails 157-- Variable mentioned in RHS of an IMP-RULE for the bndr, 158-- whether active or not 159impRulesScopeUsage imp_rules_info 160 = foldr add emptyDetails imp_rules_info 161 where 162 add (_,vs) usage = addManyOccs usage vs 163 164impRulesActiveFvs :: (Activation -> Bool) -> VarSet 165 -> [(Activation,VarSet)] -> VarSet 166impRulesActiveFvs is_active bndr_set vs 167 = foldr add emptyVarSet vs `intersectVarSet` bndr_set 168 where 169 add (act,vs) acc | is_active act = vs `unionVarSet` acc 170 | otherwise = acc 171 172{- Note [IMP-RULES: local rules for imported functions] 173~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 174We quite often have 175 * A /local/ rule 176 * for an /imported/ function 177like this: 178 foo x = blah 179 {-# RULE "map/foo" forall xs. map foo xs = xs #-} 180We call them IMP-RULES. They are important in practice, and occur a 181lot in the libraries. 182 183IMP-RULES are held in mg_rules of ModGuts, and passed in to 184occurAnalysePgm. 185 186Main Invariant: 187 188* Throughout, we treat an IMP-RULE that mentions 'f' on its LHS 189 just like a RULE for f. 190 191Note [IMP-RULES: unavoidable loops] 192~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 193Consider this 194 f = /\a. B.g a 195 RULE B.g Int = 1 + f Int 196Note that 197 * The RULE is for an imported function. 198 * f is non-recursive 199Now we 200can get 201 f Int --> B.g Int Inlining f 202 --> 1 + f Int Firing RULE 203and so the simplifier goes into an infinite loop. This 204would not happen if the RULE was for a local function, 205because we keep track of dependencies through rules. But 206that is pretty much impossible to do for imported Ids. Suppose 207f's definition had been 208 f = /\a. C.h a 209where (by some long and devious process), C.h eventually inlines to 210B.g. We could only spot such loops by exhaustively following 211unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE) 212f. 213 214We regard this potential infinite loop as a *programmer* error. 215It's up the programmer not to write silly rules like 216 RULE f x = f x 217and the example above is just a more complicated version. 218 219Note [Specialising imported functions] (referred to from Specialise) 220~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 221For *automatically-generated* rules, the programmer can't be 222responsible for the "programmer error" in Note [IMP-RULES: unavoidable 223loops]. In particular, consider specialising a recursive function 224defined in another module. If we specialise a recursive function B.g, 225we get 226 g_spec = .....(B.g Int)..... 227 RULE B.g Int = g_spec 228Here, g_spec doesn't look recursive, but when the rule fires, it 229becomes so. And if B.g was mutually recursive, the loop might not be 230as obvious as it is here. 231 232To avoid this, 233 * When specialising a function that is a loop breaker, 234 give a NOINLINE pragma to the specialised function 235 236Note [Preventing loops due to imported functions rules] 237~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 238Consider: 239 import GHC.Base (foldr) 240 241 {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-} 242 filter p xs = build (\c n -> foldr (filterFB c p) n xs) 243 filterFB c p = ... 244 245 f = filter p xs 246 247Note that filter is not a loop-breaker, so what happens is: 248 f = filter p xs 249 = {inline} build (\c n -> foldr (filterFB c p) n xs) 250 = {inline} foldr (filterFB (:) p) [] xs 251 = {RULE} filter p xs 252 253We are in an infinite loop. 254 255A more elaborate example (that I actually saw in practice when I went to 256mark GHC.List.filter as INLINABLE) is as follows. Say I have this module: 257 {-# LANGUAGE RankNTypes #-} 258 module GHCList where 259 260 import Prelude hiding (filter) 261 import GHC.Base (build) 262 263 {-# INLINABLE filter #-} 264 filter :: (a -> Bool) -> [a] -> [a] 265 filter p [] = [] 266 filter p (x:xs) = if p x then x : filter p xs else filter p xs 267 268 {-# NOINLINE [0] filterFB #-} 269 filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b 270 filterFB c p x r | p x = x `c` r 271 | otherwise = r 272 273 {-# RULES 274 "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr 275 (filterFB c p) n xs) 276 "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p 277 #-} 278 279Then (because RULES are applied inside INLINABLE unfoldings, but inlinings 280are not), the unfolding given to "filter" in the interface file will be: 281 filter p [] = [] 282 filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs) 283 else build (\c n -> foldr (filterFB c p) n xs 284 285Note that because this unfolding does not mention "filter", filter is not 286marked as a strong loop breaker. Therefore at a use site in another module: 287 filter p xs 288 = {inline} 289 case xs of [] -> [] 290 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs) 291 else build (\c n -> foldr (filterFB c p) n xs) 292 293 build (\c n -> foldr (filterFB c p) n xs) 294 = {inline} foldr (filterFB (:) p) [] xs 295 = {RULE} filter p xs 296 297And we are in an infinite loop again, except that this time the loop is producing an 298infinitely large *term* (an unrolling of filter) and so the simplifier finally 299dies with "ticks exhausted" 300 301SOLUTION: we treat the rule "filterList" as an extra rule for 'filterFB' 302because it mentions 'filterFB' on the LHS. This is the Main Invariant 303in Note [IMP-RULES: local rules for imported functions]. 304 305So, during loop-breaker analysis: 306 307- for each active RULE for a local function 'f' we add an edge between 308 'f' and the local FVs of the rule RHS 309 310- for each active RULE for an *imported* function we add dependency 311 edges between the *local* FVS of the rule LHS and the *local* FVS of 312 the rule RHS. 313 314Even with this extra hack we aren't always going to get things 315right. For example, it might be that the rule LHS mentions an imported 316Id, and another module has a RULE that can rewrite that imported Id to 317one of our local Ids. 318 319Note [Plugin rules] 320~~~~~~~~~~~~~~~~~~~ 321Conal Elliott (#11651) built a GHC plugin that added some 322BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to 323do some domain-specific transformations that could not be expressed 324with an ordinary pattern-matching CoreRule. But then we can't extract 325the dependencies (in imp_rule_edges) from ru_rhs etc, because a 326BuiltinRule doesn't have any of that stuff. 327 328So we simply assume that BuiltinRules have no dependencies, and filter 329them out from the imp_rule_edges comprehension. 330 331Note [Glomming] 332~~~~~~~~~~~~~~~ 333RULES for imported Ids can make something at the top refer to 334something at the bottom: 335 336 foo = ...(B.f @Int)... 337 $sf = blah 338 RULE: B.f @Int = $sf 339 340Applying this rule makes foo refer to $sf, although foo doesn't appear to 341depend on $sf. (And, as in Note [Rules for imported functions], the 342dependency might be more indirect. For example, foo might mention C.t 343rather than B.f, where C.t eventually inlines to B.f.) 344 345NOTICE that this cannot happen for rules whose head is a 346locally-defined function, because we accurately track dependencies 347through RULES. It only happens for rules whose head is an imported 348function (B.f in the example above). 349 350Solution: 351 - When simplifying, bring all top level identifiers into 352 scope at the start, ignoring the Rec/NonRec structure, so 353 that when 'h' pops up in f's rhs, we find it in the in-scope set 354 (as the simplifier generally expects). This happens in simplTopBinds. 355 356 - In the occurrence analyser, if there are any out-of-scope 357 occurrences that pop out of the top, which will happen after 358 firing the rule: f = \x -> h x 359 h = \y -> 3 360 then just glom all the bindings into a single Rec, so that 361 the *next* iteration of the occurrence analyser will sort 362 them all out. This part happens in occurAnalysePgm. 363-} 364 365{- 366************************************************************************ 367* * 368 Bindings 369* * 370************************************************************************ 371 372Note [Recursive bindings: the grand plan] 373~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 374Loop breaking is surprisingly subtle. First read the section 4 of 375"Secrets of the GHC inliner". This describes our basic plan. We 376avoid infinite inlinings by choosing loop breakers, and ensuring that 377a loop breaker cuts each loop. 378 379See also Note [Inlining and hs-boot files] in GHC.Core.ToIface, which 380deals with a closely related source of infinite loops. 381 382When we come across a binding group 383 Rec { x1 = r1; ...; xn = rn } 384we treat it like this (occAnalRecBind): 385 3861. Note [Forming Rec groups] 387 Occurrence-analyse each right hand side, and build a 388 "Details" for each binding to capture the results. 389 Wrap the details in a LetrecNode, ready for SCC analysis. 390 All this is done by makeNode. 391 392 The edges of this graph are the "scope edges". 393 3942. Do SCC-analysis on these Nodes: 395 - Each CyclicSCC will become a new Rec 396 - Each AcyclicSCC will become a new NonRec 397 398 The key property is that every free variable of a binding is 399 accounted for by the scope edges, so that when we are done 400 everything is still in scope. 401 4023. For each AcyclicSCC, just make a NonRec binding. 403 4044. For each CyclicSCC of the scope-edge SCC-analysis in (2), we 405 identify suitable loop-breakers to ensure that inlining terminates. 406 This is done by occAnalRec. 407 408 To do so, form the loop-breaker graph, do SCC analysis. For each 409 CyclicSCC we choose a loop breaker, delete all edges to that node, 410 re-analyse the SCC, and iterate. See Note [Choosing loop breakers] 411 for the details 412 413 414Note [Dead code] 415~~~~~~~~~~~~~~~~ 416Dropping dead code for a cyclic Strongly Connected Component is done 417in a very simple way: 418 419 the entire SCC is dropped if none of its binders are mentioned 420 in the body; otherwise the whole thing is kept. 421 422The key observation is that dead code elimination happens after 423dependency analysis: so 'occAnalBind' processes SCCs instead of the 424original term's binding groups. 425 426Thus 'occAnalBind' does indeed drop 'f' in an example like 427 428 letrec f = ...g... 429 g = ...(...g...)... 430 in 431 ...g... 432 433when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in 434'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes 435'AcyclicSCC f', where 'body_usage' won't contain 'f'. 436 437Note [Forming Rec groups] 438~~~~~~~~~~~~~~~~~~~~~~~~~ 439The key point about the "Forming Rec groups" step is that it /preserves 440scoping/. If 'x' is mentioned, it had better be bound somewhere. So if 441we start with 442 Rec { f = ...h... 443 ; g = ...f... 444 ; h = ...f... } 445we can split into SCCs 446 Rec { f = ...h... 447 ; h = ..f... } 448 NonRec { g = ...f... } 449 450We put bindings {f = ef; g = eg } in a Rec group if "f uses g" and "g 451uses f", no matter how indirectly. We do a SCC analysis with an edge 452f -> g if "f mentions g". That is, g is free in: 453 a) the rhs 'ef' 454 b) or the RHS of a rule for f, whether active or inactive 455 Note [Rules are extra RHSs] 456 c) or the LHS or a rule for f, whether active or inactive 457 Note [Rule dependency info] 458 d) the RHS of an /active/ local IMP-RULE 459 Note [IMP-RULES: local rules for imported functions] 460 461(b) and (c) apply regardless of the activation of the RULE, because even if 462the rule is inactive its free variables must be bound. But (d) doesn't need 463to worry about this because IMP-RULES are always notionally at the bottom 464of the file. 465 466 * Note [Rules are extra RHSs] 467 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 468 A RULE for 'f' is like an extra RHS for 'f'. That way the "parent" 469 keeps the specialised "children" alive. If the parent dies 470 (because it isn't referenced any more), then the children will die 471 too (unless they are already referenced directly). 472 473 So in Example [eftInt], eftInt and eftIntFB will be put in the 474 same Rec, even though their 'main' RHSs are both non-recursive. 475 476 We must also include inactive rules, so that their free vars 477 remain in scope. 478 479 * Note [Rule dependency info] 480 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 481 The VarSet in a RuleInfo is used for dependency analysis in the 482 occurrence analyser. We must track free vars in *both* lhs and rhs. 483 Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind. 484 Why both? Consider 485 x = y 486 RULE f x = v+4 487 Then if we substitute y for x, we'd better do so in the 488 rule's LHS too, so we'd better ensure the RULE appears to mention 'x' 489 as well as 'v' 490 491 * Note [Rules are visible in their own rec group] 492 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 493 We want the rules for 'f' to be visible in f's right-hand side. 494 And we'd like them to be visible in other functions in f's Rec 495 group. E.g. in Note [Specialisation rules] we want f' rule 496 to be visible in both f's RHS, and fs's RHS. 497 498 This means that we must simplify the RULEs first, before looking 499 at any of the definitions. This is done by Simplify.simplRecBind, 500 when it calls addLetIdInfo. 501 502Note [Stable unfoldings] 503~~~~~~~~~~~~~~~~~~~~~~~~ 504None of the above stuff about RULES applies to a stable unfolding 505stored in a CoreUnfolding. The unfolding, if any, is simplified 506at the same time as the regular RHS of the function (ie *not* like 507Note [Rules are visible in their own rec group]), so it should be 508treated *exactly* like an extra RHS. 509 510Or, rather, when computing loop-breaker edges, 511 * If f has an INLINE pragma, and it is active, we treat the 512 INLINE rhs as f's rhs 513 * If it's inactive, we treat f as having no rhs 514 * If it has no INLINE pragma, we look at f's actual rhs 515 516 517There is a danger that we'll be sub-optimal if we see this 518 f = ...f... 519 [INLINE f = ..no f...] 520where f is recursive, but the INLINE is not. This can just about 521happen with a sufficiently odd set of rules; eg 522 523 foo :: Int -> Int 524 {-# INLINE [1] foo #-} 525 foo x = x+1 526 527 bar :: Int -> Int 528 {-# INLINE [1] bar #-} 529 bar x = foo x + 1 530 531 {-# RULES "foo" [~1] forall x. foo x = bar x #-} 532 533Here the RULE makes bar recursive; but it's INLINE pragma remains 534non-recursive. It's tempting to then say that 'bar' should not be 535a loop breaker, but an attempt to do so goes wrong in two ways: 536 a) We may get 537 $df = ...$cfoo... 538 $cfoo = ...$df.... 539 [INLINE $cfoo = ...no-$df...] 540 But we want $cfoo to depend on $df explicitly so that we 541 put the bindings in the right order to inline $df in $cfoo 542 and perhaps break the loop altogether. (Maybe this 543 b) 544 545 546Example [eftInt] 547~~~~~~~~~~~~~~~ 548Example (from GHC.Enum): 549 550 eftInt :: Int# -> Int# -> [Int] 551 eftInt x y = ...(non-recursive)... 552 553 {-# INLINE [0] eftIntFB #-} 554 eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r 555 eftIntFB c n x y = ...(non-recursive)... 556 557 {-# RULES 558 "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y) 559 "eftIntList" [1] eftIntFB (:) [] = eftInt 560 #-} 561 562Note [Specialisation rules] 563~~~~~~~~~~~~~~~~~~~~~~~~~~~ 564Consider this group, which is typical of what SpecConstr builds: 565 566 fs a = ....f (C a).... 567 f x = ....f (C a).... 568 {-# RULE f (C a) = fs a #-} 569 570So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE). 571 572But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop: 573 - the RULE is applied in f's RHS (see Note [Self-recursive rules] in GHC.Core.Opt.Simplify 574 - fs is inlined (say it's small) 575 - now there's another opportunity to apply the RULE 576 577This showed up when compiling Control.Concurrent.Chan.getChanContents. 578Hence the transitive rule_fv_env stuff described in 579Note [Rules and loop breakers]. 580 581------------------------------------------------------------ 582Note [Finding join points] 583~~~~~~~~~~~~~~~~~~~~~~~~~~ 584It's the occurrence analyser's job to find bindings that we can turn into join 585points, but it doesn't perform that transformation right away. Rather, it marks 586the eligible bindings as part of their occurrence data, leaving it to the 587simplifier (or to simpleOptPgm) to actually change the binder's 'IdDetails'. 588The simplifier then eta-expands the RHS if needed and then updates the 589occurrence sites. Dividing the work this way means that the occurrence analyser 590still only takes one pass, yet one can always tell the difference between a 591function call and a jump by looking at the occurrence (because the same pass 592changes the 'IdDetails' and propagates the binders to their occurrence sites). 593 594To track potential join points, we use the 'occ_tail' field of OccInfo. A value 595of `AlwaysTailCalled n` indicates that every occurrence of the variable is a 596tail call with `n` arguments (counting both value and type arguments). Otherwise 597'occ_tail' will be 'NoTailCallInfo'. The tail call info flows bottom-up with the 598rest of 'OccInfo' until it goes on the binder. 599 600Note [Join points and unfoldings/rules] 601~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 602Consider 603 let j2 y = blah 604 let j x = j2 (x+x) 605 {-# INLINE [2] j #-} 606 in case e of { A -> j 1; B -> ...; C -> j 2 } 607 608Before j is inlined, we'll have occurrences of j2 in 609both j's RHS and in its stable unfolding. We want to discover 610j2 as a join point. So we must do the adjustRhsUsage thing 611on j's RHS. That's why we pass mb_join_arity to calcUnfolding. 612 613Aame with rules. Suppose we have: 614 615 let j :: Int -> Int 616 j y = 2 * y 617 let k :: Int -> Int -> Int 618 {-# RULES "SPEC k 0" k 0 y = j y #-} 619 k x y = x + 2 * y 620 in case e of { A -> k 1 2; B -> k 3 5; C -> blah } 621 622We identify k as a join point, and we want j to be a join point too. 623Without the RULE it would be, and we don't want the RULE to mess it 624up. So provided the join-point arity of k matches the args of the 625rule we can allow the tail-cal info from the RHS of the rule to 626propagate. 627 628* Wrinkle for Rec case. In the recursive case we don't know the 629 join-point arity in advance, when calling occAnalUnfolding and 630 occAnalRules. (See makeNode.) We don't want to pass Nothing, 631 because then a recursive joinrec might lose its join-poin-hood 632 when SpecConstr adds a RULE. So we just make do with the 633 *current* join-poin-hood, stored in the Id. 634 635 In the non-recursive case things are simple: see occAnalNonRecBind 636 637* Wrinkle for RULES. Suppose the example was a bit different: 638 let j :: Int -> Int 639 j y = 2 * y 640 k :: Int -> Int -> Int 641 {-# RULES "SPEC k 0" k 0 = j #-} 642 k x y = x + 2 * y 643 in ... 644 If we eta-expanded the rule all would be well, but as it stands the 645 one arg of the rule don't match the join-point arity of 2. 646 647 Conceivably we could notice that a potential join point would have 648 an "undersaturated" rule and account for it. This would mean we 649 could make something that's been specialised a join point, for 650 instance. But local bindings are rarely specialised, and being 651 overly cautious about rules only costs us anything when, for some `j`: 652 653 * Before specialisation, `j` has non-tail calls, so it can't be a join point. 654 * During specialisation, `j` gets specialised and thus acquires rules. 655 * Sometime afterward, the non-tail calls to `j` disappear (as dead code, say), 656 and so now `j` *could* become a join point. 657 658 This appears to be very rare in practice. TODO Perhaps we should gather 659 statistics to be sure. 660 661Note [Unfoldings and join points] 662~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 663We assume that anything in an unfolding occurs multiple times, since 664unfoldings are often copied (that's the whole point!). But we still 665need to track tail calls for the purpose of finding join points. 666 667 668------------------------------------------------------------ 669Note [Adjusting right-hand sides] 670~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 671There's a bit of a dance we need to do after analysing a lambda expression or 672a right-hand side. In particular, we need to 673 674 a) call 'markAllInsideLam' *unless* the binding is for a thunk, a one-shot 675 lambda, or a non-recursive join point; and 676 b) call 'markAllNonTail' *unless* the binding is for a join point. 677 678Some examples, with how the free occurrences in e (assumed not to be a value 679lambda) get marked: 680 681 inside lam non-tail-called 682 ------------------------------------------------------------ 683 let x = e No Yes 684 let f = \x -> e Yes Yes 685 let f = \x{OneShot} -> e No Yes 686 \x -> e Yes Yes 687 join j x = e No No 688 joinrec j x = e Yes No 689 690There are a few other caveats; most importantly, if we're marking a binding as 691'AlwaysTailCalled', it's *going* to be a join point, so we treat it as one so 692that the effect cascades properly. Consequently, at the time the RHS is 693analysed, we won't know what adjustments to make; thus 'occAnalLamOrRhs' must 694return the unadjusted 'UsageDetails', to be adjusted by 'adjustRhsUsage' once 695join-point-hood has been decided. 696 697Thus the overall sequence taking place in 'occAnalNonRecBind' and 698'occAnalRecBind' is as follows: 699 700 1. Call 'occAnalLamOrRhs' to find usage information for the RHS. 701 2. Call 'tagNonRecBinder' or 'tagRecBinders', which decides whether to make 702 the binding a join point. 703 3. Call 'adjustRhsUsage' accordingly. (Done as part of 'tagRecBinders' when 704 recursive.) 705 706(In the recursive case, this logic is spread between 'makeNode' and 707'occAnalRec'.) 708-} 709 710------------------------------------------------------------------ 711-- occAnalBind 712------------------------------------------------------------------ 713 714occAnalBind :: OccEnv -- The incoming OccEnv 715 -> TopLevelFlag 716 -> ImpRuleEdges 717 -> CoreBind 718 -> UsageDetails -- Usage details of scope 719 -> (UsageDetails, -- Of the whole let(rec) 720 [CoreBind]) 721 722occAnalBind env lvl top_env (NonRec binder rhs) body_usage 723 = occAnalNonRecBind env lvl top_env binder rhs body_usage 724occAnalBind env lvl top_env (Rec pairs) body_usage 725 = occAnalRecBind env lvl top_env pairs body_usage 726 727----------------- 728occAnalNonRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> Var -> CoreExpr 729 -> UsageDetails -> (UsageDetails, [CoreBind]) 730occAnalNonRecBind env lvl imp_rule_edges bndr rhs body_usage 731 | isTyVar bndr -- A type let; we don't gather usage info 732 = (body_usage, [NonRec bndr rhs]) 733 734 | not (bndr `usedIn` body_usage) -- It's not mentioned 735 = (body_usage, []) 736 737 | otherwise -- It's mentioned in the body 738 = (body_usage' `andUDs` rhs_usage, [NonRec final_bndr rhs']) 739 where 740 (body_usage', tagged_bndr) = tagNonRecBinder lvl body_usage bndr 741 final_bndr = tagged_bndr `setIdUnfolding` unf' 742 `setIdSpecialisation` mkRuleInfo rules' 743 rhs_usage = rhs_uds `andUDs` unf_uds `andUDs` rule_uds 744 745 -- Get the join info from the *new* decision 746 -- See Note [Join points and unfoldings/rules] 747 mb_join_arity = willBeJoinId_maybe tagged_bndr 748 is_join_point = isJust mb_join_arity 749 750 --------- Right hand side --------- 751 env1 | is_join_point = env -- See Note [Join point RHSs] 752 | certainly_inline = env -- See Note [Cascading inlines] 753 | otherwise = rhsCtxt env 754 755 -- See Note [Sources of one-shot information] 756 rhs_env = env1 { occ_one_shots = argOneShots dmd } 757 (rhs_uds, rhs') = occAnalRhs rhs_env NonRecursive mb_join_arity rhs 758 759 --------- Unfolding --------- 760 -- See Note [Unfoldings and join points] 761 unf = idUnfolding bndr 762 (unf_uds, unf') = occAnalUnfolding rhs_env NonRecursive mb_join_arity unf 763 764 --------- Rules --------- 765 -- See Note [Rules are extra RHSs] and Note [Rule dependency info] 766 rules_w_uds = occAnalRules rhs_env mb_join_arity bndr 767 rules' = map fstOf3 rules_w_uds 768 imp_rule_uds = impRulesScopeUsage (lookupImpRules imp_rule_edges bndr) 769 -- imp_rule_uds: consider 770 -- h = ... 771 -- g = ... 772 -- RULE map g = h 773 -- Then we want to ensure that h is in scope everwhere 774 -- that g is (since the RULE might turn g into h), so 775 -- we make g mention h. 776 777 rule_uds = foldr add_rule_uds imp_rule_uds rules_w_uds 778 add_rule_uds (_, l, r) uds = l `andUDs` r `andUDs` uds 779 780 ---------- 781 occ = idOccInfo tagged_bndr 782 certainly_inline -- See Note [Cascading inlines] 783 = case occ of 784 OneOcc { occ_in_lam = NotInsideLam, occ_n_br = 1 } 785 -> active && not_stable 786 _ -> False 787 788 dmd = idDemandInfo bndr 789 active = isAlwaysActive (idInlineActivation bndr) 790 not_stable = not (isStableUnfolding (idUnfolding bndr)) 791 792----------------- 793occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)] 794 -> UsageDetails -> (UsageDetails, [CoreBind]) 795-- For a recursive group, we 796-- * occ-analyse all the RHSs 797-- * compute strongly-connected components 798-- * feed those components to occAnalRec 799-- See Note [Recursive bindings: the grand plan] 800occAnalRecBind env lvl imp_rule_edges pairs body_usage 801 = foldr (occAnalRec rhs_env lvl) (body_usage, []) sccs 802 where 803 sccs :: [SCC Details] 804 sccs = {-# SCC "occAnalBind.scc" #-} 805 stronglyConnCompFromEdgedVerticesUniq nodes 806 807 nodes :: [LetrecNode] 808 nodes = {-# SCC "occAnalBind.assoc" #-} 809 map (makeNode rhs_env imp_rule_edges bndr_set) pairs 810 811 bndrs = map fst pairs 812 bndr_set = mkVarSet bndrs 813 rhs_env = env `addInScope` bndrs 814 815 816----------------------------- 817occAnalRec :: OccEnv -> TopLevelFlag 818 -> SCC Details 819 -> (UsageDetails, [CoreBind]) 820 -> (UsageDetails, [CoreBind]) 821 822 -- The NonRec case is just like a Let (NonRec ...) above 823occAnalRec _ lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs 824 , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs })) 825 (body_uds, binds) 826 | not (bndr `usedIn` body_uds) 827 = (body_uds, binds) -- See Note [Dead code] 828 829 | otherwise -- It's mentioned in the body 830 = (body_uds' `andUDs` rhs_uds', 831 NonRec tagged_bndr rhs : binds) 832 where 833 (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr 834 rhs_uds' = adjustRhsUsage NonRecursive (willBeJoinId_maybe tagged_bndr) 835 rhs_bndrs rhs_uds 836 837 -- The Rec case is the interesting one 838 -- See Note [Recursive bindings: the grand plan] 839 -- See Note [Loop breaking] 840occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds) 841 | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds 842 = (body_uds, binds) -- See Note [Dead code] 843 844 | otherwise -- At this point we always build a single Rec 845 = -- pprTrace "occAnalRec" (ppr loop_breaker_nodes) 846 (final_uds, Rec pairs : binds) 847 848 where 849 bndrs = map nd_bndr details_s 850 all_simple = all nd_simple details_s 851 852 ------------------------------ 853 -- Make the nodes for the loop-breaker analysis 854 -- See Note [Choosing loop breakers] for loop_breaker_nodes 855 final_uds :: UsageDetails 856 loop_breaker_nodes :: [LetrecNode] 857 (final_uds, loop_breaker_nodes) = mkLoopBreakerNodes env lvl body_uds details_s 858 859 ------------------------------ 860 active_rule_fvs :: VarSet 861 active_rule_fvs = mapUnionVarSet nd_active_rule_fvs details_s 862 863 --------------------------- 864 -- Now reconstruct the cycle 865 pairs :: [(Id,CoreExpr)] 866 pairs | all_simple = reOrderNodes 0 active_rule_fvs loop_breaker_nodes [] 867 | otherwise = loopBreakNodes 0 active_rule_fvs loop_breaker_nodes [] 868 -- In the common case when all are "simple" (no rules at all) 869 -- the loop_breaker_nodes will include all the scope edges 870 -- so a SCC computation would yield a single CyclicSCC result; 871 -- and reOrderNodes deals with exactly that case. 872 -- Saves a SCC analysis in a common case 873 874 875{- ********************************************************************* 876* * 877 Loop breaking 878* * 879********************************************************************* -} 880 881{- Note [Choosing loop breakers] 882~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 883In Step 4 in Note [Recursive bindings: the grand plan]), occAnalRec does 884loop-breaking on each CyclicSCC of the original program: 885 886* mkLoopBreakerNodes: Form the loop-breaker graph for that CyclicSCC 887 888* loopBreakNodes: Do SCC analysis on it 889 890* reOrderNodes: For each CyclicSCC, pick a loop breaker 891 * Delete edges to that loop breaker 892 * Do another SCC analysis on that reduced SCC 893 * Repeat 894 895To form the loop-breaker graph, we construct a new set of Nodes, the 896"loop-breaker nodes", with the same details but different edges, the 897"loop-breaker edges". The loop-breaker nodes have both more and fewer 898dependencies than the scope edges: 899 900 More edges: 901 If f calls g, and g has an active rule that mentions h then 902 we add an edge from f -> h. See Note [Rules and loop breakers]. 903 904 Fewer edges: we only include dependencies 905 * only on /active/ rules, 906 * on rule /RHSs/ (not LHSs) 907 908The scope edges, by contrast, must be much more inclusive. 909 910The nd_simple flag tracks the common case when a binding has no RULES 911at all, in which case the loop-breaker edges will be identical to the 912scope edges. 913 914Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is 915chosen as a loop breaker, because their RHSs don't mention each other. 916And indeed both can be inlined safely. 917 918Note [inl_fvs] 919~~~~~~~~~~~~~~ 920Note that the loop-breaker graph includes edges for occurrences in 921/both/ the RHS /and/ the stable unfolding. Consider this, which actually 922occurred when compiling BooleanFormula.hs in GHC: 923 924 Rec { lvl1 = go 925 ; lvl2[StableUnf = go] = lvl1 926 ; go = ...go...lvl2... } 927 928From the point of view of infinite inlining, we need only these edges: 929 lvl1 :-> go 930 lvl2 :-> go -- The RHS lvl1 will never be used for inlining 931 go :-> go, lvl2 932 933But the danger is that, lacking any edge to lvl1, we'll put it at the 934end thus 935 Rec { lvl2[ StableUnf = go] = lvl1 936 ; go[LoopBreaker] = ...go...lvl2... } 937 ; lvl1[Occ=Once] = go } 938 939And now the Simplifer will try to use PreInlineUnconditionally on lvl1 940(which occurs just once), but because it is last we won't actually 941substitute in lvl2. Sigh. 942 943To avoid this possiblity, we include edges from lvl2 to /both/ its 944stable unfolding /and/ its RHS. Hence the defn of inl_fvs in 945makeNode. Maybe we could be more clever, but it's very much a corner 946case. 947 948Note [Weak loop breakers] 949~~~~~~~~~~~~~~~~~~~~~~~~~ 950There is a last nasty wrinkle. Suppose we have 951 952 Rec { f = f_rhs 953 RULE f [] = g 954 955 h = h_rhs 956 g = h 957 ...more... 958 } 959 960Remember that we simplify the RULES before any RHS (see Note 961[Rules are visible in their own rec group] above). 962 963So we must *not* postInlineUnconditionally 'g', even though 964its RHS turns out to be trivial. (I'm assuming that 'g' is 965not chosen as a loop breaker.) Why not? Because then we 966drop the binding for 'g', which leaves it out of scope in the 967RULE! 968 969Here's a somewhat different example of the same thing 970 Rec { q = r 971 ; r = ...p... 972 ; p = p_rhs 973 RULE p [] = q } 974Here the RULE is "below" q, but we *still* can't postInlineUnconditionally 975q, because the RULE for p is active throughout. So the RHS of r 976might rewrite to r = ...q... 977So q must remain in scope in the output program! 978 979We "solve" this by: 980 981 Make q a "weak" loop breaker (OccInfo = IAmLoopBreaker True) 982 iff q is a mentioned in the RHS of an active RULE in the Rec group 983 984A normal "strong" loop breaker has IAmLoopBreaker False. So: 985 986 Inline postInlineUnconditionally 987strong IAmLoopBreaker False no no 988weak IAmLoopBreaker True yes no 989 other yes yes 990 991The **sole** reason for this kind of loop breaker is so that 992postInlineUnconditionally does not fire. Ugh. 993 994Annoyingly, since we simplify the rules *first* we'll never inline 995q into p's RULE. That trivial binding for q will hang around until 996we discard the rule. Yuk. But it's rare. 997 998 Note [Rules and loop breakers] 999~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1000When we form the loop-breaker graph (Step 4 in Note [Recursive 1001bindings: the grand plan]), we must be careful about RULEs. 1002 1003For a start, we want a loop breaker to cut every cycle, so inactive 1004rules play no part; we need only consider /active/ rules. 1005See Note [Finding rule RHS free vars] 1006 1007The second point is more subtle. A RULE is like an equation for 1008'f' that is *always* inlined if it is applicable. We do *not* disable 1009rules for loop-breakers. It's up to whoever makes the rules to make 1010sure that the rules themselves always terminate. See Note [Rules for 1011recursive functions] in GHC.Core.Opt.Simplify 1012 1013Hence, if 1014 f's RHS (or its stable unfolding if it has one) mentions g, and 1015 g has a RULE that mentions h, and 1016 h has a RULE that mentions f 1017 1018then we *must* choose f to be a loop breaker. Example: see Note 1019[Specialisation rules]. So out plan is this: 1020 1021 Take the free variables of f's RHS, and augment it with all the 1022 variables reachable by a transitive sequence RULES from those 1023 starting points. 1024 1025That is the whole reason for computing rule_fv_env in mkLoopBreakerNodes. 1026Wrinkles: 1027 1028* We only consider /active/ rules. See Note [Finding rule RHS free vars] 1029 1030* We need only consider free vars that are also binders in this Rec 1031 group. See also Note [Finding rule RHS free vars] 1032 1033* We only consider variables free in the *RHS* of the rule, in 1034 contrast to the way we build the Rec group in the first place (Note 1035 [Rule dependency info]) 1036 1037* Why "transitive sequence of rules"? Because active rules apply 1038 unconditionally, without checking loop-breaker-ness. 1039 See Note [Loop breaker dependencies]. 1040 1041Note [Finding rule RHS free vars] 1042~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1043Consider this real example from Data Parallel Haskell 1044 tagZero :: Array Int -> Array Tag 1045 {-# INLINE [1] tagZeroes #-} 1046 tagZero xs = pmap (\x -> fromBool (x==0)) xs 1047 1048 {-# RULES "tagZero" [~1] forall xs n. 1049 pmap fromBool <blah blah> = tagZero xs #-} 1050So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero. 1051However, tagZero can only be inlined in phase 1 and later, while 1052the RULE is only active *before* phase 1. So there's no problem. 1053 1054To make this work, we look for the RHS free vars only for 1055*active* rules. That's the reason for the occ_rule_act field 1056of the OccEnv. 1057 1058Note [loopBreakNodes] 1059~~~~~~~~~~~~~~~~~~~~~ 1060loopBreakNodes is applied to the list of nodes for a cyclic strongly 1061connected component (there's guaranteed to be a cycle). It returns 1062the same nodes, but 1063 a) in a better order, 1064 b) with some of the Ids having a IAmALoopBreaker pragma 1065 1066The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means 1067that the simplifier can guarantee not to loop provided it never records an inlining 1068for these no-inline guys. 1069 1070Furthermore, the order of the binds is such that if we neglect dependencies 1071on the no-inline Ids then the binds are topologically sorted. This means 1072that the simplifier will generally do a good job if it works from top bottom, 1073recording inlinings for any Ids which aren't marked as "no-inline" as it goes. 1074-} 1075 1076type Binding = (Id,CoreExpr) 1077 1078-- See Note [loopBreakNodes] 1079loopBreakNodes :: Int 1080 -> VarSet -- Binders whose dependencies may be "missing" 1081 -- See Note [Weak loop breakers] 1082 -> [LetrecNode] 1083 -> [Binding] -- Append these to the end 1084 -> [Binding] 1085 1086-- Return the bindings sorted into a plausible order, and marked with loop breakers. 1087-- See Note [loopBreakNodes] 1088loopBreakNodes depth weak_fvs nodes binds 1089 = -- pprTrace "loopBreakNodes" (ppr nodes) $ 1090 go (stronglyConnCompFromEdgedVerticesUniqR nodes) 1091 where 1092 go [] = binds 1093 go (scc:sccs) = loop_break_scc scc (go sccs) 1094 1095 loop_break_scc scc binds 1096 = case scc of 1097 AcyclicSCC node -> nodeBinding (mk_non_loop_breaker weak_fvs) node : binds 1098 CyclicSCC nodes -> reOrderNodes depth weak_fvs nodes binds 1099 1100---------------------------------- 1101reOrderNodes :: Int -> VarSet -> [LetrecNode] -> [Binding] -> [Binding] 1102 -- Choose a loop breaker, mark it no-inline, 1103 -- and call loopBreakNodes on the rest 1104reOrderNodes _ _ [] _ = panic "reOrderNodes" 1105reOrderNodes _ _ [node] binds = nodeBinding mk_loop_breaker node : binds 1106reOrderNodes depth weak_fvs (node : nodes) binds 1107 = -- pprTrace "reOrderNodes" (vcat [ text "unchosen" <+> ppr unchosen 1108 -- , text "chosen" <+> ppr chosen_nodes ]) $ 1109 loopBreakNodes new_depth weak_fvs unchosen $ 1110 (map (nodeBinding mk_loop_breaker) chosen_nodes ++ binds) 1111 where 1112 (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb 1113 (nd_score (node_payload node)) 1114 [node] [] nodes 1115 1116 approximate_lb = depth >= 2 1117 new_depth | approximate_lb = 0 1118 | otherwise = depth+1 1119 -- After two iterations (d=0, d=1) give up 1120 -- and approximate, returning to d=0 1121 1122nodeBinding :: (Id -> Id) -> LetrecNode -> Binding 1123nodeBinding set_id_occ (node_payload -> ND { nd_bndr = bndr, nd_rhs = rhs}) 1124 = (set_id_occ bndr, rhs) 1125 1126mk_loop_breaker :: Id -> Id 1127mk_loop_breaker bndr 1128 = bndr `setIdOccInfo` occ' 1129 where 1130 occ' = strongLoopBreaker { occ_tail = tail_info } 1131 tail_info = tailCallInfo (idOccInfo bndr) 1132 1133mk_non_loop_breaker :: VarSet -> Id -> Id 1134-- See Note [Weak loop breakers] 1135mk_non_loop_breaker weak_fvs bndr 1136 | bndr `elemVarSet` weak_fvs = setIdOccInfo bndr occ' 1137 | otherwise = bndr 1138 where 1139 occ' = weakLoopBreaker { occ_tail = tail_info } 1140 tail_info = tailCallInfo (idOccInfo bndr) 1141 1142---------------------------------- 1143chooseLoopBreaker :: Bool -- True <=> Too many iterations, 1144 -- so approximate 1145 -> NodeScore -- Best score so far 1146 -> [LetrecNode] -- Nodes with this score 1147 -> [LetrecNode] -- Nodes with higher scores 1148 -> [LetrecNode] -- Unprocessed nodes 1149 -> ([LetrecNode], [LetrecNode]) 1150 -- This loop looks for the bind with the lowest score 1151 -- to pick as the loop breaker. The rest accumulate in 1152chooseLoopBreaker _ _ loop_nodes acc [] 1153 = (loop_nodes, acc) -- Done 1154 1155 -- If approximate_loop_breaker is True, we pick *all* 1156 -- nodes with lowest score, else just one 1157 -- See Note [Complexity of loop breaking] 1158chooseLoopBreaker approx_lb loop_sc loop_nodes acc (node : nodes) 1159 | approx_lb 1160 , rank sc == rank loop_sc 1161 = chooseLoopBreaker approx_lb loop_sc (node : loop_nodes) acc nodes 1162 1163 | sc `betterLB` loop_sc -- Better score so pick this new one 1164 = chooseLoopBreaker approx_lb sc [node] (loop_nodes ++ acc) nodes 1165 1166 | otherwise -- Worse score so don't pick it 1167 = chooseLoopBreaker approx_lb loop_sc loop_nodes (node : acc) nodes 1168 where 1169 sc = nd_score (node_payload node) 1170 1171{- 1172Note [Complexity of loop breaking] 1173~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1174The loop-breaking algorithm knocks out one binder at a time, and 1175performs a new SCC analysis on the remaining binders. That can 1176behave very badly in tightly-coupled groups of bindings; in the 1177worst case it can be (N**2)*log N, because it does a full SCC 1178on N, then N-1, then N-2 and so on. 1179 1180To avoid this, we switch plans after 2 (or whatever) attempts: 1181 Plan A: pick one binder with the lowest score, make it 1182 a loop breaker, and try again 1183 Plan B: pick *all* binders with the lowest score, make them 1184 all loop breakers, and try again 1185Since there are only a small finite number of scores, this will 1186terminate in a constant number of iterations, rather than O(N) 1187iterations. 1188 1189You might thing that it's very unlikely, but RULES make it much 1190more likely. Here's a real example from #1969: 1191 Rec { $dm = \d.\x. op d 1192 {-# RULES forall d. $dm Int d = $s$dm1 1193 forall d. $dm Bool d = $s$dm2 #-} 1194 1195 dInt = MkD .... opInt ... 1196 dInt = MkD .... opBool ... 1197 opInt = $dm dInt 1198 opBool = $dm dBool 1199 1200 $s$dm1 = \x. op dInt 1201 $s$dm2 = \x. op dBool } 1202The RULES stuff means that we can't choose $dm as a loop breaker 1203(Note [Choosing loop breakers]), so we must choose at least (say) 1204opInt *and* opBool, and so on. The number of loop breakders is 1205linear in the number of instance declarations. 1206 1207Note [Loop breakers and INLINE/INLINABLE pragmas] 1208~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1209Avoid choosing a function with an INLINE pramga as the loop breaker! 1210If such a function is mutually-recursive with a non-INLINE thing, 1211then the latter should be the loop-breaker. 1212 1213It's vital to distinguish between INLINE and INLINABLE (the 1214Bool returned by hasStableCoreUnfolding_maybe). If we start with 1215 Rec { {-# INLINABLE f #-} 1216 f x = ...f... } 1217and then worker/wrapper it through strictness analysis, we'll get 1218 Rec { {-# INLINABLE $wf #-} 1219 $wf p q = let x = (p,q) in ...f... 1220 1221 {-# INLINE f #-} 1222 f x = case x of (p,q) -> $wf p q } 1223 1224Now it is vital that we choose $wf as the loop breaker, so we can 1225inline 'f' in '$wf'. 1226 1227Note [DFuns should not be loop breakers] 1228~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1229It's particularly bad to make a DFun into a loop breaker. See 1230Note [How instance declarations are translated] in GHC.Tc.TyCl.Instance 1231 1232We give DFuns a higher score than ordinary CONLIKE things because 1233if there's a choice we want the DFun to be the non-loop breaker. Eg 1234 1235rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC) 1236 1237 $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE) 1238 {-# DFUN #-} 1239 $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC) 1240 } 1241 1242Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it 1243if we can't unravel the DFun first. 1244 1245Note [Constructor applications] 1246~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1247It's really really important to inline dictionaries. Real 1248example (the Enum Ordering instance from GHC.Base): 1249 1250 rec f = \ x -> case d of (p,q,r) -> p x 1251 g = \ x -> case d of (p,q,r) -> q x 1252 d = (v, f, g) 1253 1254Here, f and g occur just once; but we can't inline them into d. 1255On the other hand we *could* simplify those case expressions if 1256we didn't stupidly choose d as the loop breaker. 1257But we won't because constructor args are marked "Many". 1258Inlining dictionaries is really essential to unravelling 1259the loops in static numeric dictionaries, see GHC.Float. 1260 1261Note [Closure conversion] 1262~~~~~~~~~~~~~~~~~~~~~~~~~ 1263We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm. 1264The immediate motivation came from the result of a closure-conversion transformation 1265which generated code like this: 1266 1267 data Clo a b = forall c. Clo (c -> a -> b) c 1268 1269 ($:) :: Clo a b -> a -> b 1270 Clo f env $: x = f env x 1271 1272 rec { plus = Clo plus1 () 1273 1274 ; plus1 _ n = Clo plus2 n 1275 1276 ; plus2 Zero n = n 1277 ; plus2 (Succ m) n = Succ (plus $: m $: n) } 1278 1279If we inline 'plus' and 'plus1', everything unravels nicely. But if 1280we choose 'plus1' as the loop breaker (which is entirely possible 1281otherwise), the loop does not unravel nicely. 1282 1283 1284@occAnalUnfolding@ deals with the question of bindings where the Id is marked 1285by an INLINE pragma. For these we record that anything which occurs 1286in its RHS occurs many times. This pessimistically assumes that this 1287inlined binder also occurs many times in its scope, but if it doesn't 1288we'll catch it next time round. At worst this costs an extra simplifier pass. 1289ToDo: try using the occurrence info for the inline'd binder. 1290 1291[March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC. 1292[June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC. 1293 1294 1295************************************************************************ 1296* * 1297 Making nodes 1298* * 1299************************************************************************ 1300-} 1301 1302type LetrecNode = Node Unique Details -- Node comes from Digraph 1303 -- The Unique key is gotten from the Id 1304data Details 1305 = ND { nd_bndr :: Id -- Binder 1306 1307 , nd_rhs :: CoreExpr -- RHS, already occ-analysed 1308 1309 , nd_rhs_bndrs :: [CoreBndr] -- Outer lambdas of RHS 1310 -- INVARIANT: (nd_rhs_bndrs nd, _) == 1311 -- collectBinders (nd_rhs nd) 1312 1313 , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings 1314 -- ignoring phase (ie assuming all are active) 1315 -- See Note [Forming Rec groups] 1316 1317 , nd_inl :: IdSet -- Free variables of the stable unfolding and the RHS 1318 -- but excluding any RULES 1319 -- This is the IdSet that may be used if the Id is inlined 1320 1321 , nd_simple :: Bool -- True iff this binding has no local RULES 1322 -- If all nodes are simple we don't need a loop-breaker 1323 -- dep-anal before reconstructing. 1324 1325 , nd_active_rule_fvs :: IdSet -- Variables bound in this Rec group that are free 1326 -- in the RHS of an active rule for this bndr 1327 1328 , nd_score :: NodeScore 1329 } 1330 1331instance Outputable Details where 1332 ppr nd = text "ND" <> braces 1333 (sep [ text "bndr =" <+> ppr (nd_bndr nd) 1334 , text "uds =" <+> ppr (nd_uds nd) 1335 , text "inl =" <+> ppr (nd_inl nd) 1336 , text "simple =" <+> ppr (nd_simple nd) 1337 , text "active_rule_fvs =" <+> ppr (nd_active_rule_fvs nd) 1338 , text "score =" <+> ppr (nd_score nd) 1339 ]) 1340 1341-- The NodeScore is compared lexicographically; 1342-- e.g. lower rank wins regardless of size 1343type NodeScore = ( Int -- Rank: lower => more likely to be picked as loop breaker 1344 , Int -- Size of rhs: higher => more likely to be picked as LB 1345 -- Maxes out at maxExprSize; we just use it to prioritise 1346 -- small functions 1347 , Bool ) -- Was it a loop breaker before? 1348 -- True => more likely to be picked 1349 -- Note [Loop breakers, node scoring, and stability] 1350 1351rank :: NodeScore -> Int 1352rank (r, _, _) = r 1353 1354makeNode :: OccEnv -> ImpRuleEdges -> VarSet 1355 -> (Var, CoreExpr) -> LetrecNode 1356-- See Note [Recursive bindings: the grand plan] 1357makeNode env imp_rule_edges bndr_set (bndr, rhs) 1358 = DigraphNode { node_payload = details 1359 , node_key = varUnique bndr 1360 , node_dependencies = nonDetKeysUniqSet scope_fvs } 1361 -- It's OK to use nonDetKeysUniqSet here as stronglyConnCompFromEdgedVerticesR 1362 -- is still deterministic with edges in nondeterministic order as 1363 -- explained in Note [Deterministic SCC] in GHC.Data.Graph.Directed. 1364 where 1365 details = ND { nd_bndr = bndr' 1366 , nd_rhs = rhs' 1367 , nd_rhs_bndrs = bndrs' 1368 , nd_uds = scope_uds 1369 , nd_inl = inl_fvs 1370 , nd_simple = null rules_w_uds && null imp_rule_info 1371 , nd_active_rule_fvs = active_rule_fvs 1372 , nd_score = pprPanic "makeNodeDetails" (ppr bndr) } 1373 1374 bndr' = bndr `setIdUnfolding` unf' 1375 `setIdSpecialisation` mkRuleInfo rules' 1376 1377 inl_uds = rhs_uds `andUDs` unf_uds 1378 scope_uds = inl_uds `andUDs` rule_uds 1379 -- Note [Rules are extra RHSs] 1380 -- Note [Rule dependency info] 1381 scope_fvs = udFreeVars bndr_set scope_uds 1382 -- scope_fvs: all occurrences from this binder: RHS, unfolding, 1383 -- and RULES, both LHS and RHS thereof, active or inactive 1384 1385 inl_fvs = udFreeVars bndr_set inl_uds 1386 -- inl_fvs: vars that would become free if the function was inlined. 1387 -- We conservatively approximate that by thefree vars from the RHS 1388 -- and the unfolding together. 1389 -- See Note [inl_fvs] 1390 1391 mb_join_arity = isJoinId_maybe bndr 1392 -- Get join point info from the *current* decision 1393 -- We don't know what the new decision will be! 1394 -- Using the old decision at least allows us to 1395 -- preserve existing join point, even RULEs are added 1396 -- See Note [Join points and unfoldings/rules] 1397 1398 --------- Right hand side --------- 1399 -- Constructing the edges for the main Rec computation 1400 -- See Note [Forming Rec groups] 1401 -- Do not use occAnalRhs because we don't yet know 1402 -- the final answer for mb_join_arity 1403 (bndrs, body) = collectBinders rhs 1404 rhs_env = rhsCtxt env 1405 (rhs_uds, bndrs', body') = occAnalLamOrRhs rhs_env bndrs body 1406 rhs' = mkLams bndrs' body' 1407 1408 --------- Unfolding --------- 1409 -- See Note [Unfoldings and join points] 1410 unf = realIdUnfolding bndr -- realIdUnfolding: Ignore loop-breaker-ness 1411 -- here because that is what we are setting! 1412 (unf_uds, unf') = occAnalUnfolding rhs_env Recursive mb_join_arity unf 1413 1414 --------- IMP-RULES -------- 1415 is_active = occ_rule_act env :: Activation -> Bool 1416 imp_rule_info = lookupImpRules imp_rule_edges bndr 1417 imp_rule_uds = impRulesScopeUsage imp_rule_info 1418 imp_rule_fvs = impRulesActiveFvs is_active bndr_set imp_rule_info 1419 1420 --------- All rules -------- 1421 rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)] 1422 rules_w_uds = occAnalRules rhs_env mb_join_arity bndr 1423 rules' = map fstOf3 rules_w_uds 1424 1425 rule_uds = foldr add_rule_uds imp_rule_uds rules_w_uds 1426 add_rule_uds (_, l, r) uds = l `andUDs` r `andUDs` uds 1427 1428 active_rule_fvs = foldr add_active_rule imp_rule_fvs rules_w_uds 1429 add_active_rule (rule, _, rhs_uds) fvs 1430 | is_active (ruleActivation rule) 1431 = udFreeVars bndr_set rhs_uds `unionVarSet` fvs 1432 | otherwise 1433 = fvs 1434 1435 1436mkLoopBreakerNodes :: OccEnv -> TopLevelFlag 1437 -> UsageDetails -- for BODY of let 1438 -> [Details] 1439 -> (UsageDetails, -- adjusted 1440 [LetrecNode]) 1441-- See Note [Choosing loop breakers] 1442-- This function primarily creates the Nodes for the 1443-- loop-breaker SCC analysis. More specifically: 1444-- a) tag each binder with its occurrence info 1445-- b) add a NodeScore to each node 1446-- c) make a Node with the right dependency edges for 1447-- the loop-breaker SCC analysis 1448-- d) adjust each RHS's usage details according to 1449-- the binder's (new) shotness and join-point-hood 1450mkLoopBreakerNodes env lvl body_uds details_s 1451 = (final_uds, zipWithEqual "mkLoopBreakerNodes" mk_lb_node details_s bndrs') 1452 where 1453 (final_uds, bndrs') 1454 = tagRecBinders lvl body_uds 1455 [ (bndr, uds, rhs_bndrs) 1456 | ND { nd_bndr = bndr, nd_uds = uds, nd_rhs_bndrs = rhs_bndrs } 1457 <- details_s ] 1458 1459 mk_lb_node nd@(ND { nd_bndr = old_bndr, nd_inl = inl_fvs }) new_bndr 1460 = DigraphNode { node_payload = new_nd 1461 , node_key = varUnique old_bndr 1462 , node_dependencies = nonDetKeysUniqSet lb_deps } 1463 -- It's OK to use nonDetKeysUniqSet here as 1464 -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges 1465 -- in nondeterministic order as explained in 1466 -- Note [Deterministic SCC] in GHC.Data.Graph.Directed. 1467 where 1468 new_nd = nd { nd_bndr = new_bndr, nd_score = score } 1469 score = nodeScore env new_bndr lb_deps nd 1470 lb_deps = extendFvs_ rule_fv_env inl_fvs 1471 -- See Note [Loop breaker dependencies] 1472 1473 rule_fv_env :: IdEnv IdSet 1474 -- Maps a variable f to the variables from this group 1475 -- reachable by a sequence of RULES starting with f 1476 -- Domain is *subset* of bound vars (others have no rule fvs) 1477 -- See Note [Finding rule RHS free vars] 1478 -- Why transClosureFV? See Note [Loop breaker dependencies] 1479 rule_fv_env = transClosureFV $ mkVarEnv $ 1480 [ (b, rule_fvs) 1481 | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s 1482 , not (isEmptyVarSet rule_fvs) ] 1483 1484{- Note [Loop breaker dependencies] 1485~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1486The loop breaker dependencies of x in a recursive 1487group { f1 = e1; ...; fn = en } are: 1488 1489- The "inline free variables" of f: the fi free in 1490 f's stable unfolding and RHS; see Note [inl_fvs] 1491 1492- Any fi reachable from those inline free variables by a sequence 1493 of RULE rewrites. Remember, rule rewriting is not affected 1494 by fi being a loop breaker, so we have to take the transitive 1495 closure in case f is the only possible loop breaker in the loop. 1496 1497 Hence rule_fv_env. We need only account for /active/ rules. 1498-} 1499 1500------------------------------------------ 1501nodeScore :: OccEnv 1502 -> Id -- Binder with new occ-info 1503 -> VarSet -- Loop-breaker dependencies 1504 -> Details 1505 -> NodeScore 1506nodeScore env new_bndr lb_deps 1507 (ND { nd_bndr = old_bndr, nd_rhs = bind_rhs }) 1508 1509 | not (isId old_bndr) -- A type or coercion variable is never a loop breaker 1510 = (100, 0, False) 1511 1512 | old_bndr `elemVarSet` lb_deps -- Self-recursive things are great loop breakers 1513 = (0, 0, True) -- See Note [Self-recursion and loop breakers] 1514 1515 | not (occ_unf_act env old_bndr) -- A binder whose inlining is inactive (e.g. has 1516 = (0, 0, True) -- a NOINLINE pragma) makes a great loop breaker 1517 1518 | exprIsTrivial rhs 1519 = mk_score 10 -- Practically certain to be inlined 1520 -- Used to have also: && not (isExportedId bndr) 1521 -- But I found this sometimes cost an extra iteration when we have 1522 -- rec { d = (a,b); a = ...df...; b = ...df...; df = d } 1523 -- where df is the exported dictionary. Then df makes a really 1524 -- bad choice for loop breaker 1525 1526 | DFunUnfolding { df_args = args } <- old_unf 1527 -- Never choose a DFun as a loop breaker 1528 -- Note [DFuns should not be loop breakers] 1529 = (9, length args, is_lb) 1530 1531 -- Data structures are more important than INLINE pragmas 1532 -- so that dictionary/method recursion unravels 1533 1534 | CoreUnfolding { uf_guidance = UnfWhen {} } <- old_unf 1535 = mk_score 6 1536 1537 | is_con_app rhs -- Data types help with cases: 1538 = mk_score 5 -- Note [Constructor applications] 1539 1540 | isStableUnfolding old_unf 1541 , can_unfold 1542 = mk_score 3 1543 1544 | isOneOcc (idOccInfo new_bndr) 1545 = mk_score 2 -- Likely to be inlined 1546 1547 | can_unfold -- The Id has some kind of unfolding 1548 = mk_score 1 1549 1550 | otherwise 1551 = (0, 0, is_lb) 1552 1553 where 1554 mk_score :: Int -> NodeScore 1555 mk_score rank = (rank, rhs_size, is_lb) 1556 1557 -- is_lb: see Note [Loop breakers, node scoring, and stability] 1558 is_lb = isStrongLoopBreaker (idOccInfo old_bndr) 1559 1560 old_unf = realIdUnfolding old_bndr 1561 can_unfold = canUnfold old_unf 1562 rhs = case old_unf of 1563 CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs } 1564 | isStableSource src 1565 -> unf_rhs 1566 _ -> bind_rhs 1567 -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding 1568 rhs_size = case old_unf of 1569 CoreUnfolding { uf_guidance = guidance } 1570 | UnfIfGoodArgs { ug_size = size } <- guidance 1571 -> size 1572 _ -> cheapExprSize rhs 1573 1574 1575 -- Checking for a constructor application 1576 -- Cheap and cheerful; the simplifier moves casts out of the way 1577 -- The lambda case is important to spot x = /\a. C (f a) 1578 -- which comes up when C is a dictionary constructor and 1579 -- f is a default method. 1580 -- Example: the instance for Show (ST s a) in GHC.ST 1581 -- 1582 -- However we *also* treat (\x. C p q) as a con-app-like thing, 1583 -- Note [Closure conversion] 1584 is_con_app (Var v) = isConLikeId v 1585 is_con_app (App f _) = is_con_app f 1586 is_con_app (Lam _ e) = is_con_app e 1587 is_con_app (Tick _ e) = is_con_app e 1588 is_con_app _ = False 1589 1590maxExprSize :: Int 1591maxExprSize = 20 -- Rather arbitrary 1592 1593cheapExprSize :: CoreExpr -> Int 1594-- Maxes out at maxExprSize 1595cheapExprSize e 1596 = go 0 e 1597 where 1598 go n e | n >= maxExprSize = n 1599 | otherwise = go1 n e 1600 1601 go1 n (Var {}) = n+1 1602 go1 n (Lit {}) = n+1 1603 go1 n (Type {}) = n 1604 go1 n (Coercion {}) = n 1605 go1 n (Tick _ e) = go1 n e 1606 go1 n (Cast e _) = go1 n e 1607 go1 n (App f a) = go (go1 n f) a 1608 go1 n (Lam b e) 1609 | isTyVar b = go1 n e 1610 | otherwise = go (n+1) e 1611 go1 n (Let b e) = gos (go1 n e) (rhssOfBind b) 1612 go1 n (Case e _ _ as) = gos (go1 n e) (rhssOfAlts as) 1613 1614 gos n [] = n 1615 gos n (e:es) | n >= maxExprSize = n 1616 | otherwise = gos (go1 n e) es 1617 1618betterLB :: NodeScore -> NodeScore -> Bool 1619-- If n1 `betterLB` n2 then choose n1 as the loop breaker 1620betterLB (rank1, size1, lb1) (rank2, size2, _) 1621 | rank1 < rank2 = True 1622 | rank1 > rank2 = False 1623 | size1 < size2 = False -- Make the bigger n2 into the loop breaker 1624 | size1 > size2 = True 1625 | lb1 = True -- Tie-break: if n1 was a loop breaker before, choose it 1626 | otherwise = False -- See Note [Loop breakers, node scoring, and stability] 1627 1628{- Note [Self-recursion and loop breakers] 1629~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1630If we have 1631 rec { f = ...f...g... 1632 ; g = .....f... } 1633then 'f' has to be a loop breaker anyway, so we may as well choose it 1634right away, so that g can inline freely. 1635 1636This is really just a cheap hack. Consider 1637 rec { f = ...g... 1638 ; g = ..f..h... 1639 ; h = ...f....} 1640Here f or g are better loop breakers than h; but we might accidentally 1641choose h. Finding the minimal set of loop breakers is hard. 1642 1643Note [Loop breakers, node scoring, and stability] 1644~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1645To choose a loop breaker, we give a NodeScore to each node in the SCC, 1646and pick the one with the best score (according to 'betterLB'). 1647 1648We need to be jolly careful (#12425, #12234) about the stability 1649of this choice. Suppose we have 1650 1651 let rec { f = ...g...g... 1652 ; g = ...f...f... } 1653 in 1654 case x of 1655 True -> ...f.. 1656 False -> ..f... 1657 1658In each iteration of the simplifier the occurrence analyser OccAnal 1659chooses a loop breaker. Suppose in iteration 1 it choose g as the loop 1660breaker. That means it is free to inline f. 1661 1662Suppose that GHC decides to inline f in the branches of the case, but 1663(for some reason; eg it is not saturated) in the rhs of g. So we get 1664 1665 let rec { f = ...g...g... 1666 ; g = ...f...f... } 1667 in 1668 case x of 1669 True -> ...g...g..... 1670 False -> ..g..g.... 1671 1672Now suppose that, for some reason, in the next iteration the occurrence 1673analyser chooses f as the loop breaker, so it can freely inline g. And 1674again for some reason the simplifier inlines g at its calls in the case 1675branches, but not in the RHS of f. Then we get 1676 1677 let rec { f = ...g...g... 1678 ; g = ...f...f... } 1679 in 1680 case x of 1681 True -> ...(...f...f...)...(...f..f..)..... 1682 False -> ..(...f...f...)...(..f..f...).... 1683 1684You can see where this is going! Each iteration of the simplifier 1685doubles the number of calls to f or g. No wonder GHC is slow! 1686 1687(In the particular example in comment:3 of #12425, f and g are the two 1688mutually recursive fmap instances for CondT and Result. They are both 1689marked INLINE which, oddly, is why they don't inline in each other's 1690RHS, because the call there is not saturated.) 1691 1692The root cause is that we flip-flop on our choice of loop breaker. I 1693always thought it didn't matter, and indeed for any single iteration 1694to terminate, it doesn't matter. But when we iterate, it matters a 1695lot!! 1696 1697So The Plan is this: 1698 If there is a tie, choose the node that 1699 was a loop breaker last time round 1700 1701Hence the is_lb field of NodeScore 1702 1703************************************************************************ 1704* * 1705 Right hand sides 1706* * 1707************************************************************************ 1708-} 1709 1710occAnalRhs :: OccEnv -> RecFlag -> Maybe JoinArity 1711 -> CoreExpr -- RHS 1712 -> (UsageDetails, CoreExpr) 1713occAnalRhs env is_rec mb_join_arity rhs 1714 = case occAnalLamOrRhs env bndrs body of { (body_usage, bndrs', body') -> 1715 let final_bndrs | isRec is_rec = bndrs' 1716 | otherwise = markJoinOneShots mb_join_arity bndrs' 1717 -- For a /non-recursive/ join point we can mark all 1718 -- its join-lambda as one-shot; and it's a good idea to do so 1719 1720 -- Final adjustment 1721 rhs_usage = adjustRhsUsage is_rec mb_join_arity final_bndrs body_usage 1722 1723 in (rhs_usage, mkLams final_bndrs body') } 1724 where 1725 (bndrs, body) = collectBinders rhs 1726 1727occAnalUnfolding :: OccEnv 1728 -> RecFlag 1729 -> Maybe JoinArity -- See Note [Join points and unfoldings/rules] 1730 -> Unfolding 1731 -> (UsageDetails, Unfolding) 1732-- Occurrence-analyse a stable unfolding; 1733-- discard a non-stable one altogether. 1734occAnalUnfolding env is_rec mb_join_arity unf 1735 = case unf of 1736 unf@(CoreUnfolding { uf_tmpl = rhs, uf_src = src }) 1737 | isStableSource src -> (markAllMany usage, unf') 1738 -- markAllMany: see Note [Occurrences in stable unfoldings] 1739 | otherwise -> (emptyDetails, unf) 1740 -- For non-Stable unfoldings we leave them undisturbed, but 1741 -- don't count their usage because the simplifier will discard them. 1742 -- We leave them undisturbed because nodeScore uses their size info 1743 -- to guide its decisions. It's ok to leave un-substituted 1744 -- expressions in the tree because all the variables that were in 1745 -- scope remain in scope; there is no cloning etc. 1746 where 1747 (usage, rhs') = occAnalRhs env is_rec mb_join_arity rhs 1748 1749 unf' | noBinderSwaps env = unf -- Note [Unfoldings and rules] 1750 | otherwise = unf { uf_tmpl = rhs' } 1751 1752 unf@(DFunUnfolding { df_bndrs = bndrs, df_args = args }) 1753 -> ( final_usage, unf { df_args = args' } ) 1754 where 1755 env' = env `addInScope` bndrs 1756 (usage, args') = occAnalList env' args 1757 final_usage = markAllManyNonTail (delDetailsList usage bndrs) 1758 1759 unf -> (emptyDetails, unf) 1760 1761occAnalRules :: OccEnv 1762 -> Maybe JoinArity -- See Note [Join points and unfoldings/rules] 1763 -> Id -- Get rules from here 1764 -> [(CoreRule, -- Each (non-built-in) rule 1765 UsageDetails, -- Usage details for LHS 1766 UsageDetails)] -- Usage details for RHS 1767occAnalRules env mb_join_arity bndr 1768 = map occ_anal_rule (idCoreRules bndr) 1769 where 1770 occ_anal_rule rule@(Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs }) 1771 = (rule', lhs_uds', rhs_uds') 1772 where 1773 env' = env `addInScope` bndrs 1774 rule' | noBinderSwaps env = rule -- Note [Unfoldings and rules] 1775 | otherwise = rule { ru_args = args', ru_rhs = rhs' } 1776 1777 (lhs_uds, args') = occAnalList env' args 1778 lhs_uds' = markAllManyNonTail $ 1779 lhs_uds `delDetailsList` bndrs 1780 1781 (rhs_uds, rhs') = occAnal env' rhs 1782 -- Note [Rules are extra RHSs] 1783 -- Note [Rule dependency info] 1784 rhs_uds' = markAllNonTailIf (not exact_join) $ 1785 markAllMany $ 1786 rhs_uds `delDetailsList` bndrs 1787 1788 exact_join = exactJoin mb_join_arity args 1789 -- See Note [Join points and unfoldings/rules] 1790 1791 occ_anal_rule other_rule = (other_rule, emptyDetails, emptyDetails) 1792 1793{- Note [Join point RHSs] 1794~~~~~~~~~~~~~~~~~~~~~~~~~ 1795Consider 1796 x = e 1797 join j = Just x 1798 1799We want to inline x into j right away, so we don't want to give 1800the join point a RhsCtxt (#14137). It's not a huge deal, because 1801the FloatIn pass knows to float into join point RHSs; and the simplifier 1802does not float things out of join point RHSs. But it's a simple, cheap 1803thing to do. See #14137. 1804 1805Note [Occurrences in stable unfoldings] 1806~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1807Consider 1808 f p = BIG 1809 {-# INLINE g #-} 1810 g y = not (f y) 1811where this is the /only/ occurrence of 'f'. So 'g' will get a stable 1812unfolding. Now suppose that g's RHS gets optimised (perhaps by a rule 1813or inlining f) so that it doesn't mention 'f' any more. Now the last 1814remaining call to f is in g's Stable unfolding. But, even though there 1815is only one syntactic occurrence of f, we do /not/ want to do 1816preinlineUnconditionally here! 1817 1818The INLINE pragma says "inline exactly this RHS"; perhaps the 1819programmer wants to expose that 'not', say. If we inline f that will make 1820the Stable unfoldign big, and that wasn't what the programmer wanted. 1821 1822Another way to think about it: if we inlined g as-is into multiple 1823call sites, now there's be multiple calls to f. 1824 1825Bottom line: treat all occurrences in a stable unfolding as "Many". 1826 1827Note [Unfoldings and rules] 1828~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1829Generally unfoldings and rules are already occurrence-analysed, so we 1830don't want to reconstruct their trees; we just want to analyse them to 1831find how they use their free variables. 1832 1833EXCEPT if there is a binder-swap going on, in which case we do want to 1834produce a new tree. 1835 1836So we have a fast-path that keeps the old tree if the occ_bs_env is 1837empty. This just saves a bit of allocation and reconstruction; not 1838a big deal. 1839 1840Note [Cascading inlines] 1841~~~~~~~~~~~~~~~~~~~~~~~~ 1842By default we use an rhsCtxt for the RHS of a binding. This tells the 1843occ anal n that it's looking at an RHS, which has an effect in 1844occAnalApp. In particular, for constructor applications, it makes 1845the arguments appear to have NoOccInfo, so that we don't inline into 1846them. Thus x = f y 1847 k = Just x 1848we do not want to inline x. 1849 1850But there's a problem. Consider 1851 x1 = a0 : [] 1852 x2 = a1 : x1 1853 x3 = a2 : x2 1854 g = f x3 1855First time round, it looks as if x1 and x2 occur as an arg of a 1856let-bound constructor ==> give them a many-occurrence. 1857But then x3 is inlined (unconditionally as it happens) and 1858next time round, x2 will be, and the next time round x1 will be 1859Result: multiple simplifier iterations. Sigh. 1860 1861So, when analysing the RHS of x3 we notice that x3 will itself 1862definitely inline the next time round, and so we analyse x3's rhs in 1863an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff. 1864 1865Annoyingly, we have to approximate GHC.Core.Opt.Simplify.Utils.preInlineUnconditionally. 1866If (a) the RHS is expandable (see isExpandableApp in occAnalApp), and 1867 (b) certainly_inline says "yes" when preInlineUnconditionally says "no" 1868then the simplifier iterates indefinitely: 1869 x = f y 1870 k = Just x -- We decide that k is 'certainly_inline' 1871 v = ...k... -- but preInlineUnconditionally doesn't inline it 1872inline ==> 1873 k = Just (f y) 1874 v = ...k... 1875float ==> 1876 x1 = f y 1877 k = Just x1 1878 v = ...k... 1879 1880This is worse than the slow cascade, so we only want to say "certainly_inline" 1881if it really is certain. Look at the note with preInlineUnconditionally 1882for the various clauses. 1883 1884 1885************************************************************************ 1886* * 1887 Expressions 1888* * 1889************************************************************************ 1890-} 1891 1892occAnalList :: OccEnv -> [CoreExpr] -> (UsageDetails, [CoreExpr]) 1893occAnalList _ [] = (emptyDetails, []) 1894occAnalList env (e:es) = case occAnal env e of { (uds1, e') -> 1895 case occAnalList env es of { (uds2, es') -> 1896 (uds1 `andUDs` uds2, e' : es') } } 1897 1898occAnal :: OccEnv 1899 -> CoreExpr 1900 -> (UsageDetails, -- Gives info only about the "interesting" Ids 1901 CoreExpr) 1902 1903occAnal _ expr@(Type _) = (emptyDetails, expr) 1904occAnal _ expr@(Lit _) = (emptyDetails, expr) 1905occAnal env expr@(Var _) = occAnalApp env (expr, [], []) 1906 -- At one stage, I gathered the idRuleVars for the variable here too, 1907 -- which in a way is the right thing to do. 1908 -- But that went wrong right after specialisation, when 1909 -- the *occurrences* of the overloaded function didn't have any 1910 -- rules in them, so the *specialised* versions looked as if they 1911 -- weren't used at all. 1912 1913occAnal _ (Coercion co) 1914 = (addManyOccs emptyDetails (coVarsOfCo co), Coercion co) 1915 -- See Note [Gather occurrences of coercion variables] 1916 1917{- 1918Note [Gather occurrences of coercion variables] 1919~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1920We need to gather info about what coercion variables appear, so that 1921we can sort them into the right place when doing dependency analysis. 1922-} 1923 1924occAnal env (Tick tickish body) 1925 | SourceNote{} <- tickish 1926 = (usage, Tick tickish body') 1927 -- SourceNotes are best-effort; so we just proceed as usual. 1928 -- If we drop a tick due to the issues described below it's 1929 -- not the end of the world. 1930 1931 | tickish `tickishScopesLike` SoftScope 1932 = (markAllNonTail usage, Tick tickish body') 1933 1934 | Breakpoint _ _ ids <- tickish 1935 = (usage_lam `andUDs` foldr addManyOcc emptyDetails ids, Tick tickish body') 1936 -- never substitute for any of the Ids in a Breakpoint 1937 1938 | otherwise 1939 = (usage_lam, Tick tickish body') 1940 where 1941 !(usage,body') = occAnal env body 1942 -- for a non-soft tick scope, we can inline lambdas only 1943 usage_lam = markAllNonTail (markAllInsideLam usage) 1944 -- TODO There may be ways to make ticks and join points play 1945 -- nicer together, but right now there are problems: 1946 -- let j x = ... in tick<t> (j 1) 1947 -- Making j a join point may cause the simplifier to drop t 1948 -- (if the tick is put into the continuation). So we don't 1949 -- count j 1 as a tail call. 1950 -- See #14242. 1951 1952occAnal env (Cast expr co) 1953 = case occAnal env expr of { (usage, expr') -> 1954 let usage1 = markAllManyNonTailIf (isRhsEnv env) usage 1955 -- usage1: if we see let x = y `cast` co 1956 -- then mark y as 'Many' so that we don't 1957 -- immediately inline y again. 1958 usage2 = addManyOccs usage1 (coVarsOfCo co) 1959 -- usage2: see Note [Gather occurrences of coercion variables] 1960 in (markAllNonTail usage2, Cast expr' co) 1961 } 1962 1963occAnal env app@(App _ _) 1964 = occAnalApp env (collectArgsTicks tickishFloatable app) 1965 1966-- Ignore type variables altogether 1967-- (a) occurrences inside type lambdas only not marked as InsideLam 1968-- (b) type variables not in environment 1969 1970occAnal env (Lam x body) 1971 | isTyVar x 1972 = case occAnal env body of { (body_usage, body') -> 1973 (markAllNonTail body_usage, Lam x body') 1974 } 1975 1976{- Note [Occurrence analysis for lambda binders] 1977~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1978For value lambdas we do a special hack. Consider 1979 (\x. \y. ...x...) 1980If we did nothing, x is used inside the \y, so would be marked 1981as dangerous to dup. But in the common case where the abstraction 1982is applied to two arguments this is over-pessimistic, which delays 1983inlining x, which forces more simplifier iterations. 1984 1985So instead, we just mark each binder with its occurrence info in the 1986*body* of the multiple lambda. Then, the simplifier is careful when 1987partially applying lambdas. See the calls to zapLamBndrs in 1988 GHC.Core.Opt.Simplify.simplExprF1 1989 GHC.Core.SimpleOpt.simple_app 1990-} 1991 1992occAnal env expr@(Lam _ _) 1993 = -- See Note [Occurrence analysis for lambda binders] 1994 case occAnalLamOrRhs env bndrs body of { (usage, tagged_bndrs, body') -> 1995 let 1996 expr' = mkLams tagged_bndrs body' 1997 usage1 = markAllNonTail usage 1998 one_shot_gp = all isOneShotBndr tagged_bndrs 1999 final_usage = markAllInsideLamIf (not one_shot_gp) usage1 2000 in 2001 (final_usage, expr') } 2002 where 2003 (bndrs, body) = collectBinders expr 2004 2005occAnal env (Case scrut bndr ty alts) 2006 = case occAnal (scrutCtxt env alts) scrut of { (scrut_usage, scrut') -> 2007 let alt_env = addBndrSwap scrut' bndr $ 2008 env { occ_encl = OccVanilla } `addInScope` [bndr] 2009 in 2010 case mapAndUnzip (occAnalAlt alt_env) alts of { (alts_usage_s, alts') -> 2011 let 2012 alts_usage = foldr orUDs emptyDetails alts_usage_s 2013 (alts_usage1, tagged_bndr) = tagLamBinder alts_usage bndr 2014 total_usage = markAllNonTail scrut_usage `andUDs` alts_usage1 2015 -- Alts can have tail calls, but the scrutinee can't 2016 in 2017 total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }} 2018 2019occAnal env (Let bind body) 2020 = case occAnal (env `addInScope` bindersOf bind) 2021 body of { (body_usage, body') -> 2022 case occAnalBind env NotTopLevel 2023 noImpRuleEdges bind 2024 body_usage of { (final_usage, new_binds) -> 2025 (final_usage, mkLets new_binds body') }} 2026 2027occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr]) 2028occAnalArgs _ [] _ 2029 = (emptyDetails, []) 2030 2031occAnalArgs env (arg:args) one_shots 2032 | isTypeArg arg 2033 = case occAnalArgs env args one_shots of { (uds, args') -> 2034 (uds, arg:args') } 2035 2036 | otherwise 2037 = case argCtxt env one_shots of { (arg_env, one_shots') -> 2038 case occAnal arg_env arg of { (uds1, arg') -> 2039 case occAnalArgs env args one_shots' of { (uds2, args') -> 2040 (uds1 `andUDs` uds2, arg':args') }}} 2041 2042{- 2043Applications are dealt with specially because we want 2044the "build hack" to work. 2045 2046Note [Arguments of let-bound constructors] 2047~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2048Consider 2049 f x = let y = expensive x in 2050 let z = (True,y) in 2051 (case z of {(p,q)->q}, case z of {(p,q)->q}) 2052We feel free to duplicate the WHNF (True,y), but that means 2053that y may be duplicated thereby. 2054 2055If we aren't careful we duplicate the (expensive x) call! 2056Constructors are rather like lambdas in this way. 2057-} 2058 2059occAnalApp :: OccEnv 2060 -> (Expr CoreBndr, [Arg CoreBndr], [CoreTickish]) 2061 -> (UsageDetails, Expr CoreBndr) 2062-- Naked variables (not applied) end up here too 2063occAnalApp env (Var fun, args, ticks) 2064 -- Account for join arity of runRW# continuation 2065 -- See Note [Simplification of runRW#] 2066 -- 2067 -- NB: Do not be tempted to make the next (Var fun, args, tick) 2068 -- equation into an 'otherwise' clause for this equation 2069 -- The former has a bang-pattern to occ-anal the args, and 2070 -- we don't want to occ-anal them twice in the runRW# case! 2071 -- This caused #18296 2072 | fun `hasKey` runRWKey 2073 , [t1, t2, arg] <- args 2074 , let (usage, arg') = occAnalRhs env NonRecursive (Just 1) arg 2075 = (usage, mkTicks ticks $ mkApps (Var fun) [t1, t2, arg']) 2076 2077occAnalApp env (Var fun_id, args, ticks) 2078 = (all_uds, mkTicks ticks $ mkApps fun' args') 2079 where 2080 (fun', fun_id') = lookupBndrSwap env fun_id 2081 2082 fun_uds = mkOneOcc fun_id' int_cxt n_args 2083 -- NB: fun_uds is computed for fun_id', not fun_id 2084 -- See (BS1) in Note [The binder-swap substitution] 2085 2086 all_uds = fun_uds `andUDs` final_args_uds 2087 2088 !(args_uds, args') = occAnalArgs env args one_shots 2089 !final_args_uds = markAllNonTail $ 2090 markAllInsideLamIf (isRhsEnv env && is_exp) $ 2091 args_uds 2092 -- We mark the free vars of the argument of a constructor or PAP 2093 -- as "inside-lambda", if it is the RHS of a let(rec). 2094 -- This means that nothing gets inlined into a constructor or PAP 2095 -- argument position, which is what we want. Typically those 2096 -- constructor arguments are just variables, or trivial expressions. 2097 -- We use inside-lam because it's like eta-expanding the PAP. 2098 -- 2099 -- This is the *whole point* of the isRhsEnv predicate 2100 -- See Note [Arguments of let-bound constructors] 2101 2102 n_val_args = valArgCount args 2103 n_args = length args 2104 int_cxt = case occ_encl env of 2105 OccScrut -> IsInteresting 2106 _other | n_val_args > 0 -> IsInteresting 2107 | otherwise -> NotInteresting 2108 2109 is_exp = isExpandableApp fun_id n_val_args 2110 -- See Note [CONLIKE pragma] in GHC.Types.Basic 2111 -- The definition of is_exp should match that in GHC.Core.Opt.Simplify.prepareRhs 2112 2113 one_shots = argsOneShots (idStrictness fun_id) guaranteed_val_args 2114 guaranteed_val_args = n_val_args + length (takeWhile isOneShotInfo 2115 (occ_one_shots env)) 2116 -- See Note [Sources of one-shot information], bullet point A'] 2117 2118occAnalApp env (fun, args, ticks) 2119 = (markAllNonTail (fun_uds `andUDs` args_uds), 2120 mkTicks ticks $ mkApps fun' args') 2121 where 2122 !(fun_uds, fun') = occAnal (addAppCtxt env args) fun 2123 -- The addAppCtxt is a bit cunning. One iteration of the simplifier 2124 -- often leaves behind beta redexs like 2125 -- (\x y -> e) a1 a2 2126 -- Here we would like to mark x,y as one-shot, and treat the whole 2127 -- thing much like a let. We do this by pushing some True items 2128 -- onto the context stack. 2129 !(args_uds, args') = occAnalArgs env args [] 2130 2131 2132{- 2133Note [Sources of one-shot information] 2134~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2135The occurrence analyser obtains one-shot-lambda information from two sources: 2136 2137A: Saturated applications: eg f e1 .. en 2138 2139 In general, given a call (f e1 .. en) we can propagate one-shot info from 2140 f's strictness signature into e1 .. en, but /only/ if n is enough to 2141 saturate the strictness signature. A strictness signature like 2142 2143 f :: C1(C1(L))LS 2144 2145 means that *if f is applied to three arguments* then it will guarantee to 2146 call its first argument at most once, and to call the result of that at 2147 most once. But if f has fewer than three arguments, all bets are off; e.g. 2148 2149 map (f (\x y. expensive) e2) xs 2150 2151 Here the \x y abstraction may be called many times (once for each element of 2152 xs) so we should not mark x and y as one-shot. But if it was 2153 2154 map (f (\x y. expensive) 3 2) xs 2155 2156 then the first argument of f will be called at most once. 2157 2158 The one-shot info, derived from f's strictness signature, is 2159 computed by 'argsOneShots', called in occAnalApp. 2160 2161A': Non-obviously saturated applications: eg build (f (\x y -> expensive)) 2162 where f is as above. 2163 2164 In this case, f is only manifestly applied to one argument, so it does not 2165 look saturated. So by the previous point, we should not use its strictness 2166 signature to learn about the one-shotness of \x y. But in this case we can: 2167 build is fully applied, so we may use its strictness signature; and from 2168 that we learn that build calls its argument with two arguments *at most once*. 2169 2170 So there is really only one call to f, and it will have three arguments. In 2171 that sense, f is saturated, and we may proceed as described above. 2172 2173 Hence the computation of 'guaranteed_val_args' in occAnalApp, using 2174 '(occ_one_shots env)'. See also #13227, comment:9 2175 2176B: Let-bindings: eg let f = \c. let ... in \n -> blah 2177 in (build f, build f) 2178 2179 Propagate one-shot info from the demanand-info on 'f' to the 2180 lambdas in its RHS (which may not be syntactically at the top) 2181 2182 This information must have come from a previous run of the demanand 2183 analyser. 2184 2185Previously, the demand analyser would *also* set the one-shot information, but 2186that code was buggy (see #11770), so doing it only in on place, namely here, is 2187saner. 2188 2189Note [OneShots] 2190~~~~~~~~~~~~~~~ 2191When analysing an expression, the occ_one_shots argument contains information 2192about how the function is being used. The length of the list indicates 2193how many arguments will eventually be passed to the analysed expression, 2194and the OneShotInfo indicates whether this application is once or multiple times. 2195 2196Example: 2197 2198 Context of f occ_one_shots when analysing f 2199 2200 f 1 2 [OneShot, OneShot] 2201 map (f 1) [OneShot, NoOneShotInfo] 2202 build f [OneShot, OneShot] 2203 f 1 2 `seq` f 2 1 [NoOneShotInfo, OneShot] 2204 2205Note [Binders in case alternatives] 2206~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2207Consider 2208 case x of y { (a,b) -> f y } 2209We treat 'a', 'b' as dead, because they don't physically occur in the 2210case alternative. (Indeed, a variable is dead iff it doesn't occur in 2211its scope in the output of OccAnal.) It really helps to know when 2212binders are unused. See esp the call to isDeadBinder in 2213Simplify.mkDupableAlt 2214 2215In this example, though, the Simplifier will bring 'a' and 'b' back to 2216life, because it binds 'y' to (a,b) (imagine got inlined and 2217scrutinised y). 2218-} 2219 2220occAnalLamOrRhs :: OccEnv -> [CoreBndr] -> CoreExpr 2221 -> (UsageDetails, [CoreBndr], CoreExpr) 2222-- Tags the returned binders with their OccInfo, but does 2223-- not do any markInsideLam to the returned usage details 2224occAnalLamOrRhs env [] body 2225 = case occAnal env body of (body_usage, body') -> (body_usage, [], body') 2226 -- RHS of thunk or nullary join point 2227 2228occAnalLamOrRhs env (bndr:bndrs) body 2229 | isTyVar bndr 2230 = -- Important: Keep the environment so that we don't inline into an RHS like 2231 -- \(@ x) -> C @x (f @x) 2232 -- (see the beginning of Note [Cascading inlines]). 2233 case occAnalLamOrRhs env bndrs body of 2234 (body_usage, bndrs', body') -> (body_usage, bndr:bndrs', body') 2235 2236occAnalLamOrRhs env binders body 2237 = case occAnal env_body body of { (body_usage, body') -> 2238 let 2239 (final_usage, tagged_binders) = tagLamBinders body_usage binders' 2240 -- Use binders' to put one-shot info on the lambdas 2241 in 2242 (final_usage, tagged_binders, body') } 2243 where 2244 env1 = env `addInScope` binders 2245 (env_body, binders') = oneShotGroup env1 binders 2246 2247occAnalAlt :: OccEnv 2248 -> CoreAlt -> (UsageDetails, Alt IdWithOccInfo) 2249occAnalAlt env (Alt con bndrs rhs) 2250 = case occAnal (env `addInScope` bndrs) rhs of { (rhs_usage1, rhs1) -> 2251 let 2252 (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs 2253 in -- See Note [Binders in case alternatives] 2254 (alt_usg, Alt con tagged_bndrs rhs1) } 2255 2256{- 2257************************************************************************ 2258* * 2259 OccEnv 2260* * 2261************************************************************************ 2262-} 2263 2264data OccEnv 2265 = OccEnv { occ_encl :: !OccEncl -- Enclosing context information 2266 , occ_one_shots :: !OneShots -- See Note [OneShots] 2267 , occ_unf_act :: Id -> Bool -- Which Id unfoldings are active 2268 , occ_rule_act :: Activation -> Bool -- Which rules are active 2269 -- See Note [Finding rule RHS free vars] 2270 2271 -- See Note [The binder-swap substitution] 2272 -- If x :-> (y, co) is in the env, 2273 -- then please replace x by (y |> sym mco) 2274 -- Invariant of course: idType x = exprType (y |> sym mco) 2275 , occ_bs_env :: VarEnv (OutId, MCoercion) 2276 , occ_bs_rng :: VarSet -- Vars free in the range of occ_bs_env 2277 -- Domain is Global and Local Ids 2278 -- Range is just Local Ids 2279 } 2280 2281 2282----------------------------- 2283-- OccEncl is used to control whether to inline into constructor arguments 2284-- For example: 2285-- x = (p,q) -- Don't inline p or q 2286-- y = /\a -> (p a, q a) -- Still don't inline p or q 2287-- z = f (p,q) -- Do inline p,q; it may make a rule fire 2288-- So OccEncl tells enough about the context to know what to do when 2289-- we encounter a constructor application or PAP. 2290-- 2291-- OccScrut is used to set the "interesting context" field of OncOcc 2292 2293data OccEncl 2294 = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda 2295 -- Don't inline into constructor args here 2296 2297 | OccScrut -- Scrutintee of a case 2298 -- Can inline into constructor args 2299 2300 | OccVanilla -- Argument of function, body of lambda, etc 2301 -- Do inline into constructor args here 2302 2303instance Outputable OccEncl where 2304 ppr OccRhs = text "occRhs" 2305 ppr OccScrut = text "occScrut" 2306 ppr OccVanilla = text "occVanilla" 2307 2308-- See note [OneShots] 2309type OneShots = [OneShotInfo] 2310 2311initOccEnv :: OccEnv 2312initOccEnv 2313 = OccEnv { occ_encl = OccVanilla 2314 , occ_one_shots = [] 2315 2316 -- To be conservative, we say that all 2317 -- inlines and rules are active 2318 , occ_unf_act = \_ -> True 2319 , occ_rule_act = \_ -> True 2320 2321 , occ_bs_env = emptyVarEnv 2322 , occ_bs_rng = emptyVarSet } 2323 2324noBinderSwaps :: OccEnv -> Bool 2325noBinderSwaps (OccEnv { occ_bs_env = bs_env }) = isEmptyVarEnv bs_env 2326 2327scrutCtxt :: OccEnv -> [CoreAlt] -> OccEnv 2328scrutCtxt env alts 2329 | interesting_alts = env { occ_encl = OccScrut, occ_one_shots = [] } 2330 | otherwise = env { occ_encl = OccVanilla, occ_one_shots = [] } 2331 where 2332 interesting_alts = case alts of 2333 [] -> False 2334 [alt] -> not (isDefaultAlt alt) 2335 _ -> True 2336 -- 'interesting_alts' is True if the case has at least one 2337 -- non-default alternative. That in turn influences 2338 -- pre/postInlineUnconditionally. Grep for "occ_int_cxt"! 2339 2340rhsCtxt :: OccEnv -> OccEnv 2341rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] } 2342 2343argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots]) 2344argCtxt env [] 2345 = (env { occ_encl = OccVanilla, occ_one_shots = [] }, []) 2346argCtxt env (one_shots:one_shots_s) 2347 = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s) 2348 2349isRhsEnv :: OccEnv -> Bool 2350isRhsEnv (OccEnv { occ_encl = cxt }) = case cxt of 2351 OccRhs -> True 2352 _ -> False 2353 2354addInScope :: OccEnv -> [Var] -> OccEnv 2355-- See Note [The binder-swap substitution] 2356addInScope env@(OccEnv { occ_bs_env = swap_env, occ_bs_rng = rng_vars }) bndrs 2357 | any (`elemVarSet` rng_vars) bndrs = env { occ_bs_env = emptyVarEnv, occ_bs_rng = emptyVarSet } 2358 | otherwise = env { occ_bs_env = swap_env `delVarEnvList` bndrs } 2359 2360oneShotGroup :: OccEnv -> [CoreBndr] 2361 -> ( OccEnv 2362 , [CoreBndr] ) 2363 -- The result binders have one-shot-ness set that they might not have had originally. 2364 -- This happens in (build (\c n -> e)). Here the occurrence analyser 2365 -- linearity context knows that c,n are one-shot, and it records that fact in 2366 -- the binder. This is useful to guide subsequent float-in/float-out transformations 2367 2368oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs 2369 = go ctxt bndrs [] 2370 where 2371 go ctxt [] rev_bndrs 2372 = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla } 2373 , reverse rev_bndrs ) 2374 2375 go [] bndrs rev_bndrs 2376 = ( env { occ_one_shots = [], occ_encl = OccVanilla } 2377 , reverse rev_bndrs ++ bndrs ) 2378 2379 go ctxt@(one_shot : ctxt') (bndr : bndrs) rev_bndrs 2380 | isId bndr = go ctxt' bndrs (bndr': rev_bndrs) 2381 | otherwise = go ctxt bndrs (bndr : rev_bndrs) 2382 where 2383 bndr' = updOneShotInfo bndr one_shot 2384 -- Use updOneShotInfo, not setOneShotInfo, as pre-existing 2385 -- one-shot info might be better than what we can infer, e.g. 2386 -- due to explicit use of the magic 'oneShot' function. 2387 -- See Note [The oneShot function] 2388 2389 2390markJoinOneShots :: Maybe JoinArity -> [Var] -> [Var] 2391-- Mark the lambdas of a non-recursive join point as one-shot. 2392-- This is good to prevent gratuitous float-out etc 2393markJoinOneShots mb_join_arity bndrs 2394 = case mb_join_arity of 2395 Nothing -> bndrs 2396 Just n -> go n bndrs 2397 where 2398 go 0 bndrs = bndrs 2399 go _ [] = [] -- This can legitimately happen. 2400 -- e.g. let j = case ... in j True 2401 -- This will become an arity-1 join point after the 2402 -- simplifier has eta-expanded it; but it may not have 2403 -- enough lambdas /yet/. (Lint checks that JoinIds do 2404 -- have enough lambdas.) 2405 go n (b:bs) = b' : go (n-1) bs 2406 where 2407 b' | isId b = setOneShotLambda b 2408 | otherwise = b 2409 2410addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv 2411addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args 2412 = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt } 2413 2414-------------------- 2415transClosureFV :: VarEnv VarSet -> VarEnv VarSet 2416-- If (f,g), (g,h) are in the input, then (f,h) is in the output 2417-- as well as (f,g), (g,h) 2418transClosureFV env 2419 | no_change = env 2420 | otherwise = transClosureFV (listToUFM_Directly new_fv_list) 2421 where 2422 (no_change, new_fv_list) = mapAccumL bump True (nonDetUFMToList env) 2423 -- It's OK to use nonDetUFMToList here because we'll forget the 2424 -- ordering by creating a new set with listToUFM 2425 bump no_change (b,fvs) 2426 | no_change_here = (no_change, (b,fvs)) 2427 | otherwise = (False, (b,new_fvs)) 2428 where 2429 (new_fvs, no_change_here) = extendFvs env fvs 2430 2431------------- 2432extendFvs_ :: VarEnv VarSet -> VarSet -> VarSet 2433extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag 2434 2435extendFvs :: VarEnv VarSet -> VarSet -> (VarSet, Bool) 2436-- (extendFVs env s) returns 2437-- (s `union` env(s), env(s) `subset` s) 2438extendFvs env s 2439 | isNullUFM env 2440 = (s, True) 2441 | otherwise 2442 = (s `unionVarSet` extras, extras `subVarSet` s) 2443 where 2444 extras :: VarSet -- env(s) 2445 extras = nonDetStrictFoldUFM unionVarSet emptyVarSet $ 2446 -- It's OK to use nonDetStrictFoldUFM here because unionVarSet commutes 2447 intersectUFM_C (\x _ -> x) env (getUniqSet s) 2448 2449{- 2450************************************************************************ 2451* * 2452 Binder swap 2453* * 2454************************************************************************ 2455 2456Note [Binder swap] 2457~~~~~~~~~~~~~~~~~~ 2458The "binder swap" transformation swaps occurrence of the 2459scrutinee of a case for occurrences of the case-binder: 2460 2461 (1) case x of b { pi -> ri } 2462 ==> 2463 case x of b { pi -> ri[b/x] } 2464 2465 (2) case (x |> co) of b { pi -> ri } 2466 ==> 2467 case (x |> co) of b { pi -> ri[b |> sym co/x] } 2468 2469The substitution ri[b/x] etc is done by the occurrence analyser. 2470See Note [The binder-swap substitution]. 2471 2472There are two reasons for making this swap: 2473 2474(A) It reduces the number of occurrences of the scrutinee, x. 2475 That in turn might reduce its occurrences to one, so we 2476 can inline it and save an allocation. E.g. 2477 let x = factorial y in case x of b { I# v -> ...x... } 2478 If we replace 'x' by 'b' in the alternative we get 2479 let x = factorial y in case x of b { I# v -> ...b... } 2480 and now we can inline 'x', thus 2481 case (factorial y) of b { I# v -> ...b... } 2482 2483(B) The case-binder b has unfolding information; in the 2484 example above we know that b = I# v. That in turn allows 2485 nested cases to simplify. Consider 2486 case x of b { I# v -> 2487 ...(case x of b2 { I# v2 -> rhs })... 2488 If we replace 'x' by 'b' in the alternative we get 2489 case x of b { I# v -> 2490 ...(case b of b2 { I# v2 -> rhs })... 2491 and now it is trivial to simplify the inner case: 2492 case x of b { I# v -> 2493 ...(let b2 = b in rhs)... 2494 2495 The same can happen even if the scrutinee is a variable 2496 with a cast: see Note [Case of cast] 2497 2498The reason for doing these transformations /here in the occurrence 2499analyser/ is because it allows us to adjust the OccInfo for 'x' and 2500'b' as we go. 2501 2502 * Suppose the only occurrences of 'x' are the scrutinee and in the 2503 ri; then this transformation makes it occur just once, and hence 2504 get inlined right away. 2505 2506 * If instead the Simplifier replaces occurrences of x with 2507 occurrences of b, that will mess up b's occurrence info. That in 2508 turn might have consequences. 2509 2510There is a danger though. Consider 2511 let v = x +# y 2512 in case (f v) of w -> ...v...v... 2513And suppose that (f v) expands to just v. Then we'd like to 2514use 'w' instead of 'v' in the alternative. But it may be too 2515late; we may have substituted the (cheap) x+#y for v in the 2516same simplifier pass that reduced (f v) to v. 2517 2518I think this is just too bad. CSE will recover some of it. 2519 2520Note [The binder-swap substitution] 2521~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2522The binder-swap is implemented by the occ_bs_env field of OccEnv. 2523There are two main pieces: 2524 2525* Given case x |> co of b { alts } 2526 we add [x :-> (b, co)] to the occ_bs_env environment; this is 2527 done by addBndrSwap. 2528 2529* Then, at an occurrence of a variable, we look up in the occ_bs_env 2530 to perform the swap. This is done by lookupBndrSwap. 2531 2532Some tricky corners: 2533 2534(BS1) We do the substitution before gathering occurrence info. So in 2535 the above example, an occurrence of x turns into an occurrence 2536 of b, and that's what we gather in the UsageDetails. It's as 2537 if the binder-swap occurred before occurrence analysis. See 2538 the computation of fun_uds in occAnalApp. 2539 2540(BS2) When doing a lookup in occ_bs_env, we may need to iterate, 2541 as you can see implemented in lookupBndrSwap. Why? 2542 Consider case x of a { 1# -> e1; DEFAULT -> 2543 case x of b { 2# -> e2; DEFAULT -> 2544 case x of c { 3# -> e3; DEFAULT -> ..x..a..b.. }}} 2545 At the first case addBndrSwap will extend occ_bs_env with 2546 [x :-> a] 2547 At the second case we occ-anal the scrutinee 'x', which looks up 2548 'x in occ_bs_env, returning 'a', as it should. 2549 Then addBndrSwap will add [a :-> b] to occ_bs_env, yielding 2550 occ_bs_env = [x :-> a, a :-> b] 2551 At the third case we'll again look up 'x' which returns 'a'. 2552 But we don't want to stop the lookup there, else we'll end up with 2553 case x of a { 1# -> e1; DEFAULT -> 2554 case a of b { 2# -> e2; DEFAULT -> 2555 case a of c { 3# -> e3; DEFAULT -> ..a..b..c.. }}} 2556 Instead, we want iterate the lookup in addBndrSwap, to give 2557 case x of a { 1# -> e1; DEFAULT -> 2558 case a of b { 2# -> e2; DEFAULT -> 2559 case b of c { 3# -> e3; DEFAULT -> ..c..c..c.. }}} 2560 This makes a particular difference for case-merge, which works 2561 only if the scrutinee is the case-binder of the immediately enclosing 2562 case (Note [Merge Nested Cases] in GHC.Core.Opt.Simplify.Utils 2563 See #19581 for the bug report that showed this up. 2564 2565(BS3) We need care when shadowing. Suppose [x :-> b] is in occ_bs_env, 2566 and we encounter: 2567 - \x. blah 2568 Here we want to delete the x-binding from occ_bs_env 2569 2570 - \b. blah 2571 This is harder: we really want to delete all bindings that 2572 have 'b' free in the range. That is a bit tiresome to implement, 2573 so we compromise. We keep occ_bs_rng, which is the set of 2574 free vars of rng(occc_bs_env). If a binder shadows any of these 2575 variables, we discard all of occ_bs_env. Safe, if a bit 2576 brutal. NB, however: the simplifer de-shadows the code, so the 2577 next time around this won't happen. 2578 2579 These checks are implemented in addInScope. 2580 2581 The occurrence analyser itself does /not/ do cloning. It could, in 2582 principle, but it'd make it a bit more complicated and there is no 2583 great benefit. The simplifer uses cloning to get a no-shadowing 2584 situation, the care-when-shadowing behaviour above isn't needed for 2585 long. 2586 2587(BS4) The domain of occ_bs_env can include GlobaIds. Eg 2588 case M.foo of b { alts } 2589 We extend occ_bs_env with [M.foo :-> b]. That's fine. 2590 2591(BS5) We have to apply the occ_bs_env substitution uniformly, 2592 including to (local) rules and unfoldings. 2593 2594Historical note 2595--------------- 2596We used to do the binder-swap transformation by introducing 2597a proxy let-binding, thus; 2598 2599 case x of b { pi -> ri } 2600 ==> 2601 case x of b { pi -> let x = b in ri } 2602 2603But that had two problems: 2604 26051. If 'x' is an imported GlobalId, we'd end up with a GlobalId 2606 on the LHS of a let-binding which isn't allowed. We worked 2607 around this for a while by "localising" x, but it turned 2608 out to be very painful #16296, 2609 26102. In CorePrep we use the occurrence analyser to do dead-code 2611 elimination (see Note [Dead code in CorePrep]). But that 2612 occasionally led to an unlifted let-binding 2613 case x of b { DEFAULT -> let x::Int# = b in ... } 2614 which disobeys one of CorePrep's output invariants (no unlifted 2615 let-bindings) -- see #5433. 2616 2617Doing a substitution (via occ_bs_env) is much better. 2618 2619Note [Case of cast] 2620~~~~~~~~~~~~~~~~~~~ 2621Consider case (x `cast` co) of b { I# -> 2622 ... (case (x `cast` co) of {...}) ... 2623We'd like to eliminate the inner case. That is the motivation for 2624equation (2) in Note [Binder swap]. When we get to the inner case, we 2625inline x, cancel the casts, and away we go. 2626 2627Note [Zap case binders in proxy bindings] 2628~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2629From the original 2630 case x of cb(dead) { p -> ...x... } 2631we will get 2632 case x of cb(live) { p -> ...cb... } 2633 2634Core Lint never expects to find an *occurrence* of an Id marked 2635as Dead, so we must zap the OccInfo on cb before making the 2636binding x = cb. See #5028. 2637 2638NB: the OccInfo on /occurrences/ really doesn't matter much; the simplifier 2639doesn't use it. So this is only to satisfy the perhaps-over-picky Lint. 2640 2641Historical note [no-case-of-case] 2642~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2643We *used* to suppress the binder-swap in case expressions when 2644-fno-case-of-case is on. Old remarks: 2645 "This happens in the first simplifier pass, 2646 and enhances full laziness. Here's the bad case: 2647 f = \ y -> ...(case x of I# v -> ...(case x of ...) ... ) 2648 If we eliminate the inner case, we trap it inside the I# v -> arm, 2649 which might prevent some full laziness happening. I've seen this 2650 in action in spectral/cichelli/Prog.hs: 2651 [(m,n) | m <- [1..max], n <- [1..max]] 2652 Hence the check for NoCaseOfCase." 2653However, now the full-laziness pass itself reverses the binder-swap, so this 2654check is no longer necessary. 2655 2656Historical note [Suppressing the case binder-swap] 2657~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2658This old note describes a problem that is also fixed by doing the 2659binder-swap in OccAnal: 2660 2661 There is another situation when it might make sense to suppress the 2662 case-expression binde-swap. If we have 2663 2664 case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 } 2665 ...other cases .... } 2666 2667 We'll perform the binder-swap for the outer case, giving 2668 2669 case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 } 2670 ...other cases .... } 2671 2672 But there is no point in doing it for the inner case, because w1 can't 2673 be inlined anyway. Furthermore, doing the case-swapping involves 2674 zapping w2's occurrence info (see paragraphs that follow), and that 2675 forces us to bind w2 when doing case merging. So we get 2676 2677 case x of w1 { A -> let w2 = w1 in e1 2678 B -> let w2 = w1 in e2 2679 ...other cases .... } 2680 2681 This is plain silly in the common case where w2 is dead. 2682 2683 Even so, I can't see a good way to implement this idea. I tried 2684 not doing the binder-swap if the scrutinee was already evaluated 2685 but that failed big-time: 2686 2687 data T = MkT !Int 2688 2689 case v of w { MkT x -> 2690 case x of x1 { I# y1 -> 2691 case x of x2 { I# y2 -> ... 2692 2693 Notice that because MkT is strict, x is marked "evaluated". But to 2694 eliminate the last case, we must either make sure that x (as well as 2695 x1) has unfolding MkT y1. The straightforward thing to do is to do 2696 the binder-swap. So this whole note is a no-op. 2697 2698It's fixed by doing the binder-swap in OccAnal because we can do the 2699binder-swap unconditionally and still get occurrence analysis 2700information right. 2701-} 2702 2703addBndrSwap :: OutExpr -> Id -> OccEnv -> OccEnv 2704-- See Note [The binder-swap substitution] 2705addBndrSwap scrut case_bndr 2706 env@(OccEnv { occ_bs_env = swap_env, occ_bs_rng = rng_vars }) 2707 | Just (scrut_var, mco) <- get_scrut_var (stripTicksTopE (const True) scrut) 2708 , scrut_var /= case_bndr 2709 -- Consider: case x of x { ... } 2710 -- Do not add [x :-> x] to occ_bs_env, else lookupBndrSwap will loop 2711 = env { occ_bs_env = extendVarEnv swap_env scrut_var (case_bndr', mco) 2712 , occ_bs_rng = rng_vars `extendVarSet` case_bndr' 2713 `unionVarSet` tyCoVarsOfMCo mco } 2714 2715 | otherwise 2716 = env 2717 where 2718 get_scrut_var :: OutExpr -> Maybe (OutVar, MCoercion) 2719 get_scrut_var (Var v) = Just (v, MRefl) 2720 get_scrut_var (Cast (Var v) co) = Just (v, MCo co) -- See Note [Case of cast] 2721 get_scrut_var _ = Nothing 2722 2723 case_bndr' = zapIdOccInfo case_bndr 2724 -- See Note [Zap case binders in proxy bindings] 2725 2726lookupBndrSwap :: OccEnv -> Id -> (CoreExpr, Id) 2727-- See Note [The binder-swap substitution] 2728-- Returns an expression of the same type as Id 2729lookupBndrSwap env@(OccEnv { occ_bs_env = bs_env }) bndr 2730 = case lookupVarEnv bs_env bndr of { 2731 Nothing -> (Var bndr, bndr) ; 2732 Just (bndr1, mco) -> 2733 2734 -- Why do we iterate here? 2735 -- See (BS2) in Note [The binder-swap substitution] 2736 case lookupBndrSwap env bndr1 of 2737 (fun, fun_id) -> (add_cast fun mco, fun_id) } 2738 2739 where 2740 add_cast fun MRefl = fun 2741 add_cast fun (MCo co) = Cast fun (mkSymCo co) 2742 -- We must switch that 'co' to 'sym co'; 2743 -- see the comment with occ_bs_env 2744 -- No need to test for isReflCo, because 'co' came from 2745 -- a (Cast e co) and hence is unlikely to be Refl 2746 2747{- 2748************************************************************************ 2749* * 2750\subsection[OccurAnal-types]{OccEnv} 2751* * 2752************************************************************************ 2753 2754Note [UsageDetails and zapping] 2755~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2756On many occasions, we must modify all gathered occurrence data at once. For 2757instance, all occurrences underneath a (non-one-shot) lambda set the 2758'occ_in_lam' flag to become 'True'. We could use 'mapVarEnv' to do this, but 2759that takes O(n) time and we will do this often---in particular, there are many 2760places where tail calls are not allowed, and each of these causes all variables 2761to get marked with 'NoTailCallInfo'. 2762 2763Instead of relying on `mapVarEnv`, then, we carry three 'IdEnv's around along 2764with the 'OccInfoEnv'. Each of these extra environments is a "zapped set" 2765recording which variables have been zapped in some way. Zapping all occurrence 2766info then simply means setting the corresponding zapped set to the whole 2767'OccInfoEnv', a fast O(1) operation. 2768-} 2769 2770type OccInfoEnv = IdEnv OccInfo -- A finite map from ids to their usage 2771 -- INVARIANT: never IAmDead 2772 -- (Deadness is signalled by not being in the map at all) 2773 2774type ZappedSet = OccInfoEnv -- Values are ignored 2775 2776data UsageDetails 2777 = UD { ud_env :: !OccInfoEnv 2778 , ud_z_many :: ZappedSet -- apply 'markMany' to these 2779 , ud_z_in_lam :: ZappedSet -- apply 'markInsideLam' to these 2780 , ud_z_no_tail :: ZappedSet } -- apply 'markNonTail' to these 2781 -- INVARIANT: All three zapped sets are subsets of the OccInfoEnv 2782 2783instance Outputable UsageDetails where 2784 ppr ud = ppr (ud_env (flattenUsageDetails ud)) 2785 2786------------------- 2787-- UsageDetails API 2788 2789andUDs, orUDs 2790 :: UsageDetails -> UsageDetails -> UsageDetails 2791andUDs = combineUsageDetailsWith addOccInfo 2792orUDs = combineUsageDetailsWith orOccInfo 2793 2794mkOneOcc ::Id -> InterestingCxt -> JoinArity -> UsageDetails 2795mkOneOcc id int_cxt arity 2796 | isLocalId id 2797 = emptyDetails { ud_env = unitVarEnv id occ_info } 2798 | otherwise 2799 = emptyDetails 2800 where 2801 occ_info = OneOcc { occ_in_lam = NotInsideLam 2802 , occ_n_br = oneBranch 2803 , occ_int_cxt = int_cxt 2804 , occ_tail = AlwaysTailCalled arity } 2805 2806addManyOccId :: UsageDetails -> Id -> UsageDetails 2807-- Add the non-committal (id :-> noOccInfo) to the usage details 2808addManyOccId ud id = ud { ud_env = extendVarEnv (ud_env ud) id noOccInfo } 2809 2810-- Add several occurrences, assumed not to be tail calls 2811addManyOcc :: Var -> UsageDetails -> UsageDetails 2812addManyOcc v u | isId v = addManyOccId u v 2813 | otherwise = u 2814 -- Give a non-committal binder info (i.e noOccInfo) because 2815 -- a) Many copies of the specialised thing can appear 2816 -- b) We don't want to substitute a BIG expression inside a RULE 2817 -- even if that's the only occurrence of the thing 2818 -- (Same goes for INLINE.) 2819 2820addManyOccs :: UsageDetails -> VarSet -> UsageDetails 2821addManyOccs usage id_set = nonDetStrictFoldUniqSet addManyOcc usage id_set 2822 -- It's OK to use nonDetStrictFoldUniqSet here because addManyOcc commutes 2823 2824delDetails :: UsageDetails -> Id -> UsageDetails 2825delDetails ud bndr 2826 = ud `alterUsageDetails` (`delVarEnv` bndr) 2827 2828delDetailsList :: UsageDetails -> [Id] -> UsageDetails 2829delDetailsList ud bndrs 2830 = ud `alterUsageDetails` (`delVarEnvList` bndrs) 2831 2832emptyDetails :: UsageDetails 2833emptyDetails = UD { ud_env = emptyVarEnv 2834 , ud_z_many = emptyVarEnv 2835 , ud_z_in_lam = emptyVarEnv 2836 , ud_z_no_tail = emptyVarEnv } 2837 2838isEmptyDetails :: UsageDetails -> Bool 2839isEmptyDetails = isEmptyVarEnv . ud_env 2840 2841markAllMany, markAllInsideLam, markAllNonTail, markAllManyNonTail 2842 :: UsageDetails -> UsageDetails 2843markAllMany ud = ud { ud_z_many = ud_env ud } 2844markAllInsideLam ud = ud { ud_z_in_lam = ud_env ud } 2845markAllNonTail ud = ud { ud_z_no_tail = ud_env ud } 2846 2847markAllInsideLamIf, markAllNonTailIf :: Bool -> UsageDetails -> UsageDetails 2848 2849markAllInsideLamIf True ud = markAllInsideLam ud 2850markAllInsideLamIf False ud = ud 2851 2852markAllNonTailIf True ud = markAllNonTail ud 2853markAllNonTailIf False ud = ud 2854 2855 2856markAllManyNonTail = markAllMany . markAllNonTail -- effectively sets to noOccInfo 2857 2858markAllManyNonTailIf :: Bool -- If this is true 2859 -> UsageDetails -- Then do markAllManyNonTail on this 2860 -> UsageDetails 2861markAllManyNonTailIf True uds = markAllManyNonTail uds 2862markAllManyNonTailIf False uds = uds 2863 2864lookupDetails :: UsageDetails -> Id -> OccInfo 2865lookupDetails ud id 2866 | isCoVar id -- We do not currently gather occurrence info (from types) 2867 = noOccInfo -- for CoVars, so we must conservatively mark them as used 2868 -- See Note [DoO not mark CoVars as dead] 2869 | otherwise 2870 = case lookupVarEnv (ud_env ud) id of 2871 Just occ -> doZapping ud id occ 2872 Nothing -> IAmDead 2873 2874usedIn :: Id -> UsageDetails -> Bool 2875v `usedIn` ud = isExportedId v || v `elemVarEnv` ud_env ud 2876 2877udFreeVars :: VarSet -> UsageDetails -> VarSet 2878-- Find the subset of bndrs that are mentioned in uds 2879udFreeVars bndrs ud = restrictFreeVars bndrs (ud_env ud) 2880 2881restrictFreeVars :: VarSet -> OccInfoEnv -> VarSet 2882restrictFreeVars bndrs fvs = restrictUniqSetToUFM bndrs fvs 2883 2884{- Note [Do not mark CoVars as dead] 2885~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2886It's obviously wrong to mark CoVars as dead if they are used. 2887Currently we don't traverse types to gather usase info for CoVars, 2888so we had better treat them as having noOccInfo. 2889 2890This showed up in #15696 we had something like 2891 case eq_sel d of co -> ...(typeError @(...co...) "urk")... 2892 2893Then 'd' was substituted by a dictionary, so the expression 2894simpified to 2895 case (Coercion <blah>) of co -> ...(typeError @(...co...) "urk")... 2896 2897But then the "drop the case altogether" equation of rebuildCase 2898thought that 'co' was dead, and discarded the entire case. Urk! 2899 2900I have no idea how we managed to avoid this pitfall for so long! 2901-} 2902 2903------------------- 2904-- Auxiliary functions for UsageDetails implementation 2905 2906combineUsageDetailsWith :: (OccInfo -> OccInfo -> OccInfo) 2907 -> UsageDetails -> UsageDetails -> UsageDetails 2908combineUsageDetailsWith plus_occ_info ud1 ud2 2909 | isEmptyDetails ud1 = ud2 2910 | isEmptyDetails ud2 = ud1 2911 | otherwise 2912 = UD { ud_env = plusVarEnv_C plus_occ_info (ud_env ud1) (ud_env ud2) 2913 , ud_z_many = plusVarEnv (ud_z_many ud1) (ud_z_many ud2) 2914 , ud_z_in_lam = plusVarEnv (ud_z_in_lam ud1) (ud_z_in_lam ud2) 2915 , ud_z_no_tail = plusVarEnv (ud_z_no_tail ud1) (ud_z_no_tail ud2) } 2916 2917doZapping :: UsageDetails -> Var -> OccInfo -> OccInfo 2918doZapping ud var occ 2919 = doZappingByUnique ud (varUnique var) occ 2920 2921doZappingByUnique :: UsageDetails -> Unique -> OccInfo -> OccInfo 2922doZappingByUnique (UD { ud_z_many = many 2923 , ud_z_in_lam = in_lam 2924 , ud_z_no_tail = no_tail }) 2925 uniq occ 2926 = occ2 2927 where 2928 occ1 | uniq `elemVarEnvByKey` many = markMany occ 2929 | uniq `elemVarEnvByKey` in_lam = markInsideLam occ 2930 | otherwise = occ 2931 occ2 | uniq `elemVarEnvByKey` no_tail = markNonTail occ1 2932 | otherwise = occ1 2933 2934alterZappedSets :: UsageDetails -> (ZappedSet -> ZappedSet) -> UsageDetails 2935alterZappedSets ud f 2936 = ud { ud_z_many = f (ud_z_many ud) 2937 , ud_z_in_lam = f (ud_z_in_lam ud) 2938 , ud_z_no_tail = f (ud_z_no_tail ud) } 2939 2940alterUsageDetails :: UsageDetails -> (OccInfoEnv -> OccInfoEnv) -> UsageDetails 2941alterUsageDetails ud f 2942 = ud { ud_env = f (ud_env ud) } `alterZappedSets` f 2943 2944flattenUsageDetails :: UsageDetails -> UsageDetails 2945flattenUsageDetails ud 2946 = ud { ud_env = mapUFM_Directly (doZappingByUnique ud) (ud_env ud) } 2947 `alterZappedSets` const emptyVarEnv 2948 2949------------------- 2950-- See Note [Adjusting right-hand sides] 2951adjustRhsUsage :: RecFlag -> Maybe JoinArity 2952 -> [CoreBndr] -- Outer lambdas, AFTER occ anal 2953 -> UsageDetails -- From body of lambda 2954 -> UsageDetails 2955adjustRhsUsage is_rec mb_join_arity bndrs usage 2956 = markAllInsideLamIf (not one_shot) $ 2957 markAllNonTailIf (not exact_join) $ 2958 usage 2959 where 2960 one_shot = case mb_join_arity of 2961 Just join_arity 2962 | isRec is_rec -> False 2963 | otherwise -> all isOneShotBndr (drop join_arity bndrs) 2964 Nothing -> all isOneShotBndr bndrs 2965 2966 exact_join = exactJoin mb_join_arity bndrs 2967 2968exactJoin :: Maybe JoinArity -> [a] -> Bool 2969exactJoin Nothing _ = False 2970exactJoin (Just join_arity) args = args `lengthIs` join_arity 2971 -- Remember join_arity includes type binders 2972 2973type IdWithOccInfo = Id 2974 2975tagLamBinders :: UsageDetails -- Of scope 2976 -> [Id] -- Binders 2977 -> (UsageDetails, -- Details with binders removed 2978 [IdWithOccInfo]) -- Tagged binders 2979tagLamBinders usage binders 2980 = usage' `seq` (usage', bndrs') 2981 where 2982 (usage', bndrs') = mapAccumR tagLamBinder usage binders 2983 2984tagLamBinder :: UsageDetails -- Of scope 2985 -> Id -- Binder 2986 -> (UsageDetails, -- Details with binder removed 2987 IdWithOccInfo) -- Tagged binders 2988-- Used for lambda and case binders 2989-- It copes with the fact that lambda bindings can have a 2990-- stable unfolding, used for join points 2991tagLamBinder usage bndr 2992 = (usage2, bndr') 2993 where 2994 occ = lookupDetails usage bndr 2995 bndr' = setBinderOcc (markNonTail occ) bndr 2996 -- Don't try to make an argument into a join point 2997 usage1 = usage `delDetails` bndr 2998 usage2 | isId bndr = addManyOccs usage1 (idUnfoldingVars bndr) 2999 -- This is effectively the RHS of a 3000 -- non-join-point binding, so it's okay to use 3001 -- addManyOccsSet, which assumes no tail calls 3002 | otherwise = usage1 3003 3004tagNonRecBinder :: TopLevelFlag -- At top level? 3005 -> UsageDetails -- Of scope 3006 -> CoreBndr -- Binder 3007 -> (UsageDetails, -- Details with binder removed 3008 IdWithOccInfo) -- Tagged binder 3009 3010tagNonRecBinder lvl usage binder 3011 = let 3012 occ = lookupDetails usage binder 3013 will_be_join = decideJoinPointHood lvl usage [binder] 3014 occ' | will_be_join = -- must already be marked AlwaysTailCalled 3015 ASSERT(isAlwaysTailCalled occ) occ 3016 | otherwise = markNonTail occ 3017 binder' = setBinderOcc occ' binder 3018 usage' = usage `delDetails` binder 3019 in 3020 usage' `seq` (usage', binder') 3021 3022tagRecBinders :: TopLevelFlag -- At top level? 3023 -> UsageDetails -- Of body of let ONLY 3024 -> [(CoreBndr, -- Binder 3025 UsageDetails, -- RHS usage details 3026 [CoreBndr])] -- Lambdas in new RHS 3027 -> (UsageDetails, -- Adjusted details for whole scope, 3028 -- with binders removed 3029 [IdWithOccInfo]) -- Tagged binders 3030-- Substantially more complicated than non-recursive case. Need to adjust RHS 3031-- details *before* tagging binders (because the tags depend on the RHSes). 3032tagRecBinders lvl body_uds triples 3033 = let 3034 (bndrs, rhs_udss, _) = unzip3 triples 3035 3036 -- 1. Determine join-point-hood of whole group, as determined by 3037 -- the *unadjusted* usage details 3038 unadj_uds = foldr andUDs body_uds rhs_udss 3039 will_be_joins = decideJoinPointHood lvl unadj_uds bndrs 3040 3041 -- 2. Adjust usage details of each RHS, taking into account the 3042 -- join-point-hood decision 3043 rhs_udss' = map adjust triples 3044 adjust (bndr, rhs_uds, rhs_bndrs) 3045 = adjustRhsUsage Recursive mb_join_arity rhs_bndrs rhs_uds 3046 where 3047 -- Can't use willBeJoinId_maybe here because we haven't tagged the 3048 -- binder yet (the tag depends on these adjustments!) 3049 mb_join_arity 3050 | will_be_joins 3051 , let occ = lookupDetails unadj_uds bndr 3052 , AlwaysTailCalled arity <- tailCallInfo occ 3053 = Just arity 3054 | otherwise 3055 = ASSERT(not will_be_joins) -- Should be AlwaysTailCalled if 3056 Nothing -- we are making join points! 3057 3058 -- 3. Compute final usage details from adjusted RHS details 3059 adj_uds = foldr andUDs body_uds rhs_udss' 3060 3061 -- 4. Tag each binder with its adjusted details 3062 bndrs' = [ setBinderOcc (lookupDetails adj_uds bndr) bndr 3063 | bndr <- bndrs ] 3064 3065 -- 5. Drop the binders from the adjusted details and return 3066 usage' = adj_uds `delDetailsList` bndrs 3067 in 3068 (usage', bndrs') 3069 3070setBinderOcc :: OccInfo -> CoreBndr -> CoreBndr 3071setBinderOcc occ_info bndr 3072 | isTyVar bndr = bndr 3073 | isExportedId bndr = if isManyOccs (idOccInfo bndr) 3074 then bndr 3075 else setIdOccInfo bndr noOccInfo 3076 -- Don't use local usage info for visible-elsewhere things 3077 -- BUT *do* erase any IAmALoopBreaker annotation, because we're 3078 -- about to re-generate it and it shouldn't be "sticky" 3079 3080 | otherwise = setIdOccInfo bndr occ_info 3081 3082-- | Decide whether some bindings should be made into join points or not. 3083-- Returns `False` if they can't be join points. Note that it's an 3084-- all-or-nothing decision, as if multiple binders are given, they're 3085-- assumed to be mutually recursive. 3086-- 3087-- It must, however, be a final decision. If we say "True" for 'f', 3088-- and then subsequently decide /not/ make 'f' into a join point, then 3089-- the decision about another binding 'g' might be invalidated if (say) 3090-- 'f' tail-calls 'g'. 3091-- 3092-- See Note [Invariants on join points] in "GHC.Core". 3093decideJoinPointHood :: TopLevelFlag -> UsageDetails 3094 -> [CoreBndr] 3095 -> Bool 3096decideJoinPointHood TopLevel _ _ 3097 = False 3098decideJoinPointHood NotTopLevel usage bndrs 3099 | isJoinId (head bndrs) 3100 = WARN(not all_ok, text "OccurAnal failed to rediscover join point(s):" <+> 3101 ppr bndrs) 3102 all_ok 3103 | otherwise 3104 = all_ok 3105 where 3106 -- See Note [Invariants on join points]; invariants cited by number below. 3107 -- Invariant 2 is always satisfiable by the simplifier by eta expansion. 3108 all_ok = -- Invariant 3: Either all are join points or none are 3109 all ok bndrs 3110 3111 ok bndr 3112 | -- Invariant 1: Only tail calls, all same join arity 3113 AlwaysTailCalled arity <- tailCallInfo (lookupDetails usage bndr) 3114 3115 , -- Invariant 1 as applied to LHSes of rules 3116 all (ok_rule arity) (idCoreRules bndr) 3117 3118 -- Invariant 2a: stable unfoldings 3119 -- See Note [Join points and INLINE pragmas] 3120 , ok_unfolding arity (realIdUnfolding bndr) 3121 3122 -- Invariant 4: Satisfies polymorphism rule 3123 , isValidJoinPointType arity (idType bndr) 3124 = True 3125 3126 | otherwise 3127 = False 3128 3129 ok_rule _ BuiltinRule{} = False -- only possible with plugin shenanigans 3130 ok_rule join_arity (Rule { ru_args = args }) 3131 = args `lengthIs` join_arity 3132 -- Invariant 1 as applied to LHSes of rules 3133 3134 -- ok_unfolding returns False if we should /not/ convert a non-join-id 3135 -- into a join-id, even though it is AlwaysTailCalled 3136 ok_unfolding join_arity (CoreUnfolding { uf_src = src, uf_tmpl = rhs }) 3137 = not (isStableSource src && join_arity > joinRhsArity rhs) 3138 ok_unfolding _ (DFunUnfolding {}) 3139 = False 3140 ok_unfolding _ _ 3141 = True 3142 3143willBeJoinId_maybe :: CoreBndr -> Maybe JoinArity 3144willBeJoinId_maybe bndr 3145 = case tailCallInfo (idOccInfo bndr) of 3146 AlwaysTailCalled arity -> Just arity 3147 _ -> isJoinId_maybe bndr 3148 3149 3150{- Note [Join points and INLINE pragmas] 3151~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3152Consider 3153 f x = let g = \x. not -- Arity 1 3154 {-# INLINE g #-} 3155 in case x of 3156 A -> g True True 3157 B -> g True False 3158 C -> blah2 3159 3160Here 'g' is always tail-called applied to 2 args, but the stable 3161unfolding captured by the INLINE pragma has arity 1. If we try to 3162convert g to be a join point, its unfolding will still have arity 1 3163(since it is stable, and we don't meddle with stable unfoldings), and 3164Lint will complain (see Note [Invariants on join points], (2a), in 3165GHC.Core. #13413. 3166 3167Moreover, since g is going to be inlined anyway, there is no benefit 3168from making it a join point. 3169 3170If it is recursive, and uselessly marked INLINE, this will stop us 3171making it a join point, which is annoying. But occasionally 3172(notably in class methods; see Note [Instances and loop breakers] in 3173GHC.Tc.TyCl.Instance) we mark recursive things as INLINE but the recursion 3174unravels; so ignoring INLINE pragmas on recursive things isn't good 3175either. 3176 3177See Invariant 2a of Note [Invariants on join points] in GHC.Core 3178 3179 3180************************************************************************ 3181* * 3182\subsection{Operations over OccInfo} 3183* * 3184************************************************************************ 3185-} 3186 3187markMany, markInsideLam, markNonTail :: OccInfo -> OccInfo 3188 3189markMany IAmDead = IAmDead 3190markMany occ = ManyOccs { occ_tail = occ_tail occ } 3191 3192markInsideLam occ@(OneOcc {}) = occ { occ_in_lam = IsInsideLam } 3193markInsideLam occ = occ 3194 3195markNonTail IAmDead = IAmDead 3196markNonTail occ = occ { occ_tail = NoTailCallInfo } 3197 3198addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo 3199 3200addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) ) 3201 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo` 3202 tailCallInfo a2 } 3203 -- Both branches are at least One 3204 -- (Argument is never IAmDead) 3205 3206-- (orOccInfo orig new) is used 3207-- when combining occurrence info from branches of a case 3208 3209orOccInfo (OneOcc { occ_in_lam = in_lam1 3210 , occ_n_br = nbr1 3211 , occ_int_cxt = int_cxt1 3212 , occ_tail = tail1 }) 3213 (OneOcc { occ_in_lam = in_lam2 3214 , occ_n_br = nbr2 3215 , occ_int_cxt = int_cxt2 3216 , occ_tail = tail2 }) 3217 = OneOcc { occ_n_br = nbr1 + nbr2 3218 , occ_in_lam = in_lam1 `mappend` in_lam2 3219 , occ_int_cxt = int_cxt1 `mappend` int_cxt2 3220 , occ_tail = tail1 `andTailCallInfo` tail2 } 3221 3222orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) ) 3223 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo` 3224 tailCallInfo a2 } 3225 3226andTailCallInfo :: TailCallInfo -> TailCallInfo -> TailCallInfo 3227andTailCallInfo info@(AlwaysTailCalled arity1) (AlwaysTailCalled arity2) 3228 | arity1 == arity2 = info 3229andTailCallInfo _ _ = NoTailCallInfo 3230