1{- 2(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 3 4************************************************************************ 5* * 6\section[OccurAnal]{Occurrence analysis pass} 7* * 8************************************************************************ 9 10The occurrence analyser re-typechecks a core expression, returning a new 11core expression with (hopefully) improved usage information. 12-} 13 14{-# LANGUAGE CPP, BangPatterns, MultiWayIf, ViewPatterns #-} 15 16module OccurAnal ( 17 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap 18 ) where 19 20#include "HsVersions.h" 21 22import GhcPrelude 23 24import CoreSyn 25import CoreFVs 26import CoreUtils ( exprIsTrivial, isDefaultAlt, isExpandableApp, 27 stripTicksTopE, mkTicks ) 28import CoreArity ( joinRhsArity ) 29import Id 30import IdInfo 31import Name( localiseName ) 32import BasicTypes 33import Module( Module ) 34import Coercion 35import Type 36 37import VarSet 38import VarEnv 39import Var 40import Demand ( argOneShots, argsOneShots ) 41import Digraph ( SCC(..), Node(..) 42 , stronglyConnCompFromEdgedVerticesUniq 43 , stronglyConnCompFromEdgedVerticesUniqR ) 44import Unique 45import UniqFM 46import UniqSet 47import Util 48import Outputable 49import Data.List 50import Control.Arrow ( second ) 51 52{- 53************************************************************************ 54* * 55 occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap 56* * 57************************************************************************ 58 59Here's the externally-callable interface: 60-} 61 62occurAnalysePgm :: Module -- Used only in debug output 63 -> (Id -> Bool) -- Active unfoldings 64 -> (Activation -> Bool) -- Active rules 65 -> [CoreRule] 66 -> CoreProgram -> CoreProgram 67occurAnalysePgm this_mod active_unf active_rule imp_rules binds 68 | isEmptyDetails final_usage 69 = occ_anald_binds 70 71 | otherwise -- See Note [Glomming] 72 = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon) 73 2 (ppr final_usage ) ) 74 occ_anald_glommed_binds 75 where 76 init_env = initOccEnv { occ_rule_act = active_rule 77 , occ_unf_act = active_unf } 78 79 (final_usage, occ_anald_binds) = go init_env binds 80 (_, occ_anald_glommed_binds) = occAnalRecBind init_env TopLevel 81 imp_rule_edges 82 (flattenBinds binds) 83 initial_uds 84 -- It's crucial to re-analyse the glommed-together bindings 85 -- so that we establish the right loop breakers. Otherwise 86 -- we can easily create an infinite loop (#9583 is an example) 87 -- 88 -- Also crucial to re-analyse the /original/ bindings 89 -- in case the first pass accidentally discarded as dead code 90 -- a binding that was actually needed (albeit before its 91 -- definition site). #17724 threw this up. 92 93 initial_uds = addManyOccsSet emptyDetails 94 (rulesFreeVars imp_rules) 95 -- The RULES declarations keep things alive! 96 97 -- Note [Preventing loops due to imported functions rules] 98 imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv 99 [ mapVarEnv (const maps_to) $ 100 getUniqSet (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule) 101 | imp_rule <- imp_rules 102 , not (isBuiltinRule imp_rule) -- See Note [Plugin rules] 103 , let maps_to = exprFreeIds (ru_rhs imp_rule) 104 `delVarSetList` ru_bndrs imp_rule 105 , arg <- ru_args imp_rule ] 106 107 go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind]) 108 go _ [] 109 = (initial_uds, []) 110 go env (bind:binds) 111 = (final_usage, bind' ++ binds') 112 where 113 (bs_usage, binds') = go env binds 114 (final_usage, bind') = occAnalBind env TopLevel imp_rule_edges bind 115 bs_usage 116 117occurAnalyseExpr :: CoreExpr -> CoreExpr 118 -- Do occurrence analysis, and discard occurrence info returned 119occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap 120 121occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr 122occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap 123 124occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr 125occurAnalyseExpr' enable_binder_swap expr 126 = snd (occAnal env expr) 127 where 128 env = initOccEnv { occ_binder_swap = enable_binder_swap } 129 130{- Note [Plugin rules] 131~~~~~~~~~~~~~~~~~~~~~~ 132Conal Elliott (#11651) built a GHC plugin that added some 133BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to 134do some domain-specific transformations that could not be expressed 135with an ordinary pattern-matching CoreRule. But then we can't extract 136the dependencies (in imp_rule_edges) from ru_rhs etc, because a 137BuiltinRule doesn't have any of that stuff. 138 139So we simply assume that BuiltinRules have no dependencies, and filter 140them out from the imp_rule_edges comprehension. 141-} 142 143{- 144************************************************************************ 145* * 146 Bindings 147* * 148************************************************************************ 149 150Note [Recursive bindings: the grand plan] 151~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 152When we come across a binding group 153 Rec { x1 = r1; ...; xn = rn } 154we treat it like this (occAnalRecBind): 155 1561. Occurrence-analyse each right hand side, and build a 157 "Details" for each binding to capture the results. 158 159 Wrap the details in a Node (details, node-id, dep-node-ids), 160 where node-id is just the unique of the binder, and 161 dep-node-ids lists all binders on which this binding depends. 162 We'll call these the "scope edges". 163 See Note [Forming the Rec groups]. 164 165 All this is done by makeNode. 166 1672. Do SCC-analysis on these Nodes. Each SCC will become a new Rec or 168 NonRec. The key property is that every free variable of a binding 169 is accounted for by the scope edges, so that when we are done 170 everything is still in scope. 171 1723. For each Cyclic SCC of the scope-edge SCC-analysis in (2), we 173 identify suitable loop-breakers to ensure that inlining terminates. 174 This is done by occAnalRec. 175 1764. To do so we form a new set of Nodes, with the same details, but 177 different edges, the "loop-breaker nodes". The loop-breaker nodes 178 have both more and fewer dependencies than the scope edges 179 (see Note [Choosing loop breakers]) 180 181 More edges: if f calls g, and g has an active rule that mentions h 182 then we add an edge from f -> h 183 184 Fewer edges: we only include dependencies on active rules, on rule 185 RHSs (not LHSs) and if there is an INLINE pragma only 186 on the stable unfolding (and vice versa). The scope 187 edges must be much more inclusive. 188 1895. The "weak fvs" of a node are, by definition: 190 the scope fvs - the loop-breaker fvs 191 See Note [Weak loop breakers], and the nd_weak field of Details 192 1936. Having formed the loop-breaker nodes 194 195Note [Dead code] 196~~~~~~~~~~~~~~~~ 197Dropping dead code for a cyclic Strongly Connected Component is done 198in a very simple way: 199 200 the entire SCC is dropped if none of its binders are mentioned 201 in the body; otherwise the whole thing is kept. 202 203The key observation is that dead code elimination happens after 204dependency analysis: so 'occAnalBind' processes SCCs instead of the 205original term's binding groups. 206 207Thus 'occAnalBind' does indeed drop 'f' in an example like 208 209 letrec f = ...g... 210 g = ...(...g...)... 211 in 212 ...g... 213 214when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in 215'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes 216'AcyclicSCC f', where 'body_usage' won't contain 'f'. 217 218------------------------------------------------------------ 219Note [Forming Rec groups] 220~~~~~~~~~~~~~~~~~~~~~~~~~ 221We put bindings {f = ef; g = eg } in a Rec group if "f uses g" 222and "g uses f", no matter how indirectly. We do a SCC analysis 223with an edge f -> g if "f uses g". 224 225More precisely, "f uses g" iff g should be in scope wherever f is. 226That is, g is free in: 227 a) the rhs 'ef' 228 b) or the RHS of a rule for f (Note [Rules are extra RHSs]) 229 c) or the LHS or a rule for f (Note [Rule dependency info]) 230 231These conditions apply regardless of the activation of the RULE (eg it might be 232inactive in this phase but become active later). Once a Rec is broken up 233it can never be put back together, so we must be conservative. 234 235The principle is that, regardless of rule firings, every variable is 236always in scope. 237 238 * Note [Rules are extra RHSs] 239 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 240 A RULE for 'f' is like an extra RHS for 'f'. That way the "parent" 241 keeps the specialised "children" alive. If the parent dies 242 (because it isn't referenced any more), then the children will die 243 too (unless they are already referenced directly). 244 245 To that end, we build a Rec group for each cyclic strongly 246 connected component, 247 *treating f's rules as extra RHSs for 'f'*. 248 More concretely, the SCC analysis runs on a graph with an edge 249 from f -> g iff g is mentioned in 250 (a) f's rhs 251 (b) f's RULES 252 These are rec_edges. 253 254 Under (b) we include variables free in *either* LHS *or* RHS of 255 the rule. The former might seems silly, but see Note [Rule 256 dependency info]. So in Example [eftInt], eftInt and eftIntFB 257 will be put in the same Rec, even though their 'main' RHSs are 258 both non-recursive. 259 260 * Note [Rule dependency info] 261 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 262 The VarSet in a RuleInfo is used for dependency analysis in the 263 occurrence analyser. We must track free vars in *both* lhs and rhs. 264 Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind. 265 Why both? Consider 266 x = y 267 RULE f x = v+4 268 Then if we substitute y for x, we'd better do so in the 269 rule's LHS too, so we'd better ensure the RULE appears to mention 'x' 270 as well as 'v' 271 272 * Note [Rules are visible in their own rec group] 273 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 274 We want the rules for 'f' to be visible in f's right-hand side. 275 And we'd like them to be visible in other functions in f's Rec 276 group. E.g. in Note [Specialisation rules] we want f' rule 277 to be visible in both f's RHS, and fs's RHS. 278 279 This means that we must simplify the RULEs first, before looking 280 at any of the definitions. This is done by Simplify.simplRecBind, 281 when it calls addLetIdInfo. 282 283------------------------------------------------------------ 284Note [Choosing loop breakers] 285~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 286Loop breaking is surprisingly subtle. First read the section 4 of 287"Secrets of the GHC inliner". This describes our basic plan. 288We avoid infinite inlinings by choosing loop breakers, and 289ensuring that a loop breaker cuts each loop. 290 291See also Note [Inlining and hs-boot files] in ToIface, which deals 292with a closely related source of infinite loops. 293 294Fundamentally, we do SCC analysis on a graph. For each recursive 295group we choose a loop breaker, delete all edges to that node, 296re-analyse the SCC, and iterate. 297 298But what is the graph? NOT the same graph as was used for Note 299[Forming Rec groups]! In particular, a RULE is like an equation for 300'f' that is *always* inlined if it is applicable. We do *not* disable 301rules for loop-breakers. It's up to whoever makes the rules to make 302sure that the rules themselves always terminate. See Note [Rules for 303recursive functions] in Simplify.hs 304 305Hence, if 306 f's RHS (or its INLINE template if it has one) mentions g, and 307 g has a RULE that mentions h, and 308 h has a RULE that mentions f 309 310then we *must* choose f to be a loop breaker. Example: see Note 311[Specialisation rules]. 312 313In general, take the free variables of f's RHS, and augment it with 314all the variables reachable by RULES from those starting points. That 315is the whole reason for computing rule_fv_env in occAnalBind. (Of 316course we only consider free vars that are also binders in this Rec 317group.) See also Note [Finding rule RHS free vars] 318 319Note that when we compute this rule_fv_env, we only consider variables 320free in the *RHS* of the rule, in contrast to the way we build the 321Rec group in the first place (Note [Rule dependency info]) 322 323Note that if 'g' has RHS that mentions 'w', we should add w to 324g's loop-breaker edges. More concretely there is an edge from f -> g 325iff 326 (a) g is mentioned in f's RHS `xor` f's INLINE rhs 327 (see Note [Inline rules]) 328 (b) or h is mentioned in f's RHS, and 329 g appears in the RHS of an active RULE of h 330 or a transitive sequence of active rules starting with h 331 332Why "active rules"? See Note [Finding rule RHS free vars] 333 334Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is 335chosen as a loop breaker, because their RHSs don't mention each other. 336And indeed both can be inlined safely. 337 338Note again that the edges of the graph we use for computing loop breakers 339are not the same as the edges we use for computing the Rec blocks. 340That's why we compute 341 342- rec_edges for the Rec block analysis 343- loop_breaker_nodes for the loop breaker analysis 344 345 * Note [Finding rule RHS free vars] 346 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 347 Consider this real example from Data Parallel Haskell 348 tagZero :: Array Int -> Array Tag 349 {-# INLINE [1] tagZeroes #-} 350 tagZero xs = pmap (\x -> fromBool (x==0)) xs 351 352 {-# RULES "tagZero" [~1] forall xs n. 353 pmap fromBool <blah blah> = tagZero xs #-} 354 So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero. 355 However, tagZero can only be inlined in phase 1 and later, while 356 the RULE is only active *before* phase 1. So there's no problem. 357 358 To make this work, we look for the RHS free vars only for 359 *active* rules. That's the reason for the occ_rule_act field 360 of the OccEnv. 361 362 * Note [Weak loop breakers] 363 ~~~~~~~~~~~~~~~~~~~~~~~~~ 364 There is a last nasty wrinkle. Suppose we have 365 366 Rec { f = f_rhs 367 RULE f [] = g 368 369 h = h_rhs 370 g = h 371 ...more... 372 } 373 374 Remember that we simplify the RULES before any RHS (see Note 375 [Rules are visible in their own rec group] above). 376 377 So we must *not* postInlineUnconditionally 'g', even though 378 its RHS turns out to be trivial. (I'm assuming that 'g' is 379 not choosen as a loop breaker.) Why not? Because then we 380 drop the binding for 'g', which leaves it out of scope in the 381 RULE! 382 383 Here's a somewhat different example of the same thing 384 Rec { g = h 385 ; h = ...f... 386 ; f = f_rhs 387 RULE f [] = g } 388 Here the RULE is "below" g, but we *still* can't postInlineUnconditionally 389 g, because the RULE for f is active throughout. So the RHS of h 390 might rewrite to h = ...g... 391 So g must remain in scope in the output program! 392 393 We "solve" this by: 394 395 Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True) 396 iff g is a "missing free variable" of the Rec group 397 398 A "missing free variable" x is one that is mentioned in an RHS or 399 INLINE or RULE of a binding in the Rec group, but where the 400 dependency on x may not show up in the loop_breaker_nodes (see 401 note [Choosing loop breakers} above). 402 403 A normal "strong" loop breaker has IAmLoopBreaker False. So 404 405 Inline postInlineUnconditionally 406 strong IAmLoopBreaker False no no 407 weak IAmLoopBreaker True yes no 408 other yes yes 409 410 The **sole** reason for this kind of loop breaker is so that 411 postInlineUnconditionally does not fire. Ugh. (Typically it'll 412 inline via the usual callSiteInline stuff, so it'll be dead in the 413 next pass, so the main Ugh is the tiresome complication.) 414 415Note [Rules for imported functions] 416~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 417Consider this 418 f = /\a. B.g a 419 RULE B.g Int = 1 + f Int 420Note that 421 * The RULE is for an imported function. 422 * f is non-recursive 423Now we 424can get 425 f Int --> B.g Int Inlining f 426 --> 1 + f Int Firing RULE 427and so the simplifier goes into an infinite loop. This 428would not happen if the RULE was for a local function, 429because we keep track of dependencies through rules. But 430that is pretty much impossible to do for imported Ids. Suppose 431f's definition had been 432 f = /\a. C.h a 433where (by some long and devious process), C.h eventually inlines to 434B.g. We could only spot such loops by exhaustively following 435unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE) 436f. 437 438Note that RULES for imported functions are important in practice; they 439occur a lot in the libraries. 440 441We regard this potential infinite loop as a *programmer* error. 442It's up the programmer not to write silly rules like 443 RULE f x = f x 444and the example above is just a more complicated version. 445 446Note [Preventing loops due to imported functions rules] 447~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 448Consider: 449 import GHC.Base (foldr) 450 451 {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-} 452 filter p xs = build (\c n -> foldr (filterFB c p) n xs) 453 filterFB c p = ... 454 455 f = filter p xs 456 457Note that filter is not a loop-breaker, so what happens is: 458 f = filter p xs 459 = {inline} build (\c n -> foldr (filterFB c p) n xs) 460 = {inline} foldr (filterFB (:) p) [] xs 461 = {RULE} filter p xs 462 463We are in an infinite loop. 464 465A more elaborate example (that I actually saw in practice when I went to 466mark GHC.List.filter as INLINABLE) is as follows. Say I have this module: 467 {-# LANGUAGE RankNTypes #-} 468 module GHCList where 469 470 import Prelude hiding (filter) 471 import GHC.Base (build) 472 473 {-# INLINABLE filter #-} 474 filter :: (a -> Bool) -> [a] -> [a] 475 filter p [] = [] 476 filter p (x:xs) = if p x then x : filter p xs else filter p xs 477 478 {-# NOINLINE [0] filterFB #-} 479 filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b 480 filterFB c p x r | p x = x `c` r 481 | otherwise = r 482 483 {-# RULES 484 "filter" [~1] forall p xs. filter p xs = build (\c n -> foldr 485 (filterFB c p) n xs) 486 "filterList" [1] forall p. foldr (filterFB (:) p) [] = filter p 487 #-} 488 489Then (because RULES are applied inside INLINABLE unfoldings, but inlinings 490are not), the unfolding given to "filter" in the interface file will be: 491 filter p [] = [] 492 filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs) 493 else build (\c n -> foldr (filterFB c p) n xs 494 495Note that because this unfolding does not mention "filter", filter is not 496marked as a strong loop breaker. Therefore at a use site in another module: 497 filter p xs 498 = {inline} 499 case xs of [] -> [] 500 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs) 501 else build (\c n -> foldr (filterFB c p) n xs) 502 503 build (\c n -> foldr (filterFB c p) n xs) 504 = {inline} foldr (filterFB (:) p) [] xs 505 = {RULE} filter p xs 506 507And we are in an infinite loop again, except that this time the loop is producing an 508infinitely large *term* (an unrolling of filter) and so the simplifier finally 509dies with "ticks exhausted" 510 511Because of this problem, we make a small change in the occurrence analyser 512designed to mark functions like "filter" as strong loop breakers on the basis that: 513 1. The RHS of filter mentions the local function "filterFB" 514 2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS 515 516So for each RULE for an *imported* function we are going to add 517dependency edges between the *local* FVS of the rule LHS and the 518*local* FVS of the rule RHS. We don't do anything special for RULES on 519local functions because the standard occurrence analysis stuff is 520pretty good at getting loop-breakerness correct there. 521 522It is important to note that even with this extra hack we aren't always going to get 523things right. For example, it might be that the rule LHS mentions an imported Id, 524and another module has a RULE that can rewrite that imported Id to one of our local 525Ids. 526 527Note [Specialising imported functions] (referred to from Specialise) 528~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 529BUT for *automatically-generated* rules, the programmer can't be 530responsible for the "programmer error" in Note [Rules for imported 531functions]. In paricular, consider specialising a recursive function 532defined in another module. If we specialise a recursive function B.g, 533we get 534 g_spec = .....(B.g Int)..... 535 RULE B.g Int = g_spec 536Here, g_spec doesn't look recursive, but when the rule fires, it 537becomes so. And if B.g was mutually recursive, the loop might 538not be as obvious as it is here. 539 540To avoid this, 541 * When specialising a function that is a loop breaker, 542 give a NOINLINE pragma to the specialised function 543 544Note [Glomming] 545~~~~~~~~~~~~~~~ 546RULES for imported Ids can make something at the top refer to something at the bottom: 547 f = \x -> B.g (q x) 548 h = \y -> 3 549 550 RULE: B.g (q x) = h x 551 552Applying this rule makes f refer to h, although f doesn't appear to 553depend on h. (And, as in Note [Rules for imported functions], the 554dependency might be more indirect. For example, f might mention C.t 555rather than B.g, where C.t eventually inlines to B.g.) 556 557NOTICE that this cannot happen for rules whose head is a 558locally-defined function, because we accurately track dependencies 559through RULES. It only happens for rules whose head is an imported 560function (B.g in the example above). 561 562Solution: 563 - When simplifying, bring all top level identifiers into 564 scope at the start, ignoring the Rec/NonRec structure, so 565 that when 'h' pops up in f's rhs, we find it in the in-scope set 566 (as the simplifier generally expects). This happens in simplTopBinds. 567 568 - In the occurrence analyser, if there are any out-of-scope 569 occurrences that pop out of the top, which will happen after 570 firing the rule: f = \x -> h x 571 h = \y -> 3 572 then just glom all the bindings into a single Rec, so that 573 the *next* iteration of the occurrence analyser will sort 574 them all out. This part happens in occurAnalysePgm. 575 576------------------------------------------------------------ 577Note [Inline rules] 578~~~~~~~~~~~~~~~~~~~ 579None of the above stuff about RULES applies to Inline Rules, 580stored in a CoreUnfolding. The unfolding, if any, is simplified 581at the same time as the regular RHS of the function (ie *not* like 582Note [Rules are visible in their own rec group]), so it should be 583treated *exactly* like an extra RHS. 584 585Or, rather, when computing loop-breaker edges, 586 * If f has an INLINE pragma, and it is active, we treat the 587 INLINE rhs as f's rhs 588 * If it's inactive, we treat f as having no rhs 589 * If it has no INLINE pragma, we look at f's actual rhs 590 591 592There is a danger that we'll be sub-optimal if we see this 593 f = ...f... 594 [INLINE f = ..no f...] 595where f is recursive, but the INLINE is not. This can just about 596happen with a sufficiently odd set of rules; eg 597 598 foo :: Int -> Int 599 {-# INLINE [1] foo #-} 600 foo x = x+1 601 602 bar :: Int -> Int 603 {-# INLINE [1] bar #-} 604 bar x = foo x + 1 605 606 {-# RULES "foo" [~1] forall x. foo x = bar x #-} 607 608Here the RULE makes bar recursive; but it's INLINE pragma remains 609non-recursive. It's tempting to then say that 'bar' should not be 610a loop breaker, but an attempt to do so goes wrong in two ways: 611 a) We may get 612 $df = ...$cfoo... 613 $cfoo = ...$df.... 614 [INLINE $cfoo = ...no-$df...] 615 But we want $cfoo to depend on $df explicitly so that we 616 put the bindings in the right order to inline $df in $cfoo 617 and perhaps break the loop altogether. (Maybe this 618 b) 619 620 621Example [eftInt] 622~~~~~~~~~~~~~~~ 623Example (from GHC.Enum): 624 625 eftInt :: Int# -> Int# -> [Int] 626 eftInt x y = ...(non-recursive)... 627 628 {-# INLINE [0] eftIntFB #-} 629 eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r 630 eftIntFB c n x y = ...(non-recursive)... 631 632 {-# RULES 633 "eftInt" [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y) 634 "eftIntList" [1] eftIntFB (:) [] = eftInt 635 #-} 636 637Note [Specialisation rules] 638~~~~~~~~~~~~~~~~~~~~~~~~~~~ 639Consider this group, which is typical of what SpecConstr builds: 640 641 fs a = ....f (C a).... 642 f x = ....f (C a).... 643 {-# RULE f (C a) = fs a #-} 644 645So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE). 646 647But watch out! If 'fs' is not chosen as a loop breaker, we may get an infinite loop: 648 - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify 649 - fs is inlined (say it's small) 650 - now there's another opportunity to apply the RULE 651 652This showed up when compiling Control.Concurrent.Chan.getChanContents. 653 654------------------------------------------------------------ 655Note [Finding join points] 656~~~~~~~~~~~~~~~~~~~~~~~~~~ 657It's the occurrence analyser's job to find bindings that we can turn into join 658points, but it doesn't perform that transformation right away. Rather, it marks 659the eligible bindings as part of their occurrence data, leaving it to the 660simplifier (or to simpleOptPgm) to actually change the binder's 'IdDetails'. 661The simplifier then eta-expands the RHS if needed and then updates the 662occurrence sites. Dividing the work this way means that the occurrence analyser 663still only takes one pass, yet one can always tell the difference between a 664function call and a jump by looking at the occurrence (because the same pass 665changes the 'IdDetails' and propagates the binders to their occurrence sites). 666 667To track potential join points, we use the 'occ_tail' field of OccInfo. A value 668of `AlwaysTailCalled n` indicates that every occurrence of the variable is a 669tail call with `n` arguments (counting both value and type arguments). Otherwise 670'occ_tail' will be 'NoTailCallInfo'. The tail call info flows bottom-up with the 671rest of 'OccInfo' until it goes on the binder. 672 673Note [Rules and join points] 674~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 675 676Things get fiddly with rules. Suppose we have: 677 678 let j :: Int -> Int 679 j y = 2 * y 680 k :: Int -> Int -> Int 681 {-# RULES "SPEC k 0" k 0 = j #-} 682 k x y = x + 2 * y 683 in ... 684 685Now suppose that both j and k appear only as saturated tail calls in the body. 686Thus we would like to make them both join points. The rule complicates matters, 687though, as its RHS has an unapplied occurrence of j. *However*, if we were to 688eta-expand the rule, all would be well: 689 690 {-# RULES "SPEC k 0" forall a. k 0 a = j a #-} 691 692So conceivably we could notice that a potential join point would have an 693"undersaturated" rule and account for it. This would mean we could make 694something that's been specialised a join point, for instance. But local bindings 695are rarely specialised, and being overly cautious about rules only 696costs us anything when, for some `j`: 697 698 * Before specialisation, `j` has non-tail calls, so it can't be a join point. 699 * During specialisation, `j` gets specialised and thus acquires rules. 700 * Sometime afterward, the non-tail calls to `j` disappear (as dead code, say), 701 and so now `j` *could* become a join point. 702 703This appears to be very rare in practice. TODO Perhaps we should gather 704statistics to be sure. 705 706------------------------------------------------------------ 707Note [Adjusting right-hand sides] 708~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 709There's a bit of a dance we need to do after analysing a lambda expression or 710a right-hand side. In particular, we need to 711 712 a) call 'markAllInsideLam' *unless* the binding is for a thunk, a one-shot 713 lambda, or a non-recursive join point; and 714 b) call 'markAllNonTailCalled' *unless* the binding is for a join point. 715 716Some examples, with how the free occurrences in e (assumed not to be a value 717lambda) get marked: 718 719 inside lam non-tail-called 720 ------------------------------------------------------------ 721 let x = e No Yes 722 let f = \x -> e Yes Yes 723 let f = \x{OneShot} -> e No Yes 724 \x -> e Yes Yes 725 join j x = e No No 726 joinrec j x = e Yes No 727 728There are a few other caveats; most importantly, if we're marking a binding as 729'AlwaysTailCalled', it's *going* to be a join point, so we treat it as one so 730that the effect cascades properly. Consequently, at the time the RHS is 731analysed, we won't know what adjustments to make; thus 'occAnalLamOrRhs' must 732return the unadjusted 'UsageDetails', to be adjusted by 'adjustRhsUsage' once 733join-point-hood has been decided. 734 735Thus the overall sequence taking place in 'occAnalNonRecBind' and 736'occAnalRecBind' is as follows: 737 738 1. Call 'occAnalLamOrRhs' to find usage information for the RHS. 739 2. Call 'tagNonRecBinder' or 'tagRecBinders', which decides whether to make 740 the binding a join point. 741 3. Call 'adjustRhsUsage' accordingly. (Done as part of 'tagRecBinders' when 742 recursive.) 743 744(In the recursive case, this logic is spread between 'makeNode' and 745'occAnalRec'.) 746-} 747 748------------------------------------------------------------------ 749-- occAnalBind 750------------------------------------------------------------------ 751 752occAnalBind :: OccEnv -- The incoming OccEnv 753 -> TopLevelFlag 754 -> ImpRuleEdges 755 -> CoreBind 756 -> UsageDetails -- Usage details of scope 757 -> (UsageDetails, -- Of the whole let(rec) 758 [CoreBind]) 759 760occAnalBind env lvl top_env (NonRec binder rhs) body_usage 761 = occAnalNonRecBind env lvl top_env binder rhs body_usage 762occAnalBind env lvl top_env (Rec pairs) body_usage 763 = occAnalRecBind env lvl top_env pairs body_usage 764 765----------------- 766occAnalNonRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> Var -> CoreExpr 767 -> UsageDetails -> (UsageDetails, [CoreBind]) 768occAnalNonRecBind env lvl imp_rule_edges binder rhs body_usage 769 | isTyVar binder -- A type let; we don't gather usage info 770 = (body_usage, [NonRec binder rhs]) 771 772 | not (binder `usedIn` body_usage) -- It's not mentioned 773 = (body_usage, []) 774 775 | otherwise -- It's mentioned in the body 776 = (body_usage' `andUDs` rhs_usage', [NonRec tagged_binder rhs']) 777 where 778 (body_usage', tagged_binder) = tagNonRecBinder lvl body_usage binder 779 mb_join_arity = willBeJoinId_maybe tagged_binder 780 781 (bndrs, body) = collectBinders rhs 782 783 (rhs_usage1, bndrs', body') = occAnalNonRecRhs env tagged_binder bndrs body 784 rhs' = mkLams (markJoinOneShots mb_join_arity bndrs') body' 785 -- For a /non-recursive/ join point we can mark all 786 -- its join-lambda as one-shot; and it's a good idea to do so 787 788 -- Unfoldings 789 -- See Note [Unfoldings and join points] 790 rhs_usage2 = case occAnalUnfolding env NonRecursive binder of 791 Just unf_usage -> rhs_usage1 `andUDs` unf_usage 792 Nothing -> rhs_usage1 793 794 -- Rules 795 -- See Note [Rules are extra RHSs] and Note [Rule dependency info] 796 rules_w_uds = occAnalRules env mb_join_arity NonRecursive tagged_binder 797 rule_uds = map (\(_, l, r) -> l `andUDs` r) rules_w_uds 798 rhs_usage3 = foldr andUDs rhs_usage2 rule_uds 799 rhs_usage4 = case lookupVarEnv imp_rule_edges binder of 800 Nothing -> rhs_usage3 801 Just vs -> addManyOccsSet rhs_usage3 vs 802 -- See Note [Preventing loops due to imported functions rules] 803 804 -- Final adjustment 805 rhs_usage' = adjustRhsUsage mb_join_arity NonRecursive bndrs' rhs_usage4 806 807----------------- 808occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)] 809 -> UsageDetails -> (UsageDetails, [CoreBind]) 810occAnalRecBind env lvl imp_rule_edges pairs body_usage 811 = foldr (occAnalRec env lvl) (body_usage, []) sccs 812 -- For a recursive group, we 813 -- * occ-analyse all the RHSs 814 -- * compute strongly-connected components 815 -- * feed those components to occAnalRec 816 -- See Note [Recursive bindings: the grand plan] 817 where 818 sccs :: [SCC Details] 819 sccs = {-# SCC "occAnalBind.scc" #-} 820 stronglyConnCompFromEdgedVerticesUniq nodes 821 822 nodes :: [LetrecNode] 823 nodes = {-# SCC "occAnalBind.assoc" #-} 824 map (makeNode env imp_rule_edges bndr_set) pairs 825 826 bndr_set = mkVarSet (map fst pairs) 827 828{- 829Note [Unfoldings and join points] 830~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 831 832We assume that anything in an unfolding occurs multiple times, since unfoldings 833are often copied (that's the whole point!). But we still need to track tail 834calls for the purpose of finding join points. 835-} 836 837----------------------------- 838occAnalRec :: OccEnv -> TopLevelFlag 839 -> SCC Details 840 -> (UsageDetails, [CoreBind]) 841 -> (UsageDetails, [CoreBind]) 842 843 -- The NonRec case is just like a Let (NonRec ...) above 844occAnalRec _ lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs 845 , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs })) 846 (body_uds, binds) 847 | not (bndr `usedIn` body_uds) 848 = (body_uds, binds) -- See Note [Dead code] 849 850 | otherwise -- It's mentioned in the body 851 = (body_uds' `andUDs` rhs_uds', 852 NonRec tagged_bndr rhs : binds) 853 where 854 (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr 855 rhs_uds' = adjustRhsUsage (willBeJoinId_maybe tagged_bndr) NonRecursive 856 rhs_bndrs rhs_uds 857 858 -- The Rec case is the interesting one 859 -- See Note [Recursive bindings: the grand plan] 860 -- See Note [Loop breaking] 861occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds) 862 | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds 863 = (body_uds, binds) -- See Note [Dead code] 864 865 | otherwise -- At this point we always build a single Rec 866 = -- pprTrace "occAnalRec" (vcat 867 -- [ text "weak_fvs" <+> ppr weak_fvs 868 -- , text "lb nodes" <+> ppr loop_breaker_nodes]) 869 (final_uds, Rec pairs : binds) 870 871 where 872 bndrs = map nd_bndr details_s 873 bndr_set = mkVarSet bndrs 874 875 ------------------------------ 876 -- See Note [Choosing loop breakers] for loop_breaker_nodes 877 final_uds :: UsageDetails 878 loop_breaker_nodes :: [LetrecNode] 879 (final_uds, loop_breaker_nodes) 880 = mkLoopBreakerNodes env lvl bndr_set body_uds details_s 881 882 ------------------------------ 883 weak_fvs :: VarSet 884 weak_fvs = mapUnionVarSet nd_weak details_s 885 886 --------------------------- 887 -- Now reconstruct the cycle 888 pairs :: [(Id,CoreExpr)] 889 pairs | isEmptyVarSet weak_fvs = reOrderNodes 0 bndr_set weak_fvs loop_breaker_nodes [] 890 | otherwise = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_nodes [] 891 -- If weak_fvs is empty, the loop_breaker_nodes will include 892 -- all the edges in the original scope edges [remember, 893 -- weak_fvs is the difference between scope edges and 894 -- lb-edges], so a fresh SCC computation would yield a 895 -- single CyclicSCC result; and reOrderNodes deals with 896 -- exactly that case 897 898 899------------------------------------------------------------------ 900-- Loop breaking 901------------------------------------------------------------------ 902 903type Binding = (Id,CoreExpr) 904 905loopBreakNodes :: Int 906 -> VarSet -- All binders 907 -> VarSet -- Binders whose dependencies may be "missing" 908 -- See Note [Weak loop breakers] 909 -> [LetrecNode] 910 -> [Binding] -- Append these to the end 911 -> [Binding] 912{- 913loopBreakNodes is applied to the list of nodes for a cyclic strongly 914connected component (there's guaranteed to be a cycle). It returns 915the same nodes, but 916 a) in a better order, 917 b) with some of the Ids having a IAmALoopBreaker pragma 918 919The "loop-breaker" Ids are sufficient to break all cycles in the SCC. This means 920that the simplifier can guarantee not to loop provided it never records an inlining 921for these no-inline guys. 922 923Furthermore, the order of the binds is such that if we neglect dependencies 924on the no-inline Ids then the binds are topologically sorted. This means 925that the simplifier will generally do a good job if it works from top bottom, 926recording inlinings for any Ids which aren't marked as "no-inline" as it goes. 927-} 928 929-- Return the bindings sorted into a plausible order, and marked with loop breakers. 930loopBreakNodes depth bndr_set weak_fvs nodes binds 931 = -- pprTrace "loopBreakNodes" (ppr nodes) $ 932 go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds 933 where 934 go [] binds = binds 935 go (scc:sccs) binds = loop_break_scc scc (go sccs binds) 936 937 loop_break_scc scc binds 938 = case scc of 939 AcyclicSCC node -> mk_non_loop_breaker weak_fvs node : binds 940 CyclicSCC nodes -> reOrderNodes depth bndr_set weak_fvs nodes binds 941 942---------------------------------- 943reOrderNodes :: Int -> VarSet -> VarSet -> [LetrecNode] -> [Binding] -> [Binding] 944 -- Choose a loop breaker, mark it no-inline, 945 -- and call loopBreakNodes on the rest 946reOrderNodes _ _ _ [] _ = panic "reOrderNodes" 947reOrderNodes _ _ _ [node] binds = mk_loop_breaker node : binds 948reOrderNodes depth bndr_set weak_fvs (node : nodes) binds 949 = -- pprTrace "reOrderNodes" (vcat [ text "unchosen" <+> ppr unchosen 950 -- , text "chosen" <+> ppr chosen_nodes ]) $ 951 loopBreakNodes new_depth bndr_set weak_fvs unchosen $ 952 (map mk_loop_breaker chosen_nodes ++ binds) 953 where 954 (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb 955 (nd_score (node_payload node)) 956 [node] [] nodes 957 958 approximate_lb = depth >= 2 959 new_depth | approximate_lb = 0 960 | otherwise = depth+1 961 -- After two iterations (d=0, d=1) give up 962 -- and approximate, returning to d=0 963 964mk_loop_breaker :: LetrecNode -> Binding 965mk_loop_breaker (node_payload -> ND { nd_bndr = bndr, nd_rhs = rhs}) 966 = (bndr `setIdOccInfo` strongLoopBreaker { occ_tail = tail_info }, rhs) 967 where 968 tail_info = tailCallInfo (idOccInfo bndr) 969 970mk_non_loop_breaker :: VarSet -> LetrecNode -> Binding 971-- See Note [Weak loop breakers] 972mk_non_loop_breaker weak_fvs (node_payload -> ND { nd_bndr = bndr 973 , nd_rhs = rhs}) 974 | bndr `elemVarSet` weak_fvs = (setIdOccInfo bndr occ', rhs) 975 | otherwise = (bndr, rhs) 976 where 977 occ' = weakLoopBreaker { occ_tail = tail_info } 978 tail_info = tailCallInfo (idOccInfo bndr) 979 980---------------------------------- 981chooseLoopBreaker :: Bool -- True <=> Too many iterations, 982 -- so approximate 983 -> NodeScore -- Best score so far 984 -> [LetrecNode] -- Nodes with this score 985 -> [LetrecNode] -- Nodes with higher scores 986 -> [LetrecNode] -- Unprocessed nodes 987 -> ([LetrecNode], [LetrecNode]) 988 -- This loop looks for the bind with the lowest score 989 -- to pick as the loop breaker. The rest accumulate in 990chooseLoopBreaker _ _ loop_nodes acc [] 991 = (loop_nodes, acc) -- Done 992 993 -- If approximate_loop_breaker is True, we pick *all* 994 -- nodes with lowest score, else just one 995 -- See Note [Complexity of loop breaking] 996chooseLoopBreaker approx_lb loop_sc loop_nodes acc (node : nodes) 997 | approx_lb 998 , rank sc == rank loop_sc 999 = chooseLoopBreaker approx_lb loop_sc (node : loop_nodes) acc nodes 1000 1001 | sc `betterLB` loop_sc -- Better score so pick this new one 1002 = chooseLoopBreaker approx_lb sc [node] (loop_nodes ++ acc) nodes 1003 1004 | otherwise -- Worse score so don't pick it 1005 = chooseLoopBreaker approx_lb loop_sc loop_nodes (node : acc) nodes 1006 where 1007 sc = nd_score (node_payload node) 1008 1009{- 1010Note [Complexity of loop breaking] 1011~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1012The loop-breaking algorithm knocks out one binder at a time, and 1013performs a new SCC analysis on the remaining binders. That can 1014behave very badly in tightly-coupled groups of bindings; in the 1015worst case it can be (N**2)*log N, because it does a full SCC 1016on N, then N-1, then N-2 and so on. 1017 1018To avoid this, we switch plans after 2 (or whatever) attempts: 1019 Plan A: pick one binder with the lowest score, make it 1020 a loop breaker, and try again 1021 Plan B: pick *all* binders with the lowest score, make them 1022 all loop breakers, and try again 1023Since there are only a small finite number of scores, this will 1024terminate in a constant number of iterations, rather than O(N) 1025iterations. 1026 1027You might thing that it's very unlikely, but RULES make it much 1028more likely. Here's a real example from #1969: 1029 Rec { $dm = \d.\x. op d 1030 {-# RULES forall d. $dm Int d = $s$dm1 1031 forall d. $dm Bool d = $s$dm2 #-} 1032 1033 dInt = MkD .... opInt ... 1034 dInt = MkD .... opBool ... 1035 opInt = $dm dInt 1036 opBool = $dm dBool 1037 1038 $s$dm1 = \x. op dInt 1039 $s$dm2 = \x. op dBool } 1040The RULES stuff means that we can't choose $dm as a loop breaker 1041(Note [Choosing loop breakers]), so we must choose at least (say) 1042opInt *and* opBool, and so on. The number of loop breakders is 1043linear in the number of instance declarations. 1044 1045Note [Loop breakers and INLINE/INLINABLE pragmas] 1046~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1047Avoid choosing a function with an INLINE pramga as the loop breaker! 1048If such a function is mutually-recursive with a non-INLINE thing, 1049then the latter should be the loop-breaker. 1050 1051It's vital to distinguish between INLINE and INLINABLE (the 1052Bool returned by hasStableCoreUnfolding_maybe). If we start with 1053 Rec { {-# INLINABLE f #-} 1054 f x = ...f... } 1055and then worker/wrapper it through strictness analysis, we'll get 1056 Rec { {-# INLINABLE $wf #-} 1057 $wf p q = let x = (p,q) in ...f... 1058 1059 {-# INLINE f #-} 1060 f x = case x of (p,q) -> $wf p q } 1061 1062Now it is vital that we choose $wf as the loop breaker, so we can 1063inline 'f' in '$wf'. 1064 1065Note [DFuns should not be loop breakers] 1066~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1067It's particularly bad to make a DFun into a loop breaker. See 1068Note [How instance declarations are translated] in TcInstDcls 1069 1070We give DFuns a higher score than ordinary CONLIKE things because 1071if there's a choice we want the DFun to be the non-loop breaker. Eg 1072 1073rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC) 1074 1075 $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE) 1076 {-# DFUN #-} 1077 $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC) 1078 } 1079 1080Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it 1081if we can't unravel the DFun first. 1082 1083Note [Constructor applications] 1084~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1085It's really really important to inline dictionaries. Real 1086example (the Enum Ordering instance from GHC.Base): 1087 1088 rec f = \ x -> case d of (p,q,r) -> p x 1089 g = \ x -> case d of (p,q,r) -> q x 1090 d = (v, f, g) 1091 1092Here, f and g occur just once; but we can't inline them into d. 1093On the other hand we *could* simplify those case expressions if 1094we didn't stupidly choose d as the loop breaker. 1095But we won't because constructor args are marked "Many". 1096Inlining dictionaries is really essential to unravelling 1097the loops in static numeric dictionaries, see GHC.Float. 1098 1099Note [Closure conversion] 1100~~~~~~~~~~~~~~~~~~~~~~~~~ 1101We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm. 1102The immediate motivation came from the result of a closure-conversion transformation 1103which generated code like this: 1104 1105 data Clo a b = forall c. Clo (c -> a -> b) c 1106 1107 ($:) :: Clo a b -> a -> b 1108 Clo f env $: x = f env x 1109 1110 rec { plus = Clo plus1 () 1111 1112 ; plus1 _ n = Clo plus2 n 1113 1114 ; plus2 Zero n = n 1115 ; plus2 (Succ m) n = Succ (plus $: m $: n) } 1116 1117If we inline 'plus' and 'plus1', everything unravels nicely. But if 1118we choose 'plus1' as the loop breaker (which is entirely possible 1119otherwise), the loop does not unravel nicely. 1120 1121 1122@occAnalUnfolding@ deals with the question of bindings where the Id is marked 1123by an INLINE pragma. For these we record that anything which occurs 1124in its RHS occurs many times. This pessimistically assumes that this 1125inlined binder also occurs many times in its scope, but if it doesn't 1126we'll catch it next time round. At worst this costs an extra simplifier pass. 1127ToDo: try using the occurrence info for the inline'd binder. 1128 1129[March 97] We do the same for atomic RHSs. Reason: see notes with loopBreakSCC. 1130[June 98, SLPJ] I've undone this change; I don't understand it. See notes with loopBreakSCC. 1131 1132 1133************************************************************************ 1134* * 1135 Making nodes 1136* * 1137************************************************************************ 1138-} 1139 1140type ImpRuleEdges = IdEnv IdSet -- Mapping from FVs of imported RULE LHSs to RHS FVs 1141 1142noImpRuleEdges :: ImpRuleEdges 1143noImpRuleEdges = emptyVarEnv 1144 1145type LetrecNode = Node Unique Details -- Node comes from Digraph 1146 -- The Unique key is gotten from the Id 1147data Details 1148 = ND { nd_bndr :: Id -- Binder 1149 , nd_rhs :: CoreExpr -- RHS, already occ-analysed 1150 , nd_rhs_bndrs :: [CoreBndr] -- Outer lambdas of RHS 1151 -- INVARIANT: (nd_rhs_bndrs nd, _) == 1152 -- collectBinders (nd_rhs nd) 1153 1154 , nd_uds :: UsageDetails -- Usage from RHS, and RULES, and stable unfoldings 1155 -- ignoring phase (ie assuming all are active) 1156 -- See Note [Forming Rec groups] 1157 1158 , nd_inl :: IdSet -- Free variables of 1159 -- the stable unfolding (if present and active) 1160 -- or the RHS (if not) 1161 -- but excluding any RULES 1162 -- This is the IdSet that may be used if the Id is inlined 1163 1164 , nd_weak :: IdSet -- Binders of this Rec that are mentioned in nd_uds 1165 -- but are *not* in nd_inl. These are the ones whose 1166 -- dependencies might not be respected by loop_breaker_nodes 1167 -- See Note [Weak loop breakers] 1168 1169 , nd_active_rule_fvs :: IdSet -- Free variables of the RHS of active RULES 1170 1171 , nd_score :: NodeScore 1172 } 1173 1174instance Outputable Details where 1175 ppr nd = text "ND" <> braces 1176 (sep [ text "bndr =" <+> ppr (nd_bndr nd) 1177 , text "uds =" <+> ppr (nd_uds nd) 1178 , text "inl =" <+> ppr (nd_inl nd) 1179 , text "weak =" <+> ppr (nd_weak nd) 1180 , text "rule =" <+> ppr (nd_active_rule_fvs nd) 1181 , text "score =" <+> ppr (nd_score nd) 1182 ]) 1183 1184-- The NodeScore is compared lexicographically; 1185-- e.g. lower rank wins regardless of size 1186type NodeScore = ( Int -- Rank: lower => more likely to be picked as loop breaker 1187 , Int -- Size of rhs: higher => more likely to be picked as LB 1188 -- Maxes out at maxExprSize; we just use it to prioritise 1189 -- small functions 1190 , Bool ) -- Was it a loop breaker before? 1191 -- True => more likely to be picked 1192 -- Note [Loop breakers, node scoring, and stability] 1193 1194rank :: NodeScore -> Int 1195rank (r, _, _) = r 1196 1197makeNode :: OccEnv -> ImpRuleEdges -> VarSet 1198 -> (Var, CoreExpr) -> LetrecNode 1199-- See Note [Recursive bindings: the grand plan] 1200makeNode env imp_rule_edges bndr_set (bndr, rhs) 1201 = DigraphNode details (varUnique bndr) (nonDetKeysUniqSet node_fvs) 1202 -- It's OK to use nonDetKeysUniqSet here as stronglyConnCompFromEdgedVerticesR 1203 -- is still deterministic with edges in nondeterministic order as 1204 -- explained in Note [Deterministic SCC] in Digraph. 1205 where 1206 details = ND { nd_bndr = bndr 1207 , nd_rhs = rhs' 1208 , nd_rhs_bndrs = bndrs' 1209 , nd_uds = rhs_usage3 1210 , nd_inl = inl_fvs 1211 , nd_weak = node_fvs `minusVarSet` inl_fvs 1212 , nd_active_rule_fvs = active_rule_fvs 1213 , nd_score = pprPanic "makeNodeDetails" (ppr bndr) } 1214 1215 -- Constructing the edges for the main Rec computation 1216 -- See Note [Forming Rec groups] 1217 (bndrs, body) = collectBinders rhs 1218 (rhs_usage1, bndrs', body') = occAnalRecRhs env bndrs body 1219 rhs' = mkLams bndrs' body' 1220 rhs_usage2 = foldr andUDs rhs_usage1 rule_uds 1221 -- Note [Rules are extra RHSs] 1222 -- Note [Rule dependency info] 1223 rhs_usage3 = case mb_unf_uds of 1224 Just unf_uds -> rhs_usage2 `andUDs` unf_uds 1225 Nothing -> rhs_usage2 1226 node_fvs = udFreeVars bndr_set rhs_usage3 1227 1228 -- Finding the free variables of the rules 1229 is_active = occ_rule_act env :: Activation -> Bool 1230 1231 rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)] 1232 rules_w_uds = occAnalRules env (Just (length bndrs)) Recursive bndr 1233 1234 rules_w_rhs_fvs :: [(Activation, VarSet)] -- Find the RHS fvs 1235 rules_w_rhs_fvs = maybe id (\ids -> ((AlwaysActive, ids):)) 1236 (lookupVarEnv imp_rule_edges bndr) 1237 -- See Note [Preventing loops due to imported functions rules] 1238 [ (ru_act rule, udFreeVars bndr_set rhs_uds) 1239 | (rule, _, rhs_uds) <- rules_w_uds ] 1240 rule_uds = map (\(_, l, r) -> l `andUDs` r) rules_w_uds 1241 active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_rhs_fvs 1242 , is_active a] 1243 1244 -- Finding the usage details of the INLINE pragma (if any) 1245 mb_unf_uds = occAnalUnfolding env Recursive bndr 1246 1247 -- Find the "nd_inl" free vars; for the loop-breaker phase 1248 inl_fvs = case mb_unf_uds of 1249 Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS 1250 Just unf_uds -> udFreeVars bndr_set unf_uds 1251 -- We could check for an *active* INLINE (returning 1252 -- emptyVarSet for an inactive one), but is_active 1253 -- isn't the right thing (it tells about 1254 -- RULE activation), so we'd need more plumbing 1255 1256mkLoopBreakerNodes :: OccEnv -> TopLevelFlag 1257 -> VarSet 1258 -> UsageDetails -- for BODY of let 1259 -> [Details] 1260 -> (UsageDetails, -- adjusted 1261 [LetrecNode]) 1262-- Does four things 1263-- a) tag each binder with its occurrence info 1264-- b) add a NodeScore to each node 1265-- c) make a Node with the right dependency edges for 1266-- the loop-breaker SCC analysis 1267-- d) adjust each RHS's usage details according to 1268-- the binder's (new) shotness and join-point-hood 1269mkLoopBreakerNodes env lvl bndr_set body_uds details_s 1270 = (final_uds, zipWith mk_lb_node details_s bndrs') 1271 where 1272 (final_uds, bndrs') = tagRecBinders lvl body_uds 1273 [ ((nd_bndr nd) 1274 ,(nd_uds nd) 1275 ,(nd_rhs_bndrs nd)) 1276 | nd <- details_s ] 1277 mk_lb_node nd@(ND { nd_bndr = bndr, nd_rhs = rhs, nd_inl = inl_fvs }) bndr' 1278 = DigraphNode nd' (varUnique bndr) (nonDetKeysUniqSet lb_deps) 1279 -- It's OK to use nonDetKeysUniqSet here as 1280 -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges 1281 -- in nondeterministic order as explained in 1282 -- Note [Deterministic SCC] in Digraph. 1283 where 1284 nd' = nd { nd_bndr = bndr', nd_score = score } 1285 score = nodeScore env bndr bndr' rhs lb_deps 1286 lb_deps = extendFvs_ rule_fv_env inl_fvs 1287 1288 rule_fv_env :: IdEnv IdSet 1289 -- Maps a variable f to the variables from this group 1290 -- mentioned in RHS of active rules for f 1291 -- Domain is *subset* of bound vars (others have no rule fvs) 1292 rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs) 1293 init_rule_fvs -- See Note [Finding rule RHS free vars] 1294 = [ (b, trimmed_rule_fvs) 1295 | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s 1296 , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set 1297 , not (isEmptyVarSet trimmed_rule_fvs) ] 1298 1299 1300------------------------------------------ 1301nodeScore :: OccEnv 1302 -> Id -- Binder has old occ-info (just for loop-breaker-ness) 1303 -> Id -- Binder with new occ-info 1304 -> CoreExpr -- RHS 1305 -> VarSet -- Loop-breaker dependencies 1306 -> NodeScore 1307nodeScore env old_bndr new_bndr bind_rhs lb_deps 1308 | not (isId old_bndr) -- A type or cercion variable is never a loop breaker 1309 = (100, 0, False) 1310 1311 | old_bndr `elemVarSet` lb_deps -- Self-recursive things are great loop breakers 1312 = (0, 0, True) -- See Note [Self-recursion and loop breakers] 1313 1314 | not (occ_unf_act env old_bndr) -- A binder whose inlining is inactive (e.g. has 1315 = (0, 0, True) -- a NOINLINE pragma) makes a great loop breaker 1316 1317 | exprIsTrivial rhs 1318 = mk_score 10 -- Practically certain to be inlined 1319 -- Used to have also: && not (isExportedId bndr) 1320 -- But I found this sometimes cost an extra iteration when we have 1321 -- rec { d = (a,b); a = ...df...; b = ...df...; df = d } 1322 -- where df is the exported dictionary. Then df makes a really 1323 -- bad choice for loop breaker 1324 1325 | DFunUnfolding { df_args = args } <- id_unfolding 1326 -- Never choose a DFun as a loop breaker 1327 -- Note [DFuns should not be loop breakers] 1328 = (9, length args, is_lb) 1329 1330 -- Data structures are more important than INLINE pragmas 1331 -- so that dictionary/method recursion unravels 1332 1333 | CoreUnfolding { uf_guidance = UnfWhen {} } <- id_unfolding 1334 = mk_score 6 1335 1336 | is_con_app rhs -- Data types help with cases: 1337 = mk_score 5 -- Note [Constructor applications] 1338 1339 | isStableUnfolding id_unfolding 1340 , can_unfold 1341 = mk_score 3 1342 1343 | isOneOcc (idOccInfo new_bndr) 1344 = mk_score 2 -- Likely to be inlined 1345 1346 | can_unfold -- The Id has some kind of unfolding 1347 = mk_score 1 1348 1349 | otherwise 1350 = (0, 0, is_lb) 1351 1352 where 1353 mk_score :: Int -> NodeScore 1354 mk_score rank = (rank, rhs_size, is_lb) 1355 1356 is_lb = isStrongLoopBreaker (idOccInfo old_bndr) 1357 rhs = case id_unfolding of 1358 CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs } 1359 | isStableSource src 1360 -> unf_rhs 1361 _ -> bind_rhs 1362 -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding 1363 rhs_size = case id_unfolding of 1364 CoreUnfolding { uf_guidance = guidance } 1365 | UnfIfGoodArgs { ug_size = size } <- guidance 1366 -> size 1367 _ -> cheapExprSize rhs 1368 1369 can_unfold = canUnfold id_unfolding 1370 id_unfolding = realIdUnfolding old_bndr 1371 -- realIdUnfolding: Ignore loop-breaker-ness here because 1372 -- that is what we are setting! 1373 1374 -- Checking for a constructor application 1375 -- Cheap and cheerful; the simplifier moves casts out of the way 1376 -- The lambda case is important to spot x = /\a. C (f a) 1377 -- which comes up when C is a dictionary constructor and 1378 -- f is a default method. 1379 -- Example: the instance for Show (ST s a) in GHC.ST 1380 -- 1381 -- However we *also* treat (\x. C p q) as a con-app-like thing, 1382 -- Note [Closure conversion] 1383 is_con_app (Var v) = isConLikeId v 1384 is_con_app (App f _) = is_con_app f 1385 is_con_app (Lam _ e) = is_con_app e 1386 is_con_app (Tick _ e) = is_con_app e 1387 is_con_app _ = False 1388 1389maxExprSize :: Int 1390maxExprSize = 20 -- Rather arbitrary 1391 1392cheapExprSize :: CoreExpr -> Int 1393-- Maxes out at maxExprSize 1394cheapExprSize e 1395 = go 0 e 1396 where 1397 go n e | n >= maxExprSize = n 1398 | otherwise = go1 n e 1399 1400 go1 n (Var {}) = n+1 1401 go1 n (Lit {}) = n+1 1402 go1 n (Type {}) = n 1403 go1 n (Coercion {}) = n 1404 go1 n (Tick _ e) = go1 n e 1405 go1 n (Cast e _) = go1 n e 1406 go1 n (App f a) = go (go1 n f) a 1407 go1 n (Lam b e) 1408 | isTyVar b = go1 n e 1409 | otherwise = go (n+1) e 1410 go1 n (Let b e) = gos (go1 n e) (rhssOfBind b) 1411 go1 n (Case e _ _ as) = gos (go1 n e) (rhssOfAlts as) 1412 1413 gos n [] = n 1414 gos n (e:es) | n >= maxExprSize = n 1415 | otherwise = gos (go1 n e) es 1416 1417betterLB :: NodeScore -> NodeScore -> Bool 1418-- If n1 `betterLB` n2 then choose n1 as the loop breaker 1419betterLB (rank1, size1, lb1) (rank2, size2, _) 1420 | rank1 < rank2 = True 1421 | rank1 > rank2 = False 1422 | size1 < size2 = False -- Make the bigger n2 into the loop breaker 1423 | size1 > size2 = True 1424 | lb1 = True -- Tie-break: if n1 was a loop breaker before, choose it 1425 | otherwise = False -- See Note [Loop breakers, node scoring, and stability] 1426 1427{- Note [Self-recursion and loop breakers] 1428~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1429If we have 1430 rec { f = ...f...g... 1431 ; g = .....f... } 1432then 'f' has to be a loop breaker anyway, so we may as well choose it 1433right away, so that g can inline freely. 1434 1435This is really just a cheap hack. Consider 1436 rec { f = ...g... 1437 ; g = ..f..h... 1438 ; h = ...f....} 1439Here f or g are better loop breakers than h; but we might accidentally 1440choose h. Finding the minimal set of loop breakers is hard. 1441 1442Note [Loop breakers, node scoring, and stability] 1443~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1444To choose a loop breaker, we give a NodeScore to each node in the SCC, 1445and pick the one with the best score (according to 'betterLB'). 1446 1447We need to be jolly careful (#12425, #12234) about the stability 1448of this choice. Suppose we have 1449 1450 let rec { f = ...g...g... 1451 ; g = ...f...f... } 1452 in 1453 case x of 1454 True -> ...f.. 1455 False -> ..f... 1456 1457In each iteration of the simplifier the occurrence analyser OccAnal 1458chooses a loop breaker. Suppose in iteration 1 it choose g as the loop 1459breaker. That means it is free to inline f. 1460 1461Suppose that GHC decides to inline f in the branches of the case, but 1462(for some reason; eg it is not saturated) in the rhs of g. So we get 1463 1464 let rec { f = ...g...g... 1465 ; g = ...f...f... } 1466 in 1467 case x of 1468 True -> ...g...g..... 1469 False -> ..g..g.... 1470 1471Now suppose that, for some reason, in the next iteration the occurrence 1472analyser chooses f as the loop breaker, so it can freely inline g. And 1473again for some reason the simplifier inlines g at its calls in the case 1474branches, but not in the RHS of f. Then we get 1475 1476 let rec { f = ...g...g... 1477 ; g = ...f...f... } 1478 in 1479 case x of 1480 True -> ...(...f...f...)...(...f..f..)..... 1481 False -> ..(...f...f...)...(..f..f...).... 1482 1483You can see where this is going! Each iteration of the simplifier 1484doubles the number of calls to f or g. No wonder GHC is slow! 1485 1486(In the particular example in comment:3 of #12425, f and g are the two 1487mutually recursive fmap instances for CondT and Result. They are both 1488marked INLINE which, oddly, is why they don't inline in each other's 1489RHS, because the call there is not saturated.) 1490 1491The root cause is that we flip-flop on our choice of loop breaker. I 1492always thought it didn't matter, and indeed for any single iteration 1493to terminate, it doesn't matter. But when we iterate, it matters a 1494lot!! 1495 1496So The Plan is this: 1497 If there is a tie, choose the node that 1498 was a loop breaker last time round 1499 1500Hence the is_lb field of NodeScore 1501 1502************************************************************************ 1503* * 1504 Right hand sides 1505* * 1506************************************************************************ 1507-} 1508 1509occAnalRhs :: OccEnv -> RecFlag -> Id -> [CoreBndr] -> CoreExpr 1510 -> (UsageDetails, [CoreBndr], CoreExpr) 1511 -- Returned usage details covers only the RHS, 1512 -- and *not* the RULE or INLINE template for the Id 1513occAnalRhs env Recursive _ bndrs body 1514 = occAnalRecRhs env bndrs body 1515occAnalRhs env NonRecursive id bndrs body 1516 = occAnalNonRecRhs env id bndrs body 1517 1518occAnalRecRhs :: OccEnv -> [CoreBndr] -> CoreExpr -- Rhs lambdas, body 1519 -> (UsageDetails, [CoreBndr], CoreExpr) 1520 -- Returned usage details covers only the RHS, 1521 -- and *not* the RULE or INLINE template for the Id 1522occAnalRecRhs env bndrs body = occAnalLamOrRhs (rhsCtxt env) bndrs body 1523 1524occAnalNonRecRhs :: OccEnv 1525 -> Id -> [CoreBndr] -> CoreExpr -- Binder; rhs lams, body 1526 -- Binder is already tagged with occurrence info 1527 -> (UsageDetails, [CoreBndr], CoreExpr) 1528 -- Returned usage details covers only the RHS, 1529 -- and *not* the RULE or INLINE template for the Id 1530occAnalNonRecRhs env bndr bndrs body 1531 = occAnalLamOrRhs rhs_env bndrs body 1532 where 1533 env1 | is_join_point = env -- See Note [Join point RHSs] 1534 | certainly_inline = env -- See Note [Cascading inlines] 1535 | otherwise = rhsCtxt env 1536 1537 -- See Note [Sources of one-shot information] 1538 rhs_env = env1 { occ_one_shots = argOneShots dmd } 1539 1540 certainly_inline -- See Note [Cascading inlines] 1541 = case occ of 1542 OneOcc { occ_in_lam = in_lam, occ_n_br = n_br } 1543 -> not in_lam && n_br == 1 && active && not_stable 1544 _ -> False 1545 1546 is_join_point = isAlwaysTailCalled occ 1547 -- Like (isJoinId bndr) but happens one step earlier 1548 -- c.f. willBeJoinId_maybe 1549 1550 occ = idOccInfo bndr 1551 dmd = idDemandInfo bndr 1552 active = isAlwaysActive (idInlineActivation bndr) 1553 not_stable = not (isStableUnfolding (idUnfolding bndr)) 1554 1555occAnalUnfolding :: OccEnv 1556 -> RecFlag 1557 -> Id 1558 -> Maybe UsageDetails 1559 -- Just the analysis, not a new unfolding. The unfolding 1560 -- got analysed when it was created and we don't need to 1561 -- update it. 1562occAnalUnfolding env rec_flag id 1563 = case realIdUnfolding id of -- ignore previous loop-breaker flag 1564 CoreUnfolding { uf_tmpl = rhs, uf_src = src } 1565 | not (isStableSource src) 1566 -> Nothing 1567 | otherwise 1568 -> Just $ markAllMany usage 1569 where 1570 (bndrs, body) = collectBinders rhs 1571 (usage, _, _) = occAnalRhs env rec_flag id bndrs body 1572 1573 DFunUnfolding { df_bndrs = bndrs, df_args = args } 1574 -> Just $ zapDetails (delDetailsList usage bndrs) 1575 where 1576 usage = andUDsList (map (fst . occAnal env) args) 1577 1578 _ -> Nothing 1579 1580occAnalRules :: OccEnv 1581 -> Maybe JoinArity -- If the binder is (or MAY become) a join 1582 -- point, what its join arity is (or WOULD 1583 -- become). See Note [Rules and join points]. 1584 -> RecFlag 1585 -> Id 1586 -> [(CoreRule, -- Each (non-built-in) rule 1587 UsageDetails, -- Usage details for LHS 1588 UsageDetails)] -- Usage details for RHS 1589occAnalRules env mb_expected_join_arity rec_flag id 1590 = [ (rule, lhs_uds, rhs_uds) | rule@Rule {} <- idCoreRules id 1591 , let (lhs_uds, rhs_uds) = occ_anal_rule rule ] 1592 where 1593 occ_anal_rule (Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs }) 1594 = (lhs_uds, final_rhs_uds) 1595 where 1596 lhs_uds = addManyOccsSet emptyDetails $ 1597 (exprsFreeVars args `delVarSetList` bndrs) 1598 (rhs_bndrs, rhs_body) = collectBinders rhs 1599 (rhs_uds, _, _) = occAnalRhs env rec_flag id rhs_bndrs rhs_body 1600 -- Note [Rules are extra RHSs] 1601 -- Note [Rule dependency info] 1602 final_rhs_uds = adjust_tail_info args $ markAllMany $ 1603 (rhs_uds `delDetailsList` bndrs) 1604 occ_anal_rule _ 1605 = (emptyDetails, emptyDetails) 1606 1607 adjust_tail_info args uds -- see Note [Rules and join points] 1608 = case mb_expected_join_arity of 1609 Just ar | args `lengthIs` ar -> uds 1610 _ -> markAllNonTailCalled uds 1611{- Note [Join point RHSs] 1612~~~~~~~~~~~~~~~~~~~~~~~~~ 1613Consider 1614 x = e 1615 join j = Just x 1616 1617We want to inline x into j right away, so we don't want to give 1618the join point a RhsCtxt (#14137). It's not a huge deal, because 1619the FloatIn pass knows to float into join point RHSs; and the simplifier 1620does not float things out of join point RHSs. But it's a simple, cheap 1621thing to do. See #14137. 1622 1623Note [Cascading inlines] 1624~~~~~~~~~~~~~~~~~~~~~~~~ 1625By default we use an rhsCtxt for the RHS of a binding. This tells the 1626occ anal n that it's looking at an RHS, which has an effect in 1627occAnalApp. In particular, for constructor applications, it makes 1628the arguments appear to have NoOccInfo, so that we don't inline into 1629them. Thus x = f y 1630 k = Just x 1631we do not want to inline x. 1632 1633But there's a problem. Consider 1634 x1 = a0 : [] 1635 x2 = a1 : x1 1636 x3 = a2 : x2 1637 g = f x3 1638First time round, it looks as if x1 and x2 occur as an arg of a 1639let-bound constructor ==> give them a many-occurrence. 1640But then x3 is inlined (unconditionally as it happens) and 1641next time round, x2 will be, and the next time round x1 will be 1642Result: multiple simplifier iterations. Sigh. 1643 1644So, when analysing the RHS of x3 we notice that x3 will itself 1645definitely inline the next time round, and so we analyse x3's rhs in 1646an ordinary context, not rhsCtxt. Hence the "certainly_inline" stuff. 1647 1648Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally. 1649If (a) the RHS is expandable (see isExpandableApp in occAnalApp), and 1650 (b) certainly_inline says "yes" when preInlineUnconditionally says "no" 1651then the simplifier iterates indefinitely: 1652 x = f y 1653 k = Just x -- We decide that k is 'certainly_inline' 1654 v = ...k... -- but preInlineUnconditionally doesn't inline it 1655inline ==> 1656 k = Just (f y) 1657 v = ...k... 1658float ==> 1659 x1 = f y 1660 k = Just x1 1661 v = ...k... 1662 1663This is worse than the slow cascade, so we only want to say "certainly_inline" 1664if it really is certain. Look at the note with preInlineUnconditionally 1665for the various clauses. 1666 1667 1668************************************************************************ 1669* * 1670 Expressions 1671* * 1672************************************************************************ 1673-} 1674 1675occAnal :: OccEnv 1676 -> CoreExpr 1677 -> (UsageDetails, -- Gives info only about the "interesting" Ids 1678 CoreExpr) 1679 1680occAnal _ expr@(Type _) = (emptyDetails, expr) 1681occAnal _ expr@(Lit _) = (emptyDetails, expr) 1682occAnal env expr@(Var _) = occAnalApp env (expr, [], []) 1683 -- At one stage, I gathered the idRuleVars for the variable here too, 1684 -- which in a way is the right thing to do. 1685 -- But that went wrong right after specialisation, when 1686 -- the *occurrences* of the overloaded function didn't have any 1687 -- rules in them, so the *specialised* versions looked as if they 1688 -- weren't used at all. 1689 1690occAnal _ (Coercion co) 1691 = (addManyOccsSet emptyDetails (coVarsOfCo co), Coercion co) 1692 -- See Note [Gather occurrences of coercion variables] 1693 1694{- 1695Note [Gather occurrences of coercion variables] 1696~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1697We need to gather info about what coercion variables appear, so that 1698we can sort them into the right place when doing dependency analysis. 1699-} 1700 1701occAnal env (Tick tickish body) 1702 | SourceNote{} <- tickish 1703 = (usage, Tick tickish body') 1704 -- SourceNotes are best-effort; so we just proceed as usual. 1705 -- If we drop a tick due to the issues described below it's 1706 -- not the end of the world. 1707 1708 | tickish `tickishScopesLike` SoftScope 1709 = (markAllNonTailCalled usage, Tick tickish body') 1710 1711 | Breakpoint _ ids <- tickish 1712 = (usage_lam `andUDs` foldr addManyOccs emptyDetails ids, Tick tickish body') 1713 -- never substitute for any of the Ids in a Breakpoint 1714 1715 | otherwise 1716 = (usage_lam, Tick tickish body') 1717 where 1718 !(usage,body') = occAnal env body 1719 -- for a non-soft tick scope, we can inline lambdas only 1720 usage_lam = markAllNonTailCalled (markAllInsideLam usage) 1721 -- TODO There may be ways to make ticks and join points play 1722 -- nicer together, but right now there are problems: 1723 -- let j x = ... in tick<t> (j 1) 1724 -- Making j a join point may cause the simplifier to drop t 1725 -- (if the tick is put into the continuation). So we don't 1726 -- count j 1 as a tail call. 1727 -- See #14242. 1728 1729occAnal env (Cast expr co) 1730 = case occAnal env expr of { (usage, expr') -> 1731 let usage1 = zapDetailsIf (isRhsEnv env) usage 1732 -- usage1: if we see let x = y `cast` co 1733 -- then mark y as 'Many' so that we don't 1734 -- immediately inline y again. 1735 usage2 = addManyOccsSet usage1 (coVarsOfCo co) 1736 -- usage2: see Note [Gather occurrences of coercion variables] 1737 in (markAllNonTailCalled usage2, Cast expr' co) 1738 } 1739 1740occAnal env app@(App _ _) 1741 = occAnalApp env (collectArgsTicks tickishFloatable app) 1742 1743-- Ignore type variables altogether 1744-- (a) occurrences inside type lambdas only not marked as InsideLam 1745-- (b) type variables not in environment 1746 1747occAnal env (Lam x body) 1748 | isTyVar x 1749 = case occAnal env body of { (body_usage, body') -> 1750 (markAllNonTailCalled body_usage, Lam x body') 1751 } 1752 1753-- For value lambdas we do a special hack. Consider 1754-- (\x. \y. ...x...) 1755-- If we did nothing, x is used inside the \y, so would be marked 1756-- as dangerous to dup. But in the common case where the abstraction 1757-- is applied to two arguments this is over-pessimistic. 1758-- So instead, we just mark each binder with its occurrence 1759-- info in the *body* of the multiple lambda. 1760-- Then, the simplifier is careful when partially applying lambdas. 1761 1762occAnal env expr@(Lam _ _) 1763 = case occAnalLamOrRhs env binders body of { (usage, tagged_binders, body') -> 1764 let 1765 expr' = mkLams tagged_binders body' 1766 usage1 = markAllNonTailCalled usage 1767 one_shot_gp = all isOneShotBndr tagged_binders 1768 final_usage | one_shot_gp = usage1 1769 | otherwise = markAllInsideLam usage1 1770 in 1771 (final_usage, expr') } 1772 where 1773 (binders, body) = collectBinders expr 1774 1775occAnal env (Case scrut bndr ty alts) 1776 = case occ_anal_scrut scrut alts of { (scrut_usage, scrut') -> 1777 case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts') -> 1778 let 1779 alts_usage = foldr orUDs emptyDetails alts_usage_s 1780 (alts_usage1, tagged_bndr) = tagLamBinder alts_usage bndr 1781 total_usage = markAllNonTailCalled scrut_usage `andUDs` alts_usage1 1782 -- Alts can have tail calls, but the scrutinee can't 1783 in 1784 total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }} 1785 where 1786 alt_env = mkAltEnv env scrut bndr 1787 occ_anal_alt = occAnalAlt alt_env 1788 1789 occ_anal_scrut (Var v) (alt1 : other_alts) 1790 | not (null other_alts) || not (isDefaultAlt alt1) 1791 = (mkOneOcc env v True 0, Var v) 1792 -- The 'True' says that the variable occurs in an interesting 1793 -- context; the case has at least one non-default alternative 1794 occ_anal_scrut (Tick t e) alts 1795 | t `tickishScopesLike` SoftScope 1796 -- No reason to not look through all ticks here, but only 1797 -- for soft-scoped ticks we can do so without having to 1798 -- update returned occurance info (see occAnal) 1799 = second (Tick t) $ occ_anal_scrut e alts 1800 1801 occ_anal_scrut scrut _alts 1802 = occAnal (vanillaCtxt env) scrut -- No need for rhsCtxt 1803 1804occAnal env (Let bind body) 1805 = case occAnal env body of { (body_usage, body') -> 1806 case occAnalBind env NotTopLevel 1807 noImpRuleEdges bind 1808 body_usage of { (final_usage, new_binds) -> 1809 (final_usage, mkLets new_binds body') }} 1810 1811occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr]) 1812occAnalArgs _ [] _ 1813 = (emptyDetails, []) 1814 1815occAnalArgs env (arg:args) one_shots 1816 | isTypeArg arg 1817 = case occAnalArgs env args one_shots of { (uds, args') -> 1818 (uds, arg:args') } 1819 1820 | otherwise 1821 = case argCtxt env one_shots of { (arg_env, one_shots') -> 1822 case occAnal arg_env arg of { (uds1, arg') -> 1823 case occAnalArgs env args one_shots' of { (uds2, args') -> 1824 (uds1 `andUDs` uds2, arg':args') }}} 1825 1826{- 1827Applications are dealt with specially because we want 1828the "build hack" to work. 1829 1830Note [Arguments of let-bound constructors] 1831~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1832Consider 1833 f x = let y = expensive x in 1834 let z = (True,y) in 1835 (case z of {(p,q)->q}, case z of {(p,q)->q}) 1836We feel free to duplicate the WHNF (True,y), but that means 1837that y may be duplicated thereby. 1838 1839If we aren't careful we duplicate the (expensive x) call! 1840Constructors are rather like lambdas in this way. 1841-} 1842 1843occAnalApp :: OccEnv 1844 -> (Expr CoreBndr, [Arg CoreBndr], [Tickish Id]) 1845 -> (UsageDetails, Expr CoreBndr) 1846occAnalApp env (Var fun, args, ticks) 1847 | null ticks = (uds, mkApps (Var fun) args') 1848 | otherwise = (uds, mkTicks ticks $ mkApps (Var fun) args') 1849 where 1850 uds = fun_uds `andUDs` final_args_uds 1851 1852 !(args_uds, args') = occAnalArgs env args one_shots 1853 !final_args_uds 1854 | isRhsEnv env && is_exp = markAllNonTailCalled $ 1855 markAllInsideLam args_uds 1856 | otherwise = markAllNonTailCalled args_uds 1857 -- We mark the free vars of the argument of a constructor or PAP 1858 -- as "inside-lambda", if it is the RHS of a let(rec). 1859 -- This means that nothing gets inlined into a constructor or PAP 1860 -- argument position, which is what we want. Typically those 1861 -- constructor arguments are just variables, or trivial expressions. 1862 -- We use inside-lam because it's like eta-expanding the PAP. 1863 -- 1864 -- This is the *whole point* of the isRhsEnv predicate 1865 -- See Note [Arguments of let-bound constructors] 1866 1867 n_val_args = valArgCount args 1868 n_args = length args 1869 fun_uds = mkOneOcc env fun (n_val_args > 0) n_args 1870 is_exp = isExpandableApp fun n_val_args 1871 -- See Note [CONLIKE pragma] in BasicTypes 1872 -- The definition of is_exp should match that in Simplify.prepareRhs 1873 1874 one_shots = argsOneShots (idStrictness fun) guaranteed_val_args 1875 guaranteed_val_args = n_val_args + length (takeWhile isOneShotInfo 1876 (occ_one_shots env)) 1877 -- See Note [Sources of one-shot information], bullet point A'] 1878 1879occAnalApp env (fun, args, ticks) 1880 = (markAllNonTailCalled (fun_uds `andUDs` args_uds), 1881 mkTicks ticks $ mkApps fun' args') 1882 where 1883 !(fun_uds, fun') = occAnal (addAppCtxt env args) fun 1884 -- The addAppCtxt is a bit cunning. One iteration of the simplifier 1885 -- often leaves behind beta redexs like 1886 -- (\x y -> e) a1 a2 1887 -- Here we would like to mark x,y as one-shot, and treat the whole 1888 -- thing much like a let. We do this by pushing some True items 1889 -- onto the context stack. 1890 !(args_uds, args') = occAnalArgs env args [] 1891 1892zapDetailsIf :: Bool -- If this is true 1893 -> UsageDetails -- Then do zapDetails on this 1894 -> UsageDetails 1895zapDetailsIf True uds = zapDetails uds 1896zapDetailsIf False uds = uds 1897 1898{- 1899Note [Sources of one-shot information] 1900~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1901The occurrence analyser obtains one-shot-lambda information from two sources: 1902 1903A: Saturated applications: eg f e1 .. en 1904 1905 In general, given a call (f e1 .. en) we can propagate one-shot info from 1906 f's strictness signature into e1 .. en, but /only/ if n is enough to 1907 saturate the strictness signature. A strictness signature like 1908 1909 f :: C1(C1(L))LS 1910 1911 means that *if f is applied to three arguments* then it will guarantee to 1912 call its first argument at most once, and to call the result of that at 1913 most once. But if f has fewer than three arguments, all bets are off; e.g. 1914 1915 map (f (\x y. expensive) e2) xs 1916 1917 Here the \x y abstraction may be called many times (once for each element of 1918 xs) so we should not mark x and y as one-shot. But if it was 1919 1920 map (f (\x y. expensive) 3 2) xs 1921 1922 then the first argument of f will be called at most once. 1923 1924 The one-shot info, derived from f's strictness signature, is 1925 computed by 'argsOneShots', called in occAnalApp. 1926 1927A': Non-obviously saturated applications: eg build (f (\x y -> expensive)) 1928 where f is as above. 1929 1930 In this case, f is only manifestly applied to one argument, so it does not 1931 look saturated. So by the previous point, we should not use its strictness 1932 signature to learn about the one-shotness of \x y. But in this case we can: 1933 build is fully applied, so we may use its strictness signature; and from 1934 that we learn that build calls its argument with two arguments *at most once*. 1935 1936 So there is really only one call to f, and it will have three arguments. In 1937 that sense, f is saturated, and we may proceed as described above. 1938 1939 Hence the computation of 'guaranteed_val_args' in occAnalApp, using 1940 '(occ_one_shots env)'. See also #13227, comment:9 1941 1942B: Let-bindings: eg let f = \c. let ... in \n -> blah 1943 in (build f, build f) 1944 1945 Propagate one-shot info from the demanand-info on 'f' to the 1946 lambdas in its RHS (which may not be syntactically at the top) 1947 1948 This information must have come from a previous run of the demanand 1949 analyser. 1950 1951Previously, the demand analyser would *also* set the one-shot information, but 1952that code was buggy (see #11770), so doing it only in on place, namely here, is 1953saner. 1954 1955Note [OneShots] 1956~~~~~~~~~~~~~~~ 1957When analysing an expression, the occ_one_shots argument contains information 1958about how the function is being used. The length of the list indicates 1959how many arguments will eventually be passed to the analysed expression, 1960and the OneShotInfo indicates whether this application is once or multiple times. 1961 1962Example: 1963 1964 Context of f occ_one_shots when analysing f 1965 1966 f 1 2 [OneShot, OneShot] 1967 map (f 1) [OneShot, NoOneShotInfo] 1968 build f [OneShot, OneShot] 1969 f 1 2 `seq` f 2 1 [NoOneShotInfo, OneShot] 1970 1971Note [Binders in case alternatives] 1972~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1973Consider 1974 case x of y { (a,b) -> f y } 1975We treat 'a', 'b' as dead, because they don't physically occur in the 1976case alternative. (Indeed, a variable is dead iff it doesn't occur in 1977its scope in the output of OccAnal.) It really helps to know when 1978binders are unused. See esp the call to isDeadBinder in 1979Simplify.mkDupableAlt 1980 1981In this example, though, the Simplifier will bring 'a' and 'b' back to 1982life, beause it binds 'y' to (a,b) (imagine got inlined and 1983scrutinised y). 1984-} 1985 1986occAnalLamOrRhs :: OccEnv -> [CoreBndr] -> CoreExpr 1987 -> (UsageDetails, [CoreBndr], CoreExpr) 1988occAnalLamOrRhs env [] body 1989 = case occAnal env body of (body_usage, body') -> (body_usage, [], body') 1990 -- RHS of thunk or nullary join point 1991occAnalLamOrRhs env (bndr:bndrs) body 1992 | isTyVar bndr 1993 = -- Important: Keep the environment so that we don't inline into an RHS like 1994 -- \(@ x) -> C @x (f @x) 1995 -- (see the beginning of Note [Cascading inlines]). 1996 case occAnalLamOrRhs env bndrs body of 1997 (body_usage, bndrs', body') -> (body_usage, bndr:bndrs', body') 1998occAnalLamOrRhs env binders body 1999 = case occAnal env_body body of { (body_usage, body') -> 2000 let 2001 (final_usage, tagged_binders) = tagLamBinders body_usage binders' 2002 -- Use binders' to put one-shot info on the lambdas 2003 in 2004 (final_usage, tagged_binders, body') } 2005 where 2006 (env_body, binders') = oneShotGroup env binders 2007 2008occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr)) 2009 -> CoreAlt 2010 -> (UsageDetails, Alt IdWithOccInfo) 2011occAnalAlt (env, scrut_bind) (con, bndrs, rhs) 2012 = case occAnal env rhs of { (rhs_usage1, rhs1) -> 2013 let 2014 (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs 2015 -- See Note [Binders in case alternatives] 2016 (alt_usg', rhs2) = wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1 2017 in 2018 (alt_usg', (con, tagged_bndrs, rhs2)) } 2019 2020wrapAltRHS :: OccEnv 2021 -> Maybe (Id, CoreExpr) -- proxy mapping generated by mkAltEnv 2022 -> UsageDetails -- usage for entire alt (p -> rhs) 2023 -> [Var] -- alt binders 2024 -> CoreExpr -- alt RHS 2025 -> (UsageDetails, CoreExpr) 2026wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs 2027 | occ_binder_swap env 2028 , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this 2029 -- handles condition (a) in Note [Binder swap] 2030 , not captured -- See condition (b) in Note [Binder swap] 2031 = ( alt_usg' `andUDs` let_rhs_usg 2032 , Let (NonRec tagged_scrut_var let_rhs') alt_rhs ) 2033 where 2034 captured = any (`usedIn` let_rhs_usg) bndrs -- Check condition (b) 2035 2036 -- The rhs of the let may include coercion variables 2037 -- if the scrutinee was a cast, so we must gather their 2038 -- usage. See Note [Gather occurrences of coercion variables] 2039 -- Moreover, the rhs of the let may mention the case-binder, and 2040 -- we want to gather its occ-info as well 2041 (let_rhs_usg, let_rhs') = occAnal env let_rhs 2042 2043 (alt_usg', tagged_scrut_var) = tagLamBinder alt_usg scrut_var 2044 2045wrapAltRHS _ _ alt_usg _ alt_rhs 2046 = (alt_usg, alt_rhs) 2047 2048{- 2049************************************************************************ 2050* * 2051 OccEnv 2052* * 2053************************************************************************ 2054-} 2055 2056data OccEnv 2057 = OccEnv { occ_encl :: !OccEncl -- Enclosing context information 2058 , occ_one_shots :: !OneShots -- See Note [OneShots] 2059 , occ_gbl_scrut :: GlobalScruts 2060 2061 , occ_unf_act :: Id -> Bool -- Which Id unfoldings are active 2062 2063 , occ_rule_act :: Activation -> Bool -- Which rules are active 2064 -- See Note [Finding rule RHS free vars] 2065 2066 , occ_binder_swap :: !Bool -- enable the binder_swap 2067 -- See CorePrep Note [Dead code in CorePrep] 2068 } 2069 2070type GlobalScruts = IdSet -- See Note [Binder swap on GlobalId scrutinees] 2071 2072----------------------------- 2073-- OccEncl is used to control whether to inline into constructor arguments 2074-- For example: 2075-- x = (p,q) -- Don't inline p or q 2076-- y = /\a -> (p a, q a) -- Still don't inline p or q 2077-- z = f (p,q) -- Do inline p,q; it may make a rule fire 2078-- So OccEncl tells enough about the context to know what to do when 2079-- we encounter a constructor application or PAP. 2080 2081data OccEncl 2082 = OccRhs -- RHS of let(rec), albeit perhaps inside a type lambda 2083 -- Don't inline into constructor args here 2084 | OccVanilla -- Argument of function, body of lambda, scruintee of case etc. 2085 -- Do inline into constructor args here 2086 2087instance Outputable OccEncl where 2088 ppr OccRhs = text "occRhs" 2089 ppr OccVanilla = text "occVanilla" 2090 2091-- See note [OneShots] 2092type OneShots = [OneShotInfo] 2093 2094initOccEnv :: OccEnv 2095initOccEnv 2096 = OccEnv { occ_encl = OccVanilla 2097 , occ_one_shots = [] 2098 , occ_gbl_scrut = emptyVarSet 2099 -- To be conservative, we say that all 2100 -- inlines and rules are active 2101 , occ_unf_act = \_ -> True 2102 , occ_rule_act = \_ -> True 2103 , occ_binder_swap = True } 2104 2105vanillaCtxt :: OccEnv -> OccEnv 2106vanillaCtxt env = env { occ_encl = OccVanilla, occ_one_shots = [] } 2107 2108rhsCtxt :: OccEnv -> OccEnv 2109rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] } 2110 2111argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots]) 2112argCtxt env [] 2113 = (env { occ_encl = OccVanilla, occ_one_shots = [] }, []) 2114argCtxt env (one_shots:one_shots_s) 2115 = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s) 2116 2117isRhsEnv :: OccEnv -> Bool 2118isRhsEnv (OccEnv { occ_encl = OccRhs }) = True 2119isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False 2120 2121oneShotGroup :: OccEnv -> [CoreBndr] 2122 -> ( OccEnv 2123 , [CoreBndr] ) 2124 -- The result binders have one-shot-ness set that they might not have had originally. 2125 -- This happens in (build (\c n -> e)). Here the occurrence analyser 2126 -- linearity context knows that c,n are one-shot, and it records that fact in 2127 -- the binder. This is useful to guide subsequent float-in/float-out tranformations 2128 2129oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs 2130 = go ctxt bndrs [] 2131 where 2132 go ctxt [] rev_bndrs 2133 = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla } 2134 , reverse rev_bndrs ) 2135 2136 go [] bndrs rev_bndrs 2137 = ( env { occ_one_shots = [], occ_encl = OccVanilla } 2138 , reverse rev_bndrs ++ bndrs ) 2139 2140 go ctxt@(one_shot : ctxt') (bndr : bndrs) rev_bndrs 2141 | isId bndr = go ctxt' bndrs (bndr': rev_bndrs) 2142 | otherwise = go ctxt bndrs (bndr : rev_bndrs) 2143 where 2144 bndr' = updOneShotInfo bndr one_shot 2145 -- Use updOneShotInfo, not setOneShotInfo, as pre-existing 2146 -- one-shot info might be better than what we can infer, e.g. 2147 -- due to explicit use of the magic 'oneShot' function. 2148 -- See Note [The oneShot function] 2149 2150 2151markJoinOneShots :: Maybe JoinArity -> [Var] -> [Var] 2152-- Mark the lambdas of a non-recursive join point as one-shot. 2153-- This is good to prevent gratuitous float-out etc 2154markJoinOneShots mb_join_arity bndrs 2155 = case mb_join_arity of 2156 Nothing -> bndrs 2157 Just n -> go n bndrs 2158 where 2159 go 0 bndrs = bndrs 2160 go _ [] = [] -- This can legitimately happen. 2161 -- e.g. let j = case ... in j True 2162 -- This will become an arity-1 join point after the 2163 -- simplifier has eta-expanded it; but it may not have 2164 -- enough lambdas /yet/. (Lint checks that JoinIds do 2165 -- have enough lambdas.) 2166 go n (b:bs) = b' : go (n-1) bs 2167 where 2168 b' | isId b = setOneShotLambda b 2169 | otherwise = b 2170 2171addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv 2172addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args 2173 = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt } 2174 2175transClosureFV :: UniqFM VarSet -> UniqFM VarSet 2176-- If (f,g), (g,h) are in the input, then (f,h) is in the output 2177-- as well as (f,g), (g,h) 2178transClosureFV env 2179 | no_change = env 2180 | otherwise = transClosureFV (listToUFM new_fv_list) 2181 where 2182 (no_change, new_fv_list) = mapAccumL bump True (nonDetUFMToList env) 2183 -- It's OK to use nonDetUFMToList here because we'll forget the 2184 -- ordering by creating a new set with listToUFM 2185 bump no_change (b,fvs) 2186 | no_change_here = (no_change, (b,fvs)) 2187 | otherwise = (False, (b,new_fvs)) 2188 where 2189 (new_fvs, no_change_here) = extendFvs env fvs 2190 2191------------- 2192extendFvs_ :: UniqFM VarSet -> VarSet -> VarSet 2193extendFvs_ env s = fst (extendFvs env s) -- Discard the Bool flag 2194 2195extendFvs :: UniqFM VarSet -> VarSet -> (VarSet, Bool) 2196-- (extendFVs env s) returns 2197-- (s `union` env(s), env(s) `subset` s) 2198extendFvs env s 2199 | isNullUFM env 2200 = (s, True) 2201 | otherwise 2202 = (s `unionVarSet` extras, extras `subVarSet` s) 2203 where 2204 extras :: VarSet -- env(s) 2205 extras = nonDetFoldUFM unionVarSet emptyVarSet $ 2206 -- It's OK to use nonDetFoldUFM here because unionVarSet commutes 2207 intersectUFM_C (\x _ -> x) env (getUniqSet s) 2208 2209{- 2210************************************************************************ 2211* * 2212 Binder swap 2213* * 2214************************************************************************ 2215 2216Note [Binder swap] 2217~~~~~~~~~~~~~~~~~~ 2218The "binder swap" tranformation swaps occurence of the 2219scrutinee of a case for occurrences of the case-binder: 2220 2221 (1) case x of b { pi -> ri } 2222 ==> 2223 case x of b { pi -> let x=b in ri } 2224 2225 (2) case (x |> co) of b { pi -> ri } 2226 ==> 2227 case (x |> co) of b { pi -> let x = b |> sym co in ri } 2228 2229In both cases, the trivial 'let' can be eliminated by the 2230immediately following simplifier pass. 2231 2232There are two reasons for making this swap: 2233 2234(A) It reduces the number of occurrences of the scrutinee, x. 2235 That in turn might reduce its occurrences to one, so we 2236 can inline it and save an allocation. E.g. 2237 let x = factorial y in case x of b { I# v -> ...x... } 2238 If we replace 'x' by 'b' in the alternative we get 2239 let x = factorial y in case x of b { I# v -> ...b... } 2240 and now we can inline 'x', thus 2241 case (factorial y) of b { I# v -> ...b... } 2242 2243(B) The case-binder b has unfolding information; in the 2244 example above we know that b = I# v. That in turn allows 2245 nested cases to simplify. Consider 2246 case x of b { I# v -> 2247 ...(case x of b2 { I# v2 -> rhs })... 2248 If we replace 'x' by 'b' in the alternative we get 2249 case x of b { I# v -> 2250 ...(case b of b2 { I# v2 -> rhs })... 2251 and now it is trivial to simplify the inner case: 2252 case x of b { I# v -> 2253 ...(let b2 = b in rhs)... 2254 2255 The same can happen even if the scrutinee is a variable 2256 with a cast: see Note [Case of cast] 2257 2258In both cases, in a particular alternative (pi -> ri), we only 2259add the binding if 2260 (a) x occurs free in (pi -> ri) 2261 (ie it occurs in ri, but is not bound in pi) 2262 (b) the pi does not bind b (or the free vars of co) 2263We need (a) and (b) for the inserted binding to be correct. 2264 2265For the alternatives where we inject the binding, we can transfer 2266all x's OccInfo to b. And that is the point. 2267 2268Notice that 2269 * The deliberate shadowing of 'x'. 2270 * That (a) rapidly becomes false, so no bindings are injected. 2271 2272The reason for doing these transformations /here in the occurrence 2273analyser/ is because it allows us to adjust the OccInfo for 'x' and 2274'b' as we go. 2275 2276 * Suppose the only occurrences of 'x' are the scrutinee and in the 2277 ri; then this transformation makes it occur just once, and hence 2278 get inlined right away. 2279 2280 * If instead we do this in the Simplifier, we don't know whether 'x' 2281 is used in ri, so we are forced to pessimistically zap b's OccInfo 2282 even though it is typically dead (ie neither it nor x appear in 2283 the ri). There's nothing actually wrong with zapping it, except 2284 that it's kind of nice to know which variables are dead. My nose 2285 tells me to keep this information as robustly as possible. 2286 2287The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding 2288{x=b}; it's Nothing if the binder-swap doesn't happen. 2289 2290There is a danger though. Consider 2291 let v = x +# y 2292 in case (f v) of w -> ...v...v... 2293And suppose that (f v) expands to just v. Then we'd like to 2294use 'w' instead of 'v' in the alternative. But it may be too 2295late; we may have substituted the (cheap) x+#y for v in the 2296same simplifier pass that reduced (f v) to v. 2297 2298I think this is just too bad. CSE will recover some of it. 2299 2300Note [Case of cast] 2301~~~~~~~~~~~~~~~~~~~ 2302Consider case (x `cast` co) of b { I# -> 2303 ... (case (x `cast` co) of {...}) ... 2304We'd like to eliminate the inner case. That is the motivation for 2305equation (2) in Note [Binder swap]. When we get to the inner case, we 2306inline x, cancel the casts, and away we go. 2307 2308Note [Binder swap on GlobalId scrutinees] 2309~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2310When the scrutinee is a GlobalId we must take care in two ways 2311 2312 i) In order to *know* whether 'x' occurs free in the RHS, we need its 2313 occurrence info. BUT, we don't gather occurrence info for 2314 GlobalIds. That's the reason for the (small) occ_gbl_scrut env in 2315 OccEnv is for: it says "gather occurrence info for these". 2316 2317 ii) We must call localiseId on 'x' first, in case it's a GlobalId, or 2318 has an External Name. See, for example, SimplEnv Note [Global Ids in 2319 the substitution]. 2320 2321Note [Zap case binders in proxy bindings] 2322~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2323From the original 2324 case x of cb(dead) { p -> ...x... } 2325we will get 2326 case x of cb(live) { p -> let x = cb in ...x... } 2327 2328Core Lint never expects to find an *occurrence* of an Id marked 2329as Dead, so we must zap the OccInfo on cb before making the 2330binding x = cb. See #5028. 2331 2332NB: the OccInfo on /occurrences/ really doesn't matter much; the simplifier 2333doesn't use it. So this is only to satisfy the perhpas-over-picky Lint. 2334 2335Historical note [no-case-of-case] 2336~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2337We *used* to suppress the binder-swap in case expressions when 2338-fno-case-of-case is on. Old remarks: 2339 "This happens in the first simplifier pass, 2340 and enhances full laziness. Here's the bad case: 2341 f = \ y -> ...(case x of I# v -> ...(case x of ...) ... ) 2342 If we eliminate the inner case, we trap it inside the I# v -> arm, 2343 which might prevent some full laziness happening. I've seen this 2344 in action in spectral/cichelli/Prog.hs: 2345 [(m,n) | m <- [1..max], n <- [1..max]] 2346 Hence the check for NoCaseOfCase." 2347However, now the full-laziness pass itself reverses the binder-swap, so this 2348check is no longer necessary. 2349 2350Historical note [Suppressing the case binder-swap] 2351~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2352This old note describes a problem that is also fixed by doing the 2353binder-swap in OccAnal: 2354 2355 There is another situation when it might make sense to suppress the 2356 case-expression binde-swap. If we have 2357 2358 case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 } 2359 ...other cases .... } 2360 2361 We'll perform the binder-swap for the outer case, giving 2362 2363 case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 } 2364 ...other cases .... } 2365 2366 But there is no point in doing it for the inner case, because w1 can't 2367 be inlined anyway. Furthermore, doing the case-swapping involves 2368 zapping w2's occurrence info (see paragraphs that follow), and that 2369 forces us to bind w2 when doing case merging. So we get 2370 2371 case x of w1 { A -> let w2 = w1 in e1 2372 B -> let w2 = w1 in e2 2373 ...other cases .... } 2374 2375 This is plain silly in the common case where w2 is dead. 2376 2377 Even so, I can't see a good way to implement this idea. I tried 2378 not doing the binder-swap if the scrutinee was already evaluated 2379 but that failed big-time: 2380 2381 data T = MkT !Int 2382 2383 case v of w { MkT x -> 2384 case x of x1 { I# y1 -> 2385 case x of x2 { I# y2 -> ... 2386 2387 Notice that because MkT is strict, x is marked "evaluated". But to 2388 eliminate the last case, we must either make sure that x (as well as 2389 x1) has unfolding MkT y1. The straightforward thing to do is to do 2390 the binder-swap. So this whole note is a no-op. 2391 2392It's fixed by doing the binder-swap in OccAnal because we can do the 2393binder-swap unconditionally and still get occurrence analysis 2394information right. 2395-} 2396 2397mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr)) 2398-- Does three things: a) makes the occ_one_shots = OccVanilla 2399-- b) extends the GlobalScruts if possible 2400-- c) returns a proxy mapping, binding the scrutinee 2401-- to the case binder, if possible 2402mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr 2403 = case stripTicksTopE (const True) scrut of 2404 Var v -> add_scrut v case_bndr' 2405 Cast (Var v) co -> add_scrut v (Cast case_bndr' (mkSymCo co)) 2406 -- See Note [Case of cast] 2407 _ -> (env { occ_encl = OccVanilla }, Nothing) 2408 2409 where 2410 add_scrut v rhs 2411 | isGlobalId v = (env { occ_encl = OccVanilla }, Nothing) 2412 | otherwise = ( env { occ_encl = OccVanilla 2413 , occ_gbl_scrut = pe `extendVarSet` v } 2414 , Just (localise v, rhs) ) 2415 -- ToDO: this isGlobalId stuff is a TEMPORARY FIX 2416 -- to avoid the binder-swap for GlobalIds 2417 -- See #16346 2418 2419 case_bndr' = Var (zapIdOccInfo case_bndr) 2420 -- See Note [Zap case binders in proxy bindings] 2421 2422 -- Localise the scrut_var before shadowing it; we're making a 2423 -- new binding for it, and it might have an External Name, or 2424 -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees] 2425 -- Also we don't want any INLINE or NOINLINE pragmas! 2426 localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var)) 2427 (idType scrut_var) 2428 2429{- 2430************************************************************************ 2431* * 2432\subsection[OccurAnal-types]{OccEnv} 2433* * 2434************************************************************************ 2435 2436Note [UsageDetails and zapping] 2437~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2438 2439On many occasions, we must modify all gathered occurrence data at once. For 2440instance, all occurrences underneath a (non-one-shot) lambda set the 2441'occ_in_lam' flag to become 'True'. We could use 'mapVarEnv' to do this, but 2442that takes O(n) time and we will do this often---in particular, there are many 2443places where tail calls are not allowed, and each of these causes all variables 2444to get marked with 'NoTailCallInfo'. 2445 2446Instead of relying on `mapVarEnv`, then, we carry three 'IdEnv's around along 2447with the 'OccInfoEnv'. Each of these extra environments is a "zapped set" 2448recording which variables have been zapped in some way. Zapping all occurrence 2449info then simply means setting the corresponding zapped set to the whole 2450'OccInfoEnv', a fast O(1) operation. 2451-} 2452 2453type OccInfoEnv = IdEnv OccInfo -- A finite map from ids to their usage 2454 -- INVARIANT: never IAmDead 2455 -- (Deadness is signalled by not being in the map at all) 2456 2457type ZappedSet = OccInfoEnv -- Values are ignored 2458 2459data UsageDetails 2460 = UD { ud_env :: !OccInfoEnv 2461 , ud_z_many :: ZappedSet -- apply 'markMany' to these 2462 , ud_z_in_lam :: ZappedSet -- apply 'markInsideLam' to these 2463 , ud_z_no_tail :: ZappedSet } -- apply 'markNonTailCalled' to these 2464 -- INVARIANT: All three zapped sets are subsets of the OccInfoEnv 2465 2466instance Outputable UsageDetails where 2467 ppr ud = ppr (ud_env (flattenUsageDetails ud)) 2468 2469------------------- 2470-- UsageDetails API 2471 2472andUDs, orUDs 2473 :: UsageDetails -> UsageDetails -> UsageDetails 2474andUDs = combineUsageDetailsWith addOccInfo 2475orUDs = combineUsageDetailsWith orOccInfo 2476 2477andUDsList :: [UsageDetails] -> UsageDetails 2478andUDsList = foldl' andUDs emptyDetails 2479 2480mkOneOcc :: OccEnv -> Id -> InterestingCxt -> JoinArity -> UsageDetails 2481mkOneOcc env id int_cxt arity 2482 | isLocalId id 2483 = singleton $ OneOcc { occ_in_lam = False 2484 , occ_n_br = 1 2485 , occ_int_cxt = int_cxt 2486 , occ_tail = AlwaysTailCalled arity } 2487 | id `elemVarSet` occ_gbl_scrut env 2488 = singleton noOccInfo 2489 2490 | otherwise 2491 = emptyDetails 2492 where 2493 singleton info = emptyDetails { ud_env = unitVarEnv id info } 2494 2495addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails 2496addOneOcc ud id info 2497 = ud { ud_env = extendVarEnv_C plus_zapped (ud_env ud) id info } 2498 `alterZappedSets` (`delVarEnv` id) 2499 where 2500 plus_zapped old new = doZapping ud id old `addOccInfo` new 2501 2502addManyOccsSet :: UsageDetails -> VarSet -> UsageDetails 2503addManyOccsSet usage id_set = nonDetFoldUniqSet addManyOccs usage id_set 2504 -- It's OK to use nonDetFoldUFM here because addManyOccs commutes 2505 2506-- Add several occurrences, assumed not to be tail calls 2507addManyOccs :: Var -> UsageDetails -> UsageDetails 2508addManyOccs v u | isId v = addOneOcc u v noOccInfo 2509 | otherwise = u 2510 -- Give a non-committal binder info (i.e noOccInfo) because 2511 -- a) Many copies of the specialised thing can appear 2512 -- b) We don't want to substitute a BIG expression inside a RULE 2513 -- even if that's the only occurrence of the thing 2514 -- (Same goes for INLINE.) 2515 2516delDetails :: UsageDetails -> Id -> UsageDetails 2517delDetails ud bndr 2518 = ud `alterUsageDetails` (`delVarEnv` bndr) 2519 2520delDetailsList :: UsageDetails -> [Id] -> UsageDetails 2521delDetailsList ud bndrs 2522 = ud `alterUsageDetails` (`delVarEnvList` bndrs) 2523 2524emptyDetails :: UsageDetails 2525emptyDetails = UD { ud_env = emptyVarEnv 2526 , ud_z_many = emptyVarEnv 2527 , ud_z_in_lam = emptyVarEnv 2528 , ud_z_no_tail = emptyVarEnv } 2529 2530isEmptyDetails :: UsageDetails -> Bool 2531isEmptyDetails = isEmptyVarEnv . ud_env 2532 2533markAllMany, markAllInsideLam, markAllNonTailCalled, zapDetails 2534 :: UsageDetails -> UsageDetails 2535markAllMany ud = ud { ud_z_many = ud_env ud } 2536markAllInsideLam ud = ud { ud_z_in_lam = ud_env ud } 2537markAllNonTailCalled ud = ud { ud_z_no_tail = ud_env ud } 2538 2539zapDetails = markAllMany . markAllNonTailCalled -- effectively sets to noOccInfo 2540 2541lookupDetails :: UsageDetails -> Id -> OccInfo 2542lookupDetails ud id 2543 | isCoVar id -- We do not currenly gather occurrence info (from types) 2544 = noOccInfo -- for CoVars, so we must conservatively mark them as used 2545 -- See Note [DoO not mark CoVars as dead] 2546 | otherwise 2547 = case lookupVarEnv (ud_env ud) id of 2548 Just occ -> doZapping ud id occ 2549 Nothing -> IAmDead 2550 2551usedIn :: Id -> UsageDetails -> Bool 2552v `usedIn` ud = isExportedId v || v `elemVarEnv` ud_env ud 2553 2554udFreeVars :: VarSet -> UsageDetails -> VarSet 2555-- Find the subset of bndrs that are mentioned in uds 2556udFreeVars bndrs ud = restrictUniqSetToUFM bndrs (ud_env ud) 2557 2558{- Note [Do not mark CoVars as dead] 2559~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2560It's obviously wrong to mark CoVars as dead if they are used. 2561Currently we don't traverse types to gather usase info for CoVars, 2562so we had better treat them as having noOccInfo. 2563 2564This showed up in #15696 we had something like 2565 case eq_sel d of co -> ...(typeError @(...co...) "urk")... 2566 2567Then 'd' was substitued by a dictionary, so the expression 2568simpified to 2569 case (Coercion <blah>) of co -> ...(typeError @(...co...) "urk")... 2570 2571But then the "drop the case altogether" equation of rebuildCase 2572thought that 'co' was dead, and discarded the entire case. Urk! 2573 2574I have no idea how we managed to avoid this pitfall for so long! 2575-} 2576 2577------------------- 2578-- Auxiliary functions for UsageDetails implementation 2579 2580combineUsageDetailsWith :: (OccInfo -> OccInfo -> OccInfo) 2581 -> UsageDetails -> UsageDetails -> UsageDetails 2582combineUsageDetailsWith plus_occ_info ud1 ud2 2583 | isEmptyDetails ud1 = ud2 2584 | isEmptyDetails ud2 = ud1 2585 | otherwise 2586 = UD { ud_env = plusVarEnv_C plus_occ_info (ud_env ud1) (ud_env ud2) 2587 , ud_z_many = plusVarEnv (ud_z_many ud1) (ud_z_many ud2) 2588 , ud_z_in_lam = plusVarEnv (ud_z_in_lam ud1) (ud_z_in_lam ud2) 2589 , ud_z_no_tail = plusVarEnv (ud_z_no_tail ud1) (ud_z_no_tail ud2) } 2590 2591doZapping :: UsageDetails -> Var -> OccInfo -> OccInfo 2592doZapping ud var occ 2593 = doZappingByUnique ud (varUnique var) occ 2594 2595doZappingByUnique :: UsageDetails -> Unique -> OccInfo -> OccInfo 2596doZappingByUnique ud uniq 2597 = (if | in_subset ud_z_many -> markMany 2598 | in_subset ud_z_in_lam -> markInsideLam 2599 | otherwise -> id) . 2600 (if | in_subset ud_z_no_tail -> markNonTailCalled 2601 | otherwise -> id) 2602 where 2603 in_subset field = uniq `elemVarEnvByKey` field ud 2604 2605alterZappedSets :: UsageDetails -> (ZappedSet -> ZappedSet) -> UsageDetails 2606alterZappedSets ud f 2607 = ud { ud_z_many = f (ud_z_many ud) 2608 , ud_z_in_lam = f (ud_z_in_lam ud) 2609 , ud_z_no_tail = f (ud_z_no_tail ud) } 2610 2611alterUsageDetails :: UsageDetails -> (OccInfoEnv -> OccInfoEnv) -> UsageDetails 2612alterUsageDetails ud f 2613 = ud { ud_env = f (ud_env ud) } 2614 `alterZappedSets` f 2615 2616flattenUsageDetails :: UsageDetails -> UsageDetails 2617flattenUsageDetails ud 2618 = ud { ud_env = mapUFM_Directly (doZappingByUnique ud) (ud_env ud) } 2619 `alterZappedSets` const emptyVarEnv 2620 2621------------------- 2622-- See Note [Adjusting right-hand sides] 2623adjustRhsUsage :: Maybe JoinArity -> RecFlag 2624 -> [CoreBndr] -- Outer lambdas, AFTER occ anal 2625 -> UsageDetails -> UsageDetails 2626adjustRhsUsage mb_join_arity rec_flag bndrs usage 2627 = maybe_mark_lam (maybe_drop_tails usage) 2628 where 2629 maybe_mark_lam ud | one_shot = ud 2630 | otherwise = markAllInsideLam ud 2631 maybe_drop_tails ud | exact_join = ud 2632 | otherwise = markAllNonTailCalled ud 2633 2634 one_shot = case mb_join_arity of 2635 Just join_arity 2636 | isRec rec_flag -> False 2637 | otherwise -> all isOneShotBndr (drop join_arity bndrs) 2638 Nothing -> all isOneShotBndr bndrs 2639 2640 exact_join = case mb_join_arity of 2641 Just join_arity -> bndrs `lengthIs` join_arity 2642 _ -> False 2643 2644type IdWithOccInfo = Id 2645 2646tagLamBinders :: UsageDetails -- Of scope 2647 -> [Id] -- Binders 2648 -> (UsageDetails, -- Details with binders removed 2649 [IdWithOccInfo]) -- Tagged binders 2650tagLamBinders usage binders 2651 = usage' `seq` (usage', bndrs') 2652 where 2653 (usage', bndrs') = mapAccumR tagLamBinder usage binders 2654 2655tagLamBinder :: UsageDetails -- Of scope 2656 -> Id -- Binder 2657 -> (UsageDetails, -- Details with binder removed 2658 IdWithOccInfo) -- Tagged binders 2659-- Used for lambda and case binders 2660-- It copes with the fact that lambda bindings can have a 2661-- stable unfolding, used for join points 2662tagLamBinder usage bndr 2663 = (usage2, bndr') 2664 where 2665 occ = lookupDetails usage bndr 2666 bndr' = setBinderOcc (markNonTailCalled occ) bndr 2667 -- Don't try to make an argument into a join point 2668 usage1 = usage `delDetails` bndr 2669 usage2 | isId bndr = addManyOccsSet usage1 (idUnfoldingVars bndr) 2670 -- This is effectively the RHS of a 2671 -- non-join-point binding, so it's okay to use 2672 -- addManyOccsSet, which assumes no tail calls 2673 | otherwise = usage1 2674 2675tagNonRecBinder :: TopLevelFlag -- At top level? 2676 -> UsageDetails -- Of scope 2677 -> CoreBndr -- Binder 2678 -> (UsageDetails, -- Details with binder removed 2679 IdWithOccInfo) -- Tagged binder 2680 2681tagNonRecBinder lvl usage binder 2682 = let 2683 occ = lookupDetails usage binder 2684 will_be_join = decideJoinPointHood lvl usage [binder] 2685 occ' | will_be_join = -- must already be marked AlwaysTailCalled 2686 ASSERT(isAlwaysTailCalled occ) occ 2687 | otherwise = markNonTailCalled occ 2688 binder' = setBinderOcc occ' binder 2689 usage' = usage `delDetails` binder 2690 in 2691 usage' `seq` (usage', binder') 2692 2693tagRecBinders :: TopLevelFlag -- At top level? 2694 -> UsageDetails -- Of body of let ONLY 2695 -> [(CoreBndr, -- Binder 2696 UsageDetails, -- RHS usage details 2697 [CoreBndr])] -- Lambdas in new RHS 2698 -> (UsageDetails, -- Adjusted details for whole scope, 2699 -- with binders removed 2700 [IdWithOccInfo]) -- Tagged binders 2701-- Substantially more complicated than non-recursive case. Need to adjust RHS 2702-- details *before* tagging binders (because the tags depend on the RHSes). 2703tagRecBinders lvl body_uds triples 2704 = let 2705 (bndrs, rhs_udss, _) = unzip3 triples 2706 2707 -- 1. Determine join-point-hood of whole group, as determined by 2708 -- the *unadjusted* usage details 2709 unadj_uds = foldr andUDs body_uds rhs_udss 2710 will_be_joins = decideJoinPointHood lvl unadj_uds bndrs 2711 2712 -- 2. Adjust usage details of each RHS, taking into account the 2713 -- join-point-hood decision 2714 rhs_udss' = map adjust triples 2715 adjust (bndr, rhs_uds, rhs_bndrs) 2716 = adjustRhsUsage mb_join_arity Recursive rhs_bndrs rhs_uds 2717 where 2718 -- Can't use willBeJoinId_maybe here because we haven't tagged the 2719 -- binder yet (the tag depends on these adjustments!) 2720 mb_join_arity 2721 | will_be_joins 2722 , let occ = lookupDetails unadj_uds bndr 2723 , AlwaysTailCalled arity <- tailCallInfo occ 2724 = Just arity 2725 | otherwise 2726 = ASSERT(not will_be_joins) -- Should be AlwaysTailCalled if 2727 Nothing -- we are making join points! 2728 2729 -- 3. Compute final usage details from adjusted RHS details 2730 adj_uds = foldr andUDs body_uds rhs_udss' 2731 2732 -- 4. Tag each binder with its adjusted details 2733 bndrs' = [ setBinderOcc (lookupDetails adj_uds bndr) bndr 2734 | bndr <- bndrs ] 2735 2736 -- 5. Drop the binders from the adjusted details and return 2737 usage' = adj_uds `delDetailsList` bndrs 2738 in 2739 (usage', bndrs') 2740 2741setBinderOcc :: OccInfo -> CoreBndr -> CoreBndr 2742setBinderOcc occ_info bndr 2743 | isTyVar bndr = bndr 2744 | isExportedId bndr = if isManyOccs (idOccInfo bndr) 2745 then bndr 2746 else setIdOccInfo bndr noOccInfo 2747 -- Don't use local usage info for visible-elsewhere things 2748 -- BUT *do* erase any IAmALoopBreaker annotation, because we're 2749 -- about to re-generate it and it shouldn't be "sticky" 2750 2751 | otherwise = setIdOccInfo bndr occ_info 2752 2753-- | Decide whether some bindings should be made into join points or not. 2754-- Returns `False` if they can't be join points. Note that it's an 2755-- all-or-nothing decision, as if multiple binders are given, they're 2756-- assumed to be mutually recursive. 2757-- 2758-- It must, however, be a final decision. If we say "True" for 'f', 2759-- and then subsequently decide /not/ make 'f' into a join point, then 2760-- the decision about another binding 'g' might be invalidated if (say) 2761-- 'f' tail-calls 'g'. 2762-- 2763-- See Note [Invariants on join points] in CoreSyn. 2764decideJoinPointHood :: TopLevelFlag -> UsageDetails 2765 -> [CoreBndr] 2766 -> Bool 2767decideJoinPointHood TopLevel _ _ 2768 = False 2769decideJoinPointHood NotTopLevel usage bndrs 2770 | isJoinId (head bndrs) 2771 = WARN(not all_ok, text "OccurAnal failed to rediscover join point(s):" <+> 2772 ppr bndrs) 2773 all_ok 2774 | otherwise 2775 = all_ok 2776 where 2777 -- See Note [Invariants on join points]; invariants cited by number below. 2778 -- Invariant 2 is always satisfiable by the simplifier by eta expansion. 2779 all_ok = -- Invariant 3: Either all are join points or none are 2780 all ok bndrs 2781 2782 ok bndr 2783 | -- Invariant 1: Only tail calls, all same join arity 2784 AlwaysTailCalled arity <- tailCallInfo (lookupDetails usage bndr) 2785 2786 , -- Invariant 1 as applied to LHSes of rules 2787 all (ok_rule arity) (idCoreRules bndr) 2788 2789 -- Invariant 2a: stable unfoldings 2790 -- See Note [Join points and INLINE pragmas] 2791 , ok_unfolding arity (realIdUnfolding bndr) 2792 2793 -- Invariant 4: Satisfies polymorphism rule 2794 , isValidJoinPointType arity (idType bndr) 2795 = True 2796 2797 | otherwise 2798 = False 2799 2800 ok_rule _ BuiltinRule{} = False -- only possible with plugin shenanigans 2801 ok_rule join_arity (Rule { ru_args = args }) 2802 = args `lengthIs` join_arity 2803 -- Invariant 1 as applied to LHSes of rules 2804 2805 -- ok_unfolding returns False if we should /not/ convert a non-join-id 2806 -- into a join-id, even though it is AlwaysTailCalled 2807 ok_unfolding join_arity (CoreUnfolding { uf_src = src, uf_tmpl = rhs }) 2808 = not (isStableSource src && join_arity > joinRhsArity rhs) 2809 ok_unfolding _ (DFunUnfolding {}) 2810 = False 2811 ok_unfolding _ _ 2812 = True 2813 2814willBeJoinId_maybe :: CoreBndr -> Maybe JoinArity 2815willBeJoinId_maybe bndr 2816 = case tailCallInfo (idOccInfo bndr) of 2817 AlwaysTailCalled arity -> Just arity 2818 _ -> isJoinId_maybe bndr 2819 2820 2821{- Note [Join points and INLINE pragmas] 2822~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2823Consider 2824 f x = let g = \x. not -- Arity 1 2825 {-# INLINE g #-} 2826 in case x of 2827 A -> g True True 2828 B -> g True False 2829 C -> blah2 2830 2831Here 'g' is always tail-called applied to 2 args, but the stable 2832unfolding captured by the INLINE pragma has arity 1. If we try to 2833convert g to be a join point, its unfolding will still have arity 1 2834(since it is stable, and we don't meddle with stable unfoldings), and 2835Lint will complain (see Note [Invariants on join points], (2a), in 2836CoreSyn. #13413. 2837 2838Moreover, since g is going to be inlined anyway, there is no benefit 2839from making it a join point. 2840 2841If it is recursive, and uselessly marked INLINE, this will stop us 2842making it a join point, which is annoying. But occasionally 2843(notably in class methods; see Note [Instances and loop breakers] in 2844TcInstDcls) we mark recursive things as INLINE but the recursion 2845unravels; so ignoring INLINE pragmas on recursive things isn't good 2846either. 2847 2848See Invariant 2a of Note [Invariants on join points] in CoreSyn 2849 2850 2851************************************************************************ 2852* * 2853\subsection{Operations over OccInfo} 2854* * 2855************************************************************************ 2856-} 2857 2858markMany, markInsideLam, markNonTailCalled :: OccInfo -> OccInfo 2859 2860markMany IAmDead = IAmDead 2861markMany occ = ManyOccs { occ_tail = occ_tail occ } 2862 2863markInsideLam occ@(OneOcc {}) = occ { occ_in_lam = True } 2864markInsideLam occ = occ 2865 2866markNonTailCalled IAmDead = IAmDead 2867markNonTailCalled occ = occ { occ_tail = NoTailCallInfo } 2868 2869addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo 2870 2871addOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) ) 2872 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo` 2873 tailCallInfo a2 } 2874 -- Both branches are at least One 2875 -- (Argument is never IAmDead) 2876 2877-- (orOccInfo orig new) is used 2878-- when combining occurrence info from branches of a case 2879 2880orOccInfo (OneOcc { occ_in_lam = in_lam1 2881 , occ_n_br = nbr1 2882 , occ_int_cxt = int_cxt1 2883 , occ_tail = tail1 }) 2884 (OneOcc { occ_in_lam = in_lam2 2885 , occ_n_br = nbr2 2886 , occ_int_cxt = int_cxt2 2887 , occ_tail = tail2 }) 2888 = OneOcc { occ_n_br = nbr1 + nbr2 2889 , occ_in_lam = in_lam1 || in_lam2 2890 , occ_int_cxt = int_cxt1 && int_cxt2 2891 , occ_tail = tail1 `andTailCallInfo` tail2 } 2892 2893orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) ) 2894 ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo` 2895 tailCallInfo a2 } 2896 2897andTailCallInfo :: TailCallInfo -> TailCallInfo -> TailCallInfo 2898andTailCallInfo info@(AlwaysTailCalled arity1) (AlwaysTailCalled arity2) 2899 | arity1 == arity2 = info 2900andTailCallInfo _ _ = NoTailCallInfo 2901