1{-
2(c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4************************************************************************
5*                                                                      *
6\section[OccurAnal]{Occurrence analysis pass}
7*                                                                      *
8************************************************************************
9
10The occurrence analyser re-typechecks a core expression, returning a new
11core expression with (hopefully) improved usage information.
12-}
13
14{-# LANGUAGE CPP, BangPatterns, MultiWayIf, ViewPatterns  #-}
15
16module OccurAnal (
17        occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
18    ) where
19
20#include "GhclibHsVersions.h"
21
22import GhcPrelude
23
24import CoreSyn
25import CoreFVs
26import CoreUtils        ( exprIsTrivial, isDefaultAlt, isExpandableApp,
27                          stripTicksTopE, mkTicks )
28import CoreArity        ( joinRhsArity )
29import Id
30import IdInfo
31import Name( localiseName )
32import BasicTypes
33import Module( Module )
34import Coercion
35import Type
36
37import VarSet
38import VarEnv
39import Var
40import Demand           ( argOneShots, argsOneShots )
41import Digraph          ( SCC(..), Node(..)
42                        , stronglyConnCompFromEdgedVerticesUniq
43                        , stronglyConnCompFromEdgedVerticesUniqR )
44import Unique
45import UniqFM
46import UniqSet
47import Util
48import Outputable
49import Data.List
50import Control.Arrow    ( second )
51
52{-
53************************************************************************
54*                                                                      *
55    occurAnalysePgm, occurAnalyseExpr, occurAnalyseExpr_NoBinderSwap
56*                                                                      *
57************************************************************************
58
59Here's the externally-callable interface:
60-}
61
62occurAnalysePgm :: Module         -- Used only in debug output
63                -> (Id -> Bool)         -- Active unfoldings
64                -> (Activation -> Bool) -- Active rules
65                -> [CoreRule]
66                -> CoreProgram -> CoreProgram
67occurAnalysePgm this_mod active_unf active_rule imp_rules binds
68  | isEmptyDetails final_usage
69  = occ_anald_binds
70
71  | otherwise   -- See Note [Glomming]
72  = WARN( True, hang (text "Glomming in" <+> ppr this_mod <> colon)
73                   2 (ppr final_usage ) )
74    occ_anald_glommed_binds
75  where
76    init_env = initOccEnv { occ_rule_act = active_rule
77                          , occ_unf_act  = active_unf }
78
79    (final_usage, occ_anald_binds) = go init_env binds
80    (_, occ_anald_glommed_binds)   = occAnalRecBind init_env TopLevel
81                                                    imp_rule_edges
82                                                    (flattenBinds binds)
83                                                    initial_uds
84          -- It's crucial to re-analyse the glommed-together bindings
85          -- so that we establish the right loop breakers. Otherwise
86          -- we can easily create an infinite loop (#9583 is an example)
87          --
88          -- Also crucial to re-analyse the /original/ bindings
89          -- in case the first pass accidentally discarded as dead code
90          -- a binding that was actually needed (albeit before its
91          -- definition site).  #17724 threw this up.
92
93    initial_uds = addManyOccsSet emptyDetails
94                            (rulesFreeVars imp_rules)
95    -- The RULES declarations keep things alive!
96
97    -- Note [Preventing loops due to imported functions rules]
98    imp_rule_edges = foldr (plusVarEnv_C unionVarSet) emptyVarEnv
99                            [ mapVarEnv (const maps_to) $
100                                getUniqSet (exprFreeIds arg `delVarSetList` ru_bndrs imp_rule)
101                            | imp_rule <- imp_rules
102                            , not (isBuiltinRule imp_rule)  -- See Note [Plugin rules]
103                            , let maps_to = exprFreeIds (ru_rhs imp_rule)
104                                             `delVarSetList` ru_bndrs imp_rule
105                            , arg <- ru_args imp_rule ]
106
107    go :: OccEnv -> [CoreBind] -> (UsageDetails, [CoreBind])
108    go _ []
109        = (initial_uds, [])
110    go env (bind:binds)
111        = (final_usage, bind' ++ binds')
112        where
113           (bs_usage, binds')   = go env binds
114           (final_usage, bind') = occAnalBind env TopLevel imp_rule_edges bind
115                                              bs_usage
116
117occurAnalyseExpr :: CoreExpr -> CoreExpr
118        -- Do occurrence analysis, and discard occurrence info returned
119occurAnalyseExpr = occurAnalyseExpr' True -- do binder swap
120
121occurAnalyseExpr_NoBinderSwap :: CoreExpr -> CoreExpr
122occurAnalyseExpr_NoBinderSwap = occurAnalyseExpr' False -- do not do binder swap
123
124occurAnalyseExpr' :: Bool -> CoreExpr -> CoreExpr
125occurAnalyseExpr' enable_binder_swap expr
126  = snd (occAnal env expr)
127  where
128    env = initOccEnv { occ_binder_swap = enable_binder_swap }
129
130{- Note [Plugin rules]
131~~~~~~~~~~~~~~~~~~~~~~
132Conal Elliott (#11651) built a GHC plugin that added some
133BuiltinRules (for imported Ids) to the mg_rules field of ModGuts, to
134do some domain-specific transformations that could not be expressed
135with an ordinary pattern-matching CoreRule.  But then we can't extract
136the dependencies (in imp_rule_edges) from ru_rhs etc, because a
137BuiltinRule doesn't have any of that stuff.
138
139So we simply assume that BuiltinRules have no dependencies, and filter
140them out from the imp_rule_edges comprehension.
141-}
142
143{-
144************************************************************************
145*                                                                      *
146                Bindings
147*                                                                      *
148************************************************************************
149
150Note [Recursive bindings: the grand plan]
151~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
152When we come across a binding group
153  Rec { x1 = r1; ...; xn = rn }
154we treat it like this (occAnalRecBind):
155
1561. Occurrence-analyse each right hand side, and build a
157   "Details" for each binding to capture the results.
158
159   Wrap the details in a Node (details, node-id, dep-node-ids),
160   where node-id is just the unique of the binder, and
161   dep-node-ids lists all binders on which this binding depends.
162   We'll call these the "scope edges".
163   See Note [Forming the Rec groups].
164
165   All this is done by makeNode.
166
1672. Do SCC-analysis on these Nodes.  Each SCC will become a new Rec or
168   NonRec.  The key property is that every free variable of a binding
169   is accounted for by the scope edges, so that when we are done
170   everything is still in scope.
171
1723. For each Cyclic SCC of the scope-edge SCC-analysis in (2), we
173   identify suitable loop-breakers to ensure that inlining terminates.
174   This is done by occAnalRec.
175
1764. To do so we form a new set of Nodes, with the same details, but
177   different edges, the "loop-breaker nodes". The loop-breaker nodes
178   have both more and fewer dependencies than the scope edges
179   (see Note [Choosing loop breakers])
180
181   More edges: if f calls g, and g has an active rule that mentions h
182               then we add an edge from f -> h
183
184   Fewer edges: we only include dependencies on active rules, on rule
185                RHSs (not LHSs) and if there is an INLINE pragma only
186                on the stable unfolding (and vice versa).  The scope
187                edges must be much more inclusive.
188
1895.  The "weak fvs" of a node are, by definition:
190       the scope fvs - the loop-breaker fvs
191    See Note [Weak loop breakers], and the nd_weak field of Details
192
1936.  Having formed the loop-breaker nodes
194
195Note [Dead code]
196~~~~~~~~~~~~~~~~
197Dropping dead code for a cyclic Strongly Connected Component is done
198in a very simple way:
199
200        the entire SCC is dropped if none of its binders are mentioned
201        in the body; otherwise the whole thing is kept.
202
203The key observation is that dead code elimination happens after
204dependency analysis: so 'occAnalBind' processes SCCs instead of the
205original term's binding groups.
206
207Thus 'occAnalBind' does indeed drop 'f' in an example like
208
209        letrec f = ...g...
210               g = ...(...g...)...
211        in
212           ...g...
213
214when 'g' no longer uses 'f' at all (eg 'f' does not occur in a RULE in
215'g'). 'occAnalBind' first consumes 'CyclicSCC g' and then it consumes
216'AcyclicSCC f', where 'body_usage' won't contain 'f'.
217
218------------------------------------------------------------
219Note [Forming Rec groups]
220~~~~~~~~~~~~~~~~~~~~~~~~~
221We put bindings {f = ef; g = eg } in a Rec group if "f uses g"
222and "g uses f", no matter how indirectly.  We do a SCC analysis
223with an edge f -> g if "f uses g".
224
225More precisely, "f uses g" iff g should be in scope wherever f is.
226That is, g is free in:
227  a) the rhs 'ef'
228  b) or the RHS of a rule for f (Note [Rules are extra RHSs])
229  c) or the LHS or a rule for f (Note [Rule dependency info])
230
231These conditions apply regardless of the activation of the RULE (eg it might be
232inactive in this phase but become active later).  Once a Rec is broken up
233it can never be put back together, so we must be conservative.
234
235The principle is that, regardless of rule firings, every variable is
236always in scope.
237
238  * Note [Rules are extra RHSs]
239    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
240    A RULE for 'f' is like an extra RHS for 'f'. That way the "parent"
241    keeps the specialised "children" alive.  If the parent dies
242    (because it isn't referenced any more), then the children will die
243    too (unless they are already referenced directly).
244
245    To that end, we build a Rec group for each cyclic strongly
246    connected component,
247        *treating f's rules as extra RHSs for 'f'*.
248    More concretely, the SCC analysis runs on a graph with an edge
249    from f -> g iff g is mentioned in
250        (a) f's rhs
251        (b) f's RULES
252    These are rec_edges.
253
254    Under (b) we include variables free in *either* LHS *or* RHS of
255    the rule.  The former might seems silly, but see Note [Rule
256    dependency info].  So in Example [eftInt], eftInt and eftIntFB
257    will be put in the same Rec, even though their 'main' RHSs are
258    both non-recursive.
259
260  * Note [Rule dependency info]
261    ~~~~~~~~~~~~~~~~~~~~~~~~~~~
262    The VarSet in a RuleInfo is used for dependency analysis in the
263    occurrence analyser.  We must track free vars in *both* lhs and rhs.
264    Hence use of idRuleVars, rather than idRuleRhsVars in occAnalBind.
265    Why both? Consider
266        x = y
267        RULE f x = v+4
268    Then if we substitute y for x, we'd better do so in the
269    rule's LHS too, so we'd better ensure the RULE appears to mention 'x'
270    as well as 'v'
271
272  * Note [Rules are visible in their own rec group]
273    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
274    We want the rules for 'f' to be visible in f's right-hand side.
275    And we'd like them to be visible in other functions in f's Rec
276    group.  E.g. in Note [Specialisation rules] we want f' rule
277    to be visible in both f's RHS, and fs's RHS.
278
279    This means that we must simplify the RULEs first, before looking
280    at any of the definitions.  This is done by Simplify.simplRecBind,
281    when it calls addLetIdInfo.
282
283------------------------------------------------------------
284Note [Choosing loop breakers]
285~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
286Loop breaking is surprisingly subtle.  First read the section 4 of
287"Secrets of the GHC inliner".  This describes our basic plan.
288We avoid infinite inlinings by choosing loop breakers, and
289ensuring that a loop breaker cuts each loop.
290
291See also Note [Inlining and hs-boot files] in ToIface, which deals
292with a closely related source of infinite loops.
293
294Fundamentally, we do SCC analysis on a graph.  For each recursive
295group we choose a loop breaker, delete all edges to that node,
296re-analyse the SCC, and iterate.
297
298But what is the graph?  NOT the same graph as was used for Note
299[Forming Rec groups]!  In particular, a RULE is like an equation for
300'f' that is *always* inlined if it is applicable.  We do *not* disable
301rules for loop-breakers.  It's up to whoever makes the rules to make
302sure that the rules themselves always terminate.  See Note [Rules for
303recursive functions] in Simplify.hs
304
305Hence, if
306    f's RHS (or its INLINE template if it has one) mentions g, and
307    g has a RULE that mentions h, and
308    h has a RULE that mentions f
309
310then we *must* choose f to be a loop breaker.  Example: see Note
311[Specialisation rules].
312
313In general, take the free variables of f's RHS, and augment it with
314all the variables reachable by RULES from those starting points.  That
315is the whole reason for computing rule_fv_env in occAnalBind.  (Of
316course we only consider free vars that are also binders in this Rec
317group.)  See also Note [Finding rule RHS free vars]
318
319Note that when we compute this rule_fv_env, we only consider variables
320free in the *RHS* of the rule, in contrast to the way we build the
321Rec group in the first place (Note [Rule dependency info])
322
323Note that if 'g' has RHS that mentions 'w', we should add w to
324g's loop-breaker edges.  More concretely there is an edge from f -> g
325iff
326        (a) g is mentioned in f's RHS `xor` f's INLINE rhs
327            (see Note [Inline rules])
328        (b) or h is mentioned in f's RHS, and
329            g appears in the RHS of an active RULE of h
330            or a transitive sequence of active rules starting with h
331
332Why "active rules"?  See Note [Finding rule RHS free vars]
333
334Note that in Example [eftInt], *neither* eftInt *nor* eftIntFB is
335chosen as a loop breaker, because their RHSs don't mention each other.
336And indeed both can be inlined safely.
337
338Note again that the edges of the graph we use for computing loop breakers
339are not the same as the edges we use for computing the Rec blocks.
340That's why we compute
341
342- rec_edges          for the Rec block analysis
343- loop_breaker_nodes for the loop breaker analysis
344
345  * Note [Finding rule RHS free vars]
346    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
347    Consider this real example from Data Parallel Haskell
348         tagZero :: Array Int -> Array Tag
349         {-# INLINE [1] tagZeroes #-}
350         tagZero xs = pmap (\x -> fromBool (x==0)) xs
351
352         {-# RULES "tagZero" [~1] forall xs n.
353             pmap fromBool <blah blah> = tagZero xs #-}
354    So tagZero's RHS mentions pmap, and pmap's RULE mentions tagZero.
355    However, tagZero can only be inlined in phase 1 and later, while
356    the RULE is only active *before* phase 1.  So there's no problem.
357
358    To make this work, we look for the RHS free vars only for
359    *active* rules. That's the reason for the occ_rule_act field
360    of the OccEnv.
361
362  * Note [Weak loop breakers]
363    ~~~~~~~~~~~~~~~~~~~~~~~~~
364    There is a last nasty wrinkle.  Suppose we have
365
366        Rec { f = f_rhs
367              RULE f [] = g
368
369              h = h_rhs
370              g = h
371              ...more...
372        }
373
374    Remember that we simplify the RULES before any RHS (see Note
375    [Rules are visible in their own rec group] above).
376
377    So we must *not* postInlineUnconditionally 'g', even though
378    its RHS turns out to be trivial.  (I'm assuming that 'g' is
379    not choosen as a loop breaker.)  Why not?  Because then we
380    drop the binding for 'g', which leaves it out of scope in the
381    RULE!
382
383    Here's a somewhat different example of the same thing
384        Rec { g = h
385            ; h = ...f...
386            ; f = f_rhs
387              RULE f [] = g }
388    Here the RULE is "below" g, but we *still* can't postInlineUnconditionally
389    g, because the RULE for f is active throughout.  So the RHS of h
390    might rewrite to     h = ...g...
391    So g must remain in scope in the output program!
392
393    We "solve" this by:
394
395        Make g a "weak" loop breaker (OccInfo = IAmLoopBreaker True)
396        iff g is a "missing free variable" of the Rec group
397
398    A "missing free variable" x is one that is mentioned in an RHS or
399    INLINE or RULE of a binding in the Rec group, but where the
400    dependency on x may not show up in the loop_breaker_nodes (see
401    note [Choosing loop breakers} above).
402
403    A normal "strong" loop breaker has IAmLoopBreaker False.  So
404
405                                    Inline  postInlineUnconditionally
406   strong   IAmLoopBreaker False    no      no
407   weak     IAmLoopBreaker True     yes     no
408            other                   yes     yes
409
410    The **sole** reason for this kind of loop breaker is so that
411    postInlineUnconditionally does not fire.  Ugh.  (Typically it'll
412    inline via the usual callSiteInline stuff, so it'll be dead in the
413    next pass, so the main Ugh is the tiresome complication.)
414
415Note [Rules for imported functions]
416~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
417Consider this
418   f = /\a. B.g a
419   RULE B.g Int = 1 + f Int
420Note that
421  * The RULE is for an imported function.
422  * f is non-recursive
423Now we
424can get
425   f Int --> B.g Int      Inlining f
426         --> 1 + f Int    Firing RULE
427and so the simplifier goes into an infinite loop. This
428would not happen if the RULE was for a local function,
429because we keep track of dependencies through rules.  But
430that is pretty much impossible to do for imported Ids.  Suppose
431f's definition had been
432   f = /\a. C.h a
433where (by some long and devious process), C.h eventually inlines to
434B.g.  We could only spot such loops by exhaustively following
435unfoldings of C.h etc, in case we reach B.g, and hence (via the RULE)
436f.
437
438Note that RULES for imported functions are important in practice; they
439occur a lot in the libraries.
440
441We regard this potential infinite loop as a *programmer* error.
442It's up the programmer not to write silly rules like
443     RULE f x = f x
444and the example above is just a more complicated version.
445
446Note [Preventing loops due to imported functions rules]
447~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
448Consider:
449  import GHC.Base (foldr)
450
451  {-# RULES "filterList" forall p. foldr (filterFB (:) p) [] = filter p #-}
452  filter p xs = build (\c n -> foldr (filterFB c p) n xs)
453  filterFB c p = ...
454
455  f = filter p xs
456
457Note that filter is not a loop-breaker, so what happens is:
458  f =          filter p xs
459    = {inline} build (\c n -> foldr (filterFB c p) n xs)
460    = {inline} foldr (filterFB (:) p) [] xs
461    = {RULE}   filter p xs
462
463We are in an infinite loop.
464
465A more elaborate example (that I actually saw in practice when I went to
466mark GHC.List.filter as INLINABLE) is as follows. Say I have this module:
467  {-# LANGUAGE RankNTypes #-}
468  module GHCList where
469
470  import Prelude hiding (filter)
471  import GHC.Base (build)
472
473  {-# INLINABLE filter #-}
474  filter :: (a -> Bool) -> [a] -> [a]
475  filter p [] = []
476  filter p (x:xs) = if p x then x : filter p xs else filter p xs
477
478  {-# NOINLINE [0] filterFB #-}
479  filterFB :: (a -> b -> b) -> (a -> Bool) -> a -> b -> b
480  filterFB c p x r | p x       = x `c` r
481                   | otherwise = r
482
483  {-# RULES
484  "filter"     [~1] forall p xs.  filter p xs = build (\c n -> foldr
485  (filterFB c p) n xs)
486  "filterList" [1]  forall p.     foldr (filterFB (:) p) [] = filter p
487   #-}
488
489Then (because RULES are applied inside INLINABLE unfoldings, but inlinings
490are not), the unfolding given to "filter" in the interface file will be:
491  filter p []     = []
492  filter p (x:xs) = if p x then x : build (\c n -> foldr (filterFB c p) n xs)
493                           else     build (\c n -> foldr (filterFB c p) n xs
494
495Note that because this unfolding does not mention "filter", filter is not
496marked as a strong loop breaker. Therefore at a use site in another module:
497  filter p xs
498    = {inline}
499      case xs of []     -> []
500                 (x:xs) -> if p x then x : build (\c n -> foldr (filterFB c p) n xs)
501                                  else     build (\c n -> foldr (filterFB c p) n xs)
502
503  build (\c n -> foldr (filterFB c p) n xs)
504    = {inline} foldr (filterFB (:) p) [] xs
505    = {RULE}   filter p xs
506
507And we are in an infinite loop again, except that this time the loop is producing an
508infinitely large *term* (an unrolling of filter) and so the simplifier finally
509dies with "ticks exhausted"
510
511Because of this problem, we make a small change in the occurrence analyser
512designed to mark functions like "filter" as strong loop breakers on the basis that:
513  1. The RHS of filter mentions the local function "filterFB"
514  2. We have a rule which mentions "filterFB" on the LHS and "filter" on the RHS
515
516So for each RULE for an *imported* function we are going to add
517dependency edges between the *local* FVS of the rule LHS and the
518*local* FVS of the rule RHS. We don't do anything special for RULES on
519local functions because the standard occurrence analysis stuff is
520pretty good at getting loop-breakerness correct there.
521
522It is important to note that even with this extra hack we aren't always going to get
523things right. For example, it might be that the rule LHS mentions an imported Id,
524and another module has a RULE that can rewrite that imported Id to one of our local
525Ids.
526
527Note [Specialising imported functions] (referred to from Specialise)
528~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
529BUT for *automatically-generated* rules, the programmer can't be
530responsible for the "programmer error" in Note [Rules for imported
531functions].  In paricular, consider specialising a recursive function
532defined in another module.  If we specialise a recursive function B.g,
533we get
534         g_spec = .....(B.g Int).....
535         RULE B.g Int = g_spec
536Here, g_spec doesn't look recursive, but when the rule fires, it
537becomes so.  And if B.g was mutually recursive, the loop might
538not be as obvious as it is here.
539
540To avoid this,
541 * When specialising a function that is a loop breaker,
542   give a NOINLINE pragma to the specialised function
543
544Note [Glomming]
545~~~~~~~~~~~~~~~
546RULES for imported Ids can make something at the top refer to something at the bottom:
547        f = \x -> B.g (q x)
548        h = \y -> 3
549
550        RULE:  B.g (q x) = h x
551
552Applying this rule makes f refer to h, although f doesn't appear to
553depend on h.  (And, as in Note [Rules for imported functions], the
554dependency might be more indirect. For example, f might mention C.t
555rather than B.g, where C.t eventually inlines to B.g.)
556
557NOTICE that this cannot happen for rules whose head is a
558locally-defined function, because we accurately track dependencies
559through RULES.  It only happens for rules whose head is an imported
560function (B.g in the example above).
561
562Solution:
563  - When simplifying, bring all top level identifiers into
564    scope at the start, ignoring the Rec/NonRec structure, so
565    that when 'h' pops up in f's rhs, we find it in the in-scope set
566    (as the simplifier generally expects). This happens in simplTopBinds.
567
568  - In the occurrence analyser, if there are any out-of-scope
569    occurrences that pop out of the top, which will happen after
570    firing the rule:      f = \x -> h x
571                          h = \y -> 3
572    then just glom all the bindings into a single Rec, so that
573    the *next* iteration of the occurrence analyser will sort
574    them all out.   This part happens in occurAnalysePgm.
575
576------------------------------------------------------------
577Note [Inline rules]
578~~~~~~~~~~~~~~~~~~~
579None of the above stuff about RULES applies to Inline Rules,
580stored in a CoreUnfolding.  The unfolding, if any, is simplified
581at the same time as the regular RHS of the function (ie *not* like
582Note [Rules are visible in their own rec group]), so it should be
583treated *exactly* like an extra RHS.
584
585Or, rather, when computing loop-breaker edges,
586  * If f has an INLINE pragma, and it is active, we treat the
587    INLINE rhs as f's rhs
588  * If it's inactive, we treat f as having no rhs
589  * If it has no INLINE pragma, we look at f's actual rhs
590
591
592There is a danger that we'll be sub-optimal if we see this
593     f = ...f...
594     [INLINE f = ..no f...]
595where f is recursive, but the INLINE is not. This can just about
596happen with a sufficiently odd set of rules; eg
597
598        foo :: Int -> Int
599        {-# INLINE [1] foo #-}
600        foo x = x+1
601
602        bar :: Int -> Int
603        {-# INLINE [1] bar #-}
604        bar x = foo x + 1
605
606        {-# RULES "foo" [~1] forall x. foo x = bar x #-}
607
608Here the RULE makes bar recursive; but it's INLINE pragma remains
609non-recursive. It's tempting to then say that 'bar' should not be
610a loop breaker, but an attempt to do so goes wrong in two ways:
611   a) We may get
612         $df = ...$cfoo...
613         $cfoo = ...$df....
614         [INLINE $cfoo = ...no-$df...]
615      But we want $cfoo to depend on $df explicitly so that we
616      put the bindings in the right order to inline $df in $cfoo
617      and perhaps break the loop altogether.  (Maybe this
618   b)
619
620
621Example [eftInt]
622~~~~~~~~~~~~~~~
623Example (from GHC.Enum):
624
625  eftInt :: Int# -> Int# -> [Int]
626  eftInt x y = ...(non-recursive)...
627
628  {-# INLINE [0] eftIntFB #-}
629  eftIntFB :: (Int -> r -> r) -> r -> Int# -> Int# -> r
630  eftIntFB c n x y = ...(non-recursive)...
631
632  {-# RULES
633  "eftInt"  [~1] forall x y. eftInt x y = build (\ c n -> eftIntFB c n x y)
634  "eftIntList"  [1] eftIntFB  (:) [] = eftInt
635   #-}
636
637Note [Specialisation rules]
638~~~~~~~~~~~~~~~~~~~~~~~~~~~
639Consider this group, which is typical of what SpecConstr builds:
640
641   fs a = ....f (C a)....
642   f  x = ....f (C a)....
643   {-# RULE f (C a) = fs a #-}
644
645So 'f' and 'fs' are in the same Rec group (since f refers to fs via its RULE).
646
647But watch out!  If 'fs' is not chosen as a loop breaker, we may get an infinite loop:
648  - the RULE is applied in f's RHS (see Note [Self-recursive rules] in Simplify
649  - fs is inlined (say it's small)
650  - now there's another opportunity to apply the RULE
651
652This showed up when compiling Control.Concurrent.Chan.getChanContents.
653
654------------------------------------------------------------
655Note [Finding join points]
656~~~~~~~~~~~~~~~~~~~~~~~~~~
657It's the occurrence analyser's job to find bindings that we can turn into join
658points, but it doesn't perform that transformation right away. Rather, it marks
659the eligible bindings as part of their occurrence data, leaving it to the
660simplifier (or to simpleOptPgm) to actually change the binder's 'IdDetails'.
661The simplifier then eta-expands the RHS if needed and then updates the
662occurrence sites. Dividing the work this way means that the occurrence analyser
663still only takes one pass, yet one can always tell the difference between a
664function call and a jump by looking at the occurrence (because the same pass
665changes the 'IdDetails' and propagates the binders to their occurrence sites).
666
667To track potential join points, we use the 'occ_tail' field of OccInfo. A value
668of `AlwaysTailCalled n` indicates that every occurrence of the variable is a
669tail call with `n` arguments (counting both value and type arguments). Otherwise
670'occ_tail' will be 'NoTailCallInfo'. The tail call info flows bottom-up with the
671rest of 'OccInfo' until it goes on the binder.
672
673Note [Rules and join points]
674~~~~~~~~~~~~~~~~~~~~~~~~~~~~
675
676Things get fiddly with rules. Suppose we have:
677
678  let j :: Int -> Int
679      j y = 2 * y
680      k :: Int -> Int -> Int
681      {-# RULES "SPEC k 0" k 0 = j #-}
682      k x y = x + 2 * y
683  in ...
684
685Now suppose that both j and k appear only as saturated tail calls in the body.
686Thus we would like to make them both join points. The rule complicates matters,
687though, as its RHS has an unapplied occurrence of j. *However*, if we were to
688eta-expand the rule, all would be well:
689
690  {-# RULES "SPEC k 0" forall a. k 0 a = j a #-}
691
692So conceivably we could notice that a potential join point would have an
693"undersaturated" rule and account for it. This would mean we could make
694something that's been specialised a join point, for instance. But local bindings
695are rarely specialised, and being overly cautious about rules only
696costs us anything when, for some `j`:
697
698  * Before specialisation, `j` has non-tail calls, so it can't be a join point.
699  * During specialisation, `j` gets specialised and thus acquires rules.
700  * Sometime afterward, the non-tail calls to `j` disappear (as dead code, say),
701    and so now `j` *could* become a join point.
702
703This appears to be very rare in practice. TODO Perhaps we should gather
704statistics to be sure.
705
706------------------------------------------------------------
707Note [Adjusting right-hand sides]
708~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
709There's a bit of a dance we need to do after analysing a lambda expression or
710a right-hand side. In particular, we need to
711
712  a) call 'markAllInsideLam' *unless* the binding is for a thunk, a one-shot
713     lambda, or a non-recursive join point; and
714  b) call 'markAllNonTailCalled' *unless* the binding is for a join point.
715
716Some examples, with how the free occurrences in e (assumed not to be a value
717lambda) get marked:
718
719                             inside lam    non-tail-called
720  ------------------------------------------------------------
721  let x = e                  No            Yes
722  let f = \x -> e            Yes           Yes
723  let f = \x{OneShot} -> e   No            Yes
724  \x -> e                    Yes           Yes
725  join j x = e               No            No
726  joinrec j x = e            Yes           No
727
728There are a few other caveats; most importantly, if we're marking a binding as
729'AlwaysTailCalled', it's *going* to be a join point, so we treat it as one so
730that the effect cascades properly. Consequently, at the time the RHS is
731analysed, we won't know what adjustments to make; thus 'occAnalLamOrRhs' must
732return the unadjusted 'UsageDetails', to be adjusted by 'adjustRhsUsage' once
733join-point-hood has been decided.
734
735Thus the overall sequence taking place in 'occAnalNonRecBind' and
736'occAnalRecBind' is as follows:
737
738  1. Call 'occAnalLamOrRhs' to find usage information for the RHS.
739  2. Call 'tagNonRecBinder' or 'tagRecBinders', which decides whether to make
740     the binding a join point.
741  3. Call 'adjustRhsUsage' accordingly. (Done as part of 'tagRecBinders' when
742     recursive.)
743
744(In the recursive case, this logic is spread between 'makeNode' and
745'occAnalRec'.)
746-}
747
748------------------------------------------------------------------
749--                 occAnalBind
750------------------------------------------------------------------
751
752occAnalBind :: OccEnv           -- The incoming OccEnv
753            -> TopLevelFlag
754            -> ImpRuleEdges
755            -> CoreBind
756            -> UsageDetails             -- Usage details of scope
757            -> (UsageDetails,           -- Of the whole let(rec)
758                [CoreBind])
759
760occAnalBind env lvl top_env (NonRec binder rhs) body_usage
761  = occAnalNonRecBind env lvl top_env binder rhs body_usage
762occAnalBind env lvl top_env (Rec pairs) body_usage
763  = occAnalRecBind env lvl top_env pairs body_usage
764
765-----------------
766occAnalNonRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> Var -> CoreExpr
767                  -> UsageDetails -> (UsageDetails, [CoreBind])
768occAnalNonRecBind env lvl imp_rule_edges binder rhs body_usage
769  | isTyVar binder      -- A type let; we don't gather usage info
770  = (body_usage, [NonRec binder rhs])
771
772  | not (binder `usedIn` body_usage)    -- It's not mentioned
773  = (body_usage, [])
774
775  | otherwise                   -- It's mentioned in the body
776  = (body_usage' `andUDs` rhs_usage', [NonRec tagged_binder rhs'])
777  where
778    (body_usage', tagged_binder) = tagNonRecBinder lvl body_usage binder
779    mb_join_arity = willBeJoinId_maybe tagged_binder
780
781    (bndrs, body) = collectBinders rhs
782
783    (rhs_usage1, bndrs', body') = occAnalNonRecRhs env tagged_binder bndrs body
784    rhs' = mkLams (markJoinOneShots mb_join_arity bndrs') body'
785           -- For a /non-recursive/ join point we can mark all
786           -- its join-lambda as one-shot; and it's a good idea to do so
787
788    -- Unfoldings
789    -- See Note [Unfoldings and join points]
790    rhs_usage2 = case occAnalUnfolding env NonRecursive binder of
791                   Just unf_usage -> rhs_usage1 `andUDs` unf_usage
792                   Nothing        -> rhs_usage1
793
794    -- Rules
795    -- See Note [Rules are extra RHSs] and Note [Rule dependency info]
796    rules_w_uds = occAnalRules env mb_join_arity NonRecursive tagged_binder
797    rule_uds    = map (\(_, l, r) -> l `andUDs` r) rules_w_uds
798    rhs_usage3 = foldr andUDs rhs_usage2 rule_uds
799    rhs_usage4 = case lookupVarEnv imp_rule_edges binder of
800                   Nothing -> rhs_usage3
801                   Just vs -> addManyOccsSet rhs_usage3 vs
802       -- See Note [Preventing loops due to imported functions rules]
803
804    -- Final adjustment
805    rhs_usage' = adjustRhsUsage mb_join_arity NonRecursive bndrs' rhs_usage4
806
807-----------------
808occAnalRecBind :: OccEnv -> TopLevelFlag -> ImpRuleEdges -> [(Var,CoreExpr)]
809               -> UsageDetails -> (UsageDetails, [CoreBind])
810occAnalRecBind env lvl imp_rule_edges pairs body_usage
811  = foldr (occAnalRec env lvl) (body_usage, []) sccs
812        -- For a recursive group, we
813        --      * occ-analyse all the RHSs
814        --      * compute strongly-connected components
815        --      * feed those components to occAnalRec
816        -- See Note [Recursive bindings: the grand plan]
817  where
818    sccs :: [SCC Details]
819    sccs = {-# SCC "occAnalBind.scc" #-}
820           stronglyConnCompFromEdgedVerticesUniq nodes
821
822    nodes :: [LetrecNode]
823    nodes = {-# SCC "occAnalBind.assoc" #-}
824            map (makeNode env imp_rule_edges bndr_set) pairs
825
826    bndr_set = mkVarSet (map fst pairs)
827
828{-
829Note [Unfoldings and join points]
830~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
831
832We assume that anything in an unfolding occurs multiple times, since unfoldings
833are often copied (that's the whole point!). But we still need to track tail
834calls for the purpose of finding join points.
835-}
836
837-----------------------------
838occAnalRec :: OccEnv -> TopLevelFlag
839           -> SCC Details
840           -> (UsageDetails, [CoreBind])
841           -> (UsageDetails, [CoreBind])
842
843        -- The NonRec case is just like a Let (NonRec ...) above
844occAnalRec _ lvl (AcyclicSCC (ND { nd_bndr = bndr, nd_rhs = rhs
845                                 , nd_uds = rhs_uds, nd_rhs_bndrs = rhs_bndrs }))
846           (body_uds, binds)
847  | not (bndr `usedIn` body_uds)
848  = (body_uds, binds)           -- See Note [Dead code]
849
850  | otherwise                   -- It's mentioned in the body
851  = (body_uds' `andUDs` rhs_uds',
852     NonRec tagged_bndr rhs : binds)
853  where
854    (body_uds', tagged_bndr) = tagNonRecBinder lvl body_uds bndr
855    rhs_uds' = adjustRhsUsage (willBeJoinId_maybe tagged_bndr) NonRecursive
856                              rhs_bndrs rhs_uds
857
858        -- The Rec case is the interesting one
859        -- See Note [Recursive bindings: the grand plan]
860        -- See Note [Loop breaking]
861occAnalRec env lvl (CyclicSCC details_s) (body_uds, binds)
862  | not (any (`usedIn` body_uds) bndrs) -- NB: look at body_uds, not total_uds
863  = (body_uds, binds)                   -- See Note [Dead code]
864
865  | otherwise   -- At this point we always build a single Rec
866  = -- pprTrace "occAnalRec" (vcat
867    --  [ text "weak_fvs" <+> ppr weak_fvs
868    --  , text "lb nodes" <+> ppr loop_breaker_nodes])
869    (final_uds, Rec pairs : binds)
870
871  where
872    bndrs    = map nd_bndr details_s
873    bndr_set = mkVarSet bndrs
874
875    ------------------------------
876        -- See Note [Choosing loop breakers] for loop_breaker_nodes
877    final_uds :: UsageDetails
878    loop_breaker_nodes :: [LetrecNode]
879    (final_uds, loop_breaker_nodes)
880      = mkLoopBreakerNodes env lvl bndr_set body_uds details_s
881
882    ------------------------------
883    weak_fvs :: VarSet
884    weak_fvs = mapUnionVarSet nd_weak details_s
885
886    ---------------------------
887    -- Now reconstruct the cycle
888    pairs :: [(Id,CoreExpr)]
889    pairs | isEmptyVarSet weak_fvs = reOrderNodes   0 bndr_set weak_fvs loop_breaker_nodes []
890          | otherwise              = loopBreakNodes 0 bndr_set weak_fvs loop_breaker_nodes []
891          -- If weak_fvs is empty, the loop_breaker_nodes will include
892          -- all the edges in the original scope edges [remember,
893          -- weak_fvs is the difference between scope edges and
894          -- lb-edges], so a fresh SCC computation would yield a
895          -- single CyclicSCC result; and reOrderNodes deals with
896          -- exactly that case
897
898
899------------------------------------------------------------------
900--                 Loop breaking
901------------------------------------------------------------------
902
903type Binding = (Id,CoreExpr)
904
905loopBreakNodes :: Int
906               -> VarSet        -- All binders
907               -> VarSet        -- Binders whose dependencies may be "missing"
908                                -- See Note [Weak loop breakers]
909               -> [LetrecNode]
910               -> [Binding]             -- Append these to the end
911               -> [Binding]
912{-
913loopBreakNodes is applied to the list of nodes for a cyclic strongly
914connected component (there's guaranteed to be a cycle).  It returns
915the same nodes, but
916        a) in a better order,
917        b) with some of the Ids having a IAmALoopBreaker pragma
918
919The "loop-breaker" Ids are sufficient to break all cycles in the SCC.  This means
920that the simplifier can guarantee not to loop provided it never records an inlining
921for these no-inline guys.
922
923Furthermore, the order of the binds is such that if we neglect dependencies
924on the no-inline Ids then the binds are topologically sorted.  This means
925that the simplifier will generally do a good job if it works from top bottom,
926recording inlinings for any Ids which aren't marked as "no-inline" as it goes.
927-}
928
929-- Return the bindings sorted into a plausible order, and marked with loop breakers.
930loopBreakNodes depth bndr_set weak_fvs nodes binds
931  = -- pprTrace "loopBreakNodes" (ppr nodes) $
932    go (stronglyConnCompFromEdgedVerticesUniqR nodes) binds
933  where
934    go []         binds = binds
935    go (scc:sccs) binds = loop_break_scc scc (go sccs binds)
936
937    loop_break_scc scc binds
938      = case scc of
939          AcyclicSCC node  -> mk_non_loop_breaker weak_fvs node : binds
940          CyclicSCC nodes  -> reOrderNodes depth bndr_set weak_fvs nodes binds
941
942----------------------------------
943reOrderNodes :: Int -> VarSet -> VarSet -> [LetrecNode] -> [Binding] -> [Binding]
944    -- Choose a loop breaker, mark it no-inline,
945    -- and call loopBreakNodes on the rest
946reOrderNodes _ _ _ []     _     = panic "reOrderNodes"
947reOrderNodes _ _ _ [node] binds = mk_loop_breaker node : binds
948reOrderNodes depth bndr_set weak_fvs (node : nodes) binds
949  = -- pprTrace "reOrderNodes" (vcat [ text "unchosen" <+> ppr unchosen
950    --                              , text "chosen" <+> ppr chosen_nodes ]) $
951    loopBreakNodes new_depth bndr_set weak_fvs unchosen $
952    (map mk_loop_breaker chosen_nodes ++ binds)
953  where
954    (chosen_nodes, unchosen) = chooseLoopBreaker approximate_lb
955                                                 (nd_score (node_payload node))
956                                                 [node] [] nodes
957
958    approximate_lb = depth >= 2
959    new_depth | approximate_lb = 0
960              | otherwise      = depth+1
961        -- After two iterations (d=0, d=1) give up
962        -- and approximate, returning to d=0
963
964mk_loop_breaker :: LetrecNode -> Binding
965mk_loop_breaker (node_payload -> ND { nd_bndr = bndr, nd_rhs = rhs})
966  = (bndr `setIdOccInfo` strongLoopBreaker { occ_tail = tail_info }, rhs)
967  where
968    tail_info = tailCallInfo (idOccInfo bndr)
969
970mk_non_loop_breaker :: VarSet -> LetrecNode -> Binding
971-- See Note [Weak loop breakers]
972mk_non_loop_breaker weak_fvs (node_payload -> ND { nd_bndr = bndr
973                                                 , nd_rhs = rhs})
974  | bndr `elemVarSet` weak_fvs = (setIdOccInfo bndr occ', rhs)
975  | otherwise                  = (bndr, rhs)
976  where
977    occ' = weakLoopBreaker { occ_tail = tail_info }
978    tail_info = tailCallInfo (idOccInfo bndr)
979
980----------------------------------
981chooseLoopBreaker :: Bool             -- True <=> Too many iterations,
982                                      --          so approximate
983                  -> NodeScore            -- Best score so far
984                  -> [LetrecNode]       -- Nodes with this score
985                  -> [LetrecNode]       -- Nodes with higher scores
986                  -> [LetrecNode]       -- Unprocessed nodes
987                  -> ([LetrecNode], [LetrecNode])
988    -- This loop looks for the bind with the lowest score
989    -- to pick as the loop  breaker.  The rest accumulate in
990chooseLoopBreaker _ _ loop_nodes acc []
991  = (loop_nodes, acc)        -- Done
992
993    -- If approximate_loop_breaker is True, we pick *all*
994    -- nodes with lowest score, else just one
995    -- See Note [Complexity of loop breaking]
996chooseLoopBreaker approx_lb loop_sc loop_nodes acc (node : nodes)
997  | approx_lb
998  , rank sc == rank loop_sc
999  = chooseLoopBreaker approx_lb loop_sc (node : loop_nodes) acc nodes
1000
1001  | sc `betterLB` loop_sc  -- Better score so pick this new one
1002  = chooseLoopBreaker approx_lb sc [node] (loop_nodes ++ acc) nodes
1003
1004  | otherwise              -- Worse score so don't pick it
1005  = chooseLoopBreaker approx_lb loop_sc loop_nodes (node : acc) nodes
1006  where
1007    sc = nd_score (node_payload node)
1008
1009{-
1010Note [Complexity of loop breaking]
1011~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1012The loop-breaking algorithm knocks out one binder at a time, and
1013performs a new SCC analysis on the remaining binders.  That can
1014behave very badly in tightly-coupled groups of bindings; in the
1015worst case it can be (N**2)*log N, because it does a full SCC
1016on N, then N-1, then N-2 and so on.
1017
1018To avoid this, we switch plans after 2 (or whatever) attempts:
1019  Plan A: pick one binder with the lowest score, make it
1020          a loop breaker, and try again
1021  Plan B: pick *all* binders with the lowest score, make them
1022          all loop breakers, and try again
1023Since there are only a small finite number of scores, this will
1024terminate in a constant number of iterations, rather than O(N)
1025iterations.
1026
1027You might thing that it's very unlikely, but RULES make it much
1028more likely.  Here's a real example from #1969:
1029  Rec { $dm = \d.\x. op d
1030        {-# RULES forall d. $dm Int d  = $s$dm1
1031                  forall d. $dm Bool d = $s$dm2 #-}
1032
1033        dInt = MkD .... opInt ...
1034        dInt = MkD .... opBool ...
1035        opInt  = $dm dInt
1036        opBool = $dm dBool
1037
1038        $s$dm1 = \x. op dInt
1039        $s$dm2 = \x. op dBool }
1040The RULES stuff means that we can't choose $dm as a loop breaker
1041(Note [Choosing loop breakers]), so we must choose at least (say)
1042opInt *and* opBool, and so on.  The number of loop breakders is
1043linear in the number of instance declarations.
1044
1045Note [Loop breakers and INLINE/INLINABLE pragmas]
1046~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1047Avoid choosing a function with an INLINE pramga as the loop breaker!
1048If such a function is mutually-recursive with a non-INLINE thing,
1049then the latter should be the loop-breaker.
1050
1051It's vital to distinguish between INLINE and INLINABLE (the
1052Bool returned by hasStableCoreUnfolding_maybe).  If we start with
1053   Rec { {-# INLINABLE f #-}
1054         f x = ...f... }
1055and then worker/wrapper it through strictness analysis, we'll get
1056   Rec { {-# INLINABLE $wf #-}
1057         $wf p q = let x = (p,q) in ...f...
1058
1059         {-# INLINE f #-}
1060         f x = case x of (p,q) -> $wf p q }
1061
1062Now it is vital that we choose $wf as the loop breaker, so we can
1063inline 'f' in '$wf'.
1064
1065Note [DFuns should not be loop breakers]
1066~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1067It's particularly bad to make a DFun into a loop breaker.  See
1068Note [How instance declarations are translated] in TcInstDcls
1069
1070We give DFuns a higher score than ordinary CONLIKE things because
1071if there's a choice we want the DFun to be the non-loop breaker. Eg
1072
1073rec { sc = /\ a \$dC. $fBWrap (T a) ($fCT @ a $dC)
1074
1075      $fCT :: forall a_afE. (Roman.C a_afE) => Roman.C (Roman.T a_afE)
1076      {-# DFUN #-}
1077      $fCT = /\a \$dC. MkD (T a) ((sc @ a $dC) |> blah) ($ctoF @ a $dC)
1078    }
1079
1080Here 'sc' (the superclass) looks CONLIKE, but we'll never get to it
1081if we can't unravel the DFun first.
1082
1083Note [Constructor applications]
1084~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1085It's really really important to inline dictionaries.  Real
1086example (the Enum Ordering instance from GHC.Base):
1087
1088     rec     f = \ x -> case d of (p,q,r) -> p x
1089             g = \ x -> case d of (p,q,r) -> q x
1090             d = (v, f, g)
1091
1092Here, f and g occur just once; but we can't inline them into d.
1093On the other hand we *could* simplify those case expressions if
1094we didn't stupidly choose d as the loop breaker.
1095But we won't because constructor args are marked "Many".
1096Inlining dictionaries is really essential to unravelling
1097the loops in static numeric dictionaries, see GHC.Float.
1098
1099Note [Closure conversion]
1100~~~~~~~~~~~~~~~~~~~~~~~~~
1101We treat (\x. C p q) as a high-score candidate in the letrec scoring algorithm.
1102The immediate motivation came from the result of a closure-conversion transformation
1103which generated code like this:
1104
1105    data Clo a b = forall c. Clo (c -> a -> b) c
1106
1107    ($:) :: Clo a b -> a -> b
1108    Clo f env $: x = f env x
1109
1110    rec { plus = Clo plus1 ()
1111
1112        ; plus1 _ n = Clo plus2 n
1113
1114        ; plus2 Zero     n = n
1115        ; plus2 (Succ m) n = Succ (plus $: m $: n) }
1116
1117If we inline 'plus' and 'plus1', everything unravels nicely.  But if
1118we choose 'plus1' as the loop breaker (which is entirely possible
1119otherwise), the loop does not unravel nicely.
1120
1121
1122@occAnalUnfolding@ deals with the question of bindings where the Id is marked
1123by an INLINE pragma.  For these we record that anything which occurs
1124in its RHS occurs many times.  This pessimistically assumes that this
1125inlined binder also occurs many times in its scope, but if it doesn't
1126we'll catch it next time round.  At worst this costs an extra simplifier pass.
1127ToDo: try using the occurrence info for the inline'd binder.
1128
1129[March 97] We do the same for atomic RHSs.  Reason: see notes with loopBreakSCC.
1130[June 98, SLPJ]  I've undone this change; I don't understand it.  See notes with loopBreakSCC.
1131
1132
1133************************************************************************
1134*                                                                      *
1135                   Making nodes
1136*                                                                      *
1137************************************************************************
1138-}
1139
1140type ImpRuleEdges = IdEnv IdSet     -- Mapping from FVs of imported RULE LHSs to RHS FVs
1141
1142noImpRuleEdges :: ImpRuleEdges
1143noImpRuleEdges = emptyVarEnv
1144
1145type LetrecNode = Node Unique Details  -- Node comes from Digraph
1146                                       -- The Unique key is gotten from the Id
1147data Details
1148  = ND { nd_bndr :: Id          -- Binder
1149       , nd_rhs  :: CoreExpr    -- RHS, already occ-analysed
1150       , nd_rhs_bndrs :: [CoreBndr] -- Outer lambdas of RHS
1151                                    -- INVARIANT: (nd_rhs_bndrs nd, _) ==
1152                                    --              collectBinders (nd_rhs nd)
1153
1154       , nd_uds  :: UsageDetails  -- Usage from RHS, and RULES, and stable unfoldings
1155                                  -- ignoring phase (ie assuming all are active)
1156                                  -- See Note [Forming Rec groups]
1157
1158       , nd_inl  :: IdSet       -- Free variables of
1159                                --   the stable unfolding (if present and active)
1160                                --   or the RHS (if not)
1161                                -- but excluding any RULES
1162                                -- This is the IdSet that may be used if the Id is inlined
1163
1164       , nd_weak :: IdSet       -- Binders of this Rec that are mentioned in nd_uds
1165                                -- but are *not* in nd_inl.  These are the ones whose
1166                                -- dependencies might not be respected by loop_breaker_nodes
1167                                -- See Note [Weak loop breakers]
1168
1169       , nd_active_rule_fvs :: IdSet   -- Free variables of the RHS of active RULES
1170
1171       , nd_score :: NodeScore
1172  }
1173
1174instance Outputable Details where
1175   ppr nd = text "ND" <> braces
1176             (sep [ text "bndr =" <+> ppr (nd_bndr nd)
1177                  , text "uds =" <+> ppr (nd_uds nd)
1178                  , text "inl =" <+> ppr (nd_inl nd)
1179                  , text "weak =" <+> ppr (nd_weak nd)
1180                  , text "rule =" <+> ppr (nd_active_rule_fvs nd)
1181                  , text "score =" <+> ppr (nd_score nd)
1182             ])
1183
1184-- The NodeScore is compared lexicographically;
1185--      e.g. lower rank wins regardless of size
1186type NodeScore = ( Int     -- Rank: lower => more likely to be picked as loop breaker
1187                 , Int     -- Size of rhs: higher => more likely to be picked as LB
1188                           -- Maxes out at maxExprSize; we just use it to prioritise
1189                           -- small functions
1190                 , Bool )  -- Was it a loop breaker before?
1191                           -- True => more likely to be picked
1192                           -- Note [Loop breakers, node scoring, and stability]
1193
1194rank :: NodeScore -> Int
1195rank (r, _, _) = r
1196
1197makeNode :: OccEnv -> ImpRuleEdges -> VarSet
1198         -> (Var, CoreExpr) -> LetrecNode
1199-- See Note [Recursive bindings: the grand plan]
1200makeNode env imp_rule_edges bndr_set (bndr, rhs)
1201  = DigraphNode details (varUnique bndr) (nonDetKeysUniqSet node_fvs)
1202    -- It's OK to use nonDetKeysUniqSet here as stronglyConnCompFromEdgedVerticesR
1203    -- is still deterministic with edges in nondeterministic order as
1204    -- explained in Note [Deterministic SCC] in Digraph.
1205  where
1206    details = ND { nd_bndr            = bndr
1207                 , nd_rhs             = rhs'
1208                 , nd_rhs_bndrs       = bndrs'
1209                 , nd_uds             = rhs_usage3
1210                 , nd_inl             = inl_fvs
1211                 , nd_weak            = node_fvs `minusVarSet` inl_fvs
1212                 , nd_active_rule_fvs = active_rule_fvs
1213                 , nd_score           = pprPanic "makeNodeDetails" (ppr bndr) }
1214
1215    -- Constructing the edges for the main Rec computation
1216    -- See Note [Forming Rec groups]
1217    (bndrs, body) = collectBinders rhs
1218    (rhs_usage1, bndrs', body') = occAnalRecRhs env bndrs body
1219    rhs' = mkLams bndrs' body'
1220    rhs_usage2 = foldr andUDs rhs_usage1 rule_uds
1221                   -- Note [Rules are extra RHSs]
1222                   -- Note [Rule dependency info]
1223    rhs_usage3 = case mb_unf_uds of
1224                   Just unf_uds -> rhs_usage2 `andUDs` unf_uds
1225                   Nothing      -> rhs_usage2
1226    node_fvs = udFreeVars bndr_set rhs_usage3
1227
1228    -- Finding the free variables of the rules
1229    is_active = occ_rule_act env :: Activation -> Bool
1230
1231    rules_w_uds :: [(CoreRule, UsageDetails, UsageDetails)]
1232    rules_w_uds = occAnalRules env (Just (length bndrs)) Recursive bndr
1233
1234    rules_w_rhs_fvs :: [(Activation, VarSet)]    -- Find the RHS fvs
1235    rules_w_rhs_fvs = maybe id (\ids -> ((AlwaysActive, ids):))
1236                               (lookupVarEnv imp_rule_edges bndr)
1237      -- See Note [Preventing loops due to imported functions rules]
1238                      [ (ru_act rule, udFreeVars bndr_set rhs_uds)
1239                      | (rule, _, rhs_uds) <- rules_w_uds ]
1240    rule_uds = map (\(_, l, r) -> l `andUDs` r) rules_w_uds
1241    active_rule_fvs = unionVarSets [fvs | (a,fvs) <- rules_w_rhs_fvs
1242                                        , is_active a]
1243
1244    -- Finding the usage details of the INLINE pragma (if any)
1245    mb_unf_uds = occAnalUnfolding env Recursive bndr
1246
1247    -- Find the "nd_inl" free vars; for the loop-breaker phase
1248    inl_fvs = case mb_unf_uds of
1249                Nothing -> udFreeVars bndr_set rhs_usage1 -- No INLINE, use RHS
1250                Just unf_uds -> udFreeVars bndr_set unf_uds
1251                      -- We could check for an *active* INLINE (returning
1252                      -- emptyVarSet for an inactive one), but is_active
1253                      -- isn't the right thing (it tells about
1254                      -- RULE activation), so we'd need more plumbing
1255
1256mkLoopBreakerNodes :: OccEnv -> TopLevelFlag
1257                   -> VarSet
1258                   -> UsageDetails   -- for BODY of let
1259                   -> [Details]
1260                   -> (UsageDetails, -- adjusted
1261                       [LetrecNode])
1262-- Does four things
1263--   a) tag each binder with its occurrence info
1264--   b) add a NodeScore to each node
1265--   c) make a Node with the right dependency edges for
1266--      the loop-breaker SCC analysis
1267--   d) adjust each RHS's usage details according to
1268--      the binder's (new) shotness and join-point-hood
1269mkLoopBreakerNodes env lvl bndr_set body_uds details_s
1270  = (final_uds, zipWith mk_lb_node details_s bndrs')
1271  where
1272    (final_uds, bndrs') = tagRecBinders lvl body_uds
1273                            [ ((nd_bndr nd)
1274                               ,(nd_uds nd)
1275                               ,(nd_rhs_bndrs nd))
1276                            | nd <- details_s ]
1277    mk_lb_node nd@(ND { nd_bndr = bndr, nd_rhs = rhs, nd_inl = inl_fvs }) bndr'
1278      = DigraphNode nd' (varUnique bndr) (nonDetKeysUniqSet lb_deps)
1279              -- It's OK to use nonDetKeysUniqSet here as
1280              -- stronglyConnCompFromEdgedVerticesR is still deterministic with edges
1281              -- in nondeterministic order as explained in
1282              -- Note [Deterministic SCC] in Digraph.
1283      where
1284        nd'     = nd { nd_bndr = bndr', nd_score = score }
1285        score   = nodeScore env bndr bndr' rhs lb_deps
1286        lb_deps = extendFvs_ rule_fv_env inl_fvs
1287
1288    rule_fv_env :: IdEnv IdSet
1289        -- Maps a variable f to the variables from this group
1290        --      mentioned in RHS of active rules for f
1291        -- Domain is *subset* of bound vars (others have no rule fvs)
1292    rule_fv_env = transClosureFV (mkVarEnv init_rule_fvs)
1293    init_rule_fvs   -- See Note [Finding rule RHS free vars]
1294      = [ (b, trimmed_rule_fvs)
1295        | ND { nd_bndr = b, nd_active_rule_fvs = rule_fvs } <- details_s
1296        , let trimmed_rule_fvs = rule_fvs `intersectVarSet` bndr_set
1297        , not (isEmptyVarSet trimmed_rule_fvs) ]
1298
1299
1300------------------------------------------
1301nodeScore :: OccEnv
1302          -> Id        -- Binder has old occ-info (just for loop-breaker-ness)
1303          -> Id        -- Binder with new occ-info
1304          -> CoreExpr  -- RHS
1305          -> VarSet    -- Loop-breaker dependencies
1306          -> NodeScore
1307nodeScore env old_bndr new_bndr bind_rhs lb_deps
1308  | not (isId old_bndr)     -- A type or cercion variable is never a loop breaker
1309  = (100, 0, False)
1310
1311  | old_bndr `elemVarSet` lb_deps  -- Self-recursive things are great loop breakers
1312  = (0, 0, True)                   -- See Note [Self-recursion and loop breakers]
1313
1314  | not (occ_unf_act env old_bndr) -- A binder whose inlining is inactive (e.g. has
1315  = (0, 0, True)                   -- a NOINLINE pragma) makes a great loop breaker
1316
1317  | exprIsTrivial rhs
1318  = mk_score 10  -- Practically certain to be inlined
1319    -- Used to have also: && not (isExportedId bndr)
1320    -- But I found this sometimes cost an extra iteration when we have
1321    --      rec { d = (a,b); a = ...df...; b = ...df...; df = d }
1322    -- where df is the exported dictionary. Then df makes a really
1323    -- bad choice for loop breaker
1324
1325  | DFunUnfolding { df_args = args } <- id_unfolding
1326    -- Never choose a DFun as a loop breaker
1327    -- Note [DFuns should not be loop breakers]
1328  = (9, length args, is_lb)
1329
1330    -- Data structures are more important than INLINE pragmas
1331    -- so that dictionary/method recursion unravels
1332
1333  | CoreUnfolding { uf_guidance = UnfWhen {} } <- id_unfolding
1334  = mk_score 6
1335
1336  | is_con_app rhs   -- Data types help with cases:
1337  = mk_score 5       -- Note [Constructor applications]
1338
1339  | isStableUnfolding id_unfolding
1340  , can_unfold
1341  = mk_score 3
1342
1343  | isOneOcc (idOccInfo new_bndr)
1344  = mk_score 2  -- Likely to be inlined
1345
1346  | can_unfold  -- The Id has some kind of unfolding
1347  = mk_score 1
1348
1349  | otherwise
1350  = (0, 0, is_lb)
1351
1352  where
1353    mk_score :: Int -> NodeScore
1354    mk_score rank = (rank, rhs_size, is_lb)
1355
1356    is_lb    = isStrongLoopBreaker (idOccInfo old_bndr)
1357    rhs      = case id_unfolding of
1358                 CoreUnfolding { uf_src = src, uf_tmpl = unf_rhs }
1359                    | isStableSource src
1360                    -> unf_rhs
1361                 _  -> bind_rhs
1362       -- 'bind_rhs' is irrelevant for inlining things with a stable unfolding
1363    rhs_size = case id_unfolding of
1364                 CoreUnfolding { uf_guidance = guidance }
1365                    | UnfIfGoodArgs { ug_size = size } <- guidance
1366                    -> size
1367                 _  -> cheapExprSize rhs
1368
1369    can_unfold   = canUnfold id_unfolding
1370    id_unfolding = realIdUnfolding old_bndr
1371       -- realIdUnfolding: Ignore loop-breaker-ness here because
1372       -- that is what we are setting!
1373
1374        -- Checking for a constructor application
1375        -- Cheap and cheerful; the simplifier moves casts out of the way
1376        -- The lambda case is important to spot x = /\a. C (f a)
1377        -- which comes up when C is a dictionary constructor and
1378        -- f is a default method.
1379        -- Example: the instance for Show (ST s a) in GHC.ST
1380        --
1381        -- However we *also* treat (\x. C p q) as a con-app-like thing,
1382        --      Note [Closure conversion]
1383    is_con_app (Var v)    = isConLikeId v
1384    is_con_app (App f _)  = is_con_app f
1385    is_con_app (Lam _ e)  = is_con_app e
1386    is_con_app (Tick _ e) = is_con_app e
1387    is_con_app _          = False
1388
1389maxExprSize :: Int
1390maxExprSize = 20  -- Rather arbitrary
1391
1392cheapExprSize :: CoreExpr -> Int
1393-- Maxes out at maxExprSize
1394cheapExprSize e
1395  = go 0 e
1396  where
1397    go n e | n >= maxExprSize = n
1398           | otherwise        = go1 n e
1399
1400    go1 n (Var {})        = n+1
1401    go1 n (Lit {})        = n+1
1402    go1 n (Type {})       = n
1403    go1 n (Coercion {})   = n
1404    go1 n (Tick _ e)      = go1 n e
1405    go1 n (Cast e _)      = go1 n e
1406    go1 n (App f a)       = go (go1 n f) a
1407    go1 n (Lam b e)
1408      | isTyVar b         = go1 n e
1409      | otherwise         = go (n+1) e
1410    go1 n (Let b e)       = gos (go1 n e) (rhssOfBind b)
1411    go1 n (Case e _ _ as) = gos (go1 n e) (rhssOfAlts as)
1412
1413    gos n [] = n
1414    gos n (e:es) | n >= maxExprSize = n
1415                 | otherwise        = gos (go1 n e) es
1416
1417betterLB :: NodeScore -> NodeScore -> Bool
1418-- If  n1 `betterLB` n2  then choose n1 as the loop breaker
1419betterLB (rank1, size1, lb1) (rank2, size2, _)
1420  | rank1 < rank2 = True
1421  | rank1 > rank2 = False
1422  | size1 < size2 = False   -- Make the bigger n2 into the loop breaker
1423  | size1 > size2 = True
1424  | lb1           = True    -- Tie-break: if n1 was a loop breaker before, choose it
1425  | otherwise     = False   -- See Note [Loop breakers, node scoring, and stability]
1426
1427{- Note [Self-recursion and loop breakers]
1428~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1429If we have
1430   rec { f = ...f...g...
1431       ; g = .....f...   }
1432then 'f' has to be a loop breaker anyway, so we may as well choose it
1433right away, so that g can inline freely.
1434
1435This is really just a cheap hack. Consider
1436   rec { f = ...g...
1437       ; g = ..f..h...
1438      ;  h = ...f....}
1439Here f or g are better loop breakers than h; but we might accidentally
1440choose h.  Finding the minimal set of loop breakers is hard.
1441
1442Note [Loop breakers, node scoring, and stability]
1443~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1444To choose a loop breaker, we give a NodeScore to each node in the SCC,
1445and pick the one with the best score (according to 'betterLB').
1446
1447We need to be jolly careful (#12425, #12234) about the stability
1448of this choice. Suppose we have
1449
1450    let rec { f = ...g...g...
1451            ; g = ...f...f... }
1452    in
1453    case x of
1454      True  -> ...f..
1455      False -> ..f...
1456
1457In each iteration of the simplifier the occurrence analyser OccAnal
1458chooses a loop breaker. Suppose in iteration 1 it choose g as the loop
1459breaker. That means it is free to inline f.
1460
1461Suppose that GHC decides to inline f in the branches of the case, but
1462(for some reason; eg it is not saturated) in the rhs of g. So we get
1463
1464    let rec { f = ...g...g...
1465            ; g = ...f...f... }
1466    in
1467    case x of
1468      True  -> ...g...g.....
1469      False -> ..g..g....
1470
1471Now suppose that, for some reason, in the next iteration the occurrence
1472analyser chooses f as the loop breaker, so it can freely inline g. And
1473again for some reason the simplifier inlines g at its calls in the case
1474branches, but not in the RHS of f. Then we get
1475
1476    let rec { f = ...g...g...
1477            ; g = ...f...f... }
1478    in
1479    case x of
1480      True  -> ...(...f...f...)...(...f..f..).....
1481      False -> ..(...f...f...)...(..f..f...)....
1482
1483You can see where this is going! Each iteration of the simplifier
1484doubles the number of calls to f or g. No wonder GHC is slow!
1485
1486(In the particular example in comment:3 of #12425, f and g are the two
1487mutually recursive fmap instances for CondT and Result. They are both
1488marked INLINE which, oddly, is why they don't inline in each other's
1489RHS, because the call there is not saturated.)
1490
1491The root cause is that we flip-flop on our choice of loop breaker. I
1492always thought it didn't matter, and indeed for any single iteration
1493to terminate, it doesn't matter. But when we iterate, it matters a
1494lot!!
1495
1496So The Plan is this:
1497   If there is a tie, choose the node that
1498   was a loop breaker last time round
1499
1500Hence the is_lb field of NodeScore
1501
1502************************************************************************
1503*                                                                      *
1504                   Right hand sides
1505*                                                                      *
1506************************************************************************
1507-}
1508
1509occAnalRhs :: OccEnv -> RecFlag -> Id -> [CoreBndr] -> CoreExpr
1510           -> (UsageDetails, [CoreBndr], CoreExpr)
1511              -- Returned usage details covers only the RHS,
1512              -- and *not* the RULE or INLINE template for the Id
1513occAnalRhs env Recursive _ bndrs body
1514  = occAnalRecRhs env bndrs body
1515occAnalRhs env NonRecursive id bndrs body
1516  = occAnalNonRecRhs env id bndrs body
1517
1518occAnalRecRhs :: OccEnv -> [CoreBndr] -> CoreExpr    -- Rhs lambdas, body
1519           -> (UsageDetails, [CoreBndr], CoreExpr)
1520              -- Returned usage details covers only the RHS,
1521              -- and *not* the RULE or INLINE template for the Id
1522occAnalRecRhs env bndrs body = occAnalLamOrRhs (rhsCtxt env) bndrs body
1523
1524occAnalNonRecRhs :: OccEnv
1525                 -> Id -> [CoreBndr] -> CoreExpr    -- Binder; rhs lams, body
1526                     -- Binder is already tagged with occurrence info
1527                 -> (UsageDetails, [CoreBndr], CoreExpr)
1528              -- Returned usage details covers only the RHS,
1529              -- and *not* the RULE or INLINE template for the Id
1530occAnalNonRecRhs env bndr bndrs body
1531  = occAnalLamOrRhs rhs_env bndrs body
1532  where
1533    env1 | is_join_point    = env  -- See Note [Join point RHSs]
1534         | certainly_inline = env  -- See Note [Cascading inlines]
1535         | otherwise        = rhsCtxt env
1536
1537    -- See Note [Sources of one-shot information]
1538    rhs_env = env1 { occ_one_shots = argOneShots dmd }
1539
1540    certainly_inline -- See Note [Cascading inlines]
1541      = case occ of
1542          OneOcc { occ_in_lam = in_lam, occ_n_br = n_br }
1543            -> not in_lam && n_br == 1 && active && not_stable
1544          _ -> False
1545
1546    is_join_point = isAlwaysTailCalled occ
1547    -- Like (isJoinId bndr) but happens one step earlier
1548    --  c.f. willBeJoinId_maybe
1549
1550    occ        = idOccInfo bndr
1551    dmd        = idDemandInfo bndr
1552    active     = isAlwaysActive (idInlineActivation bndr)
1553    not_stable = not (isStableUnfolding (idUnfolding bndr))
1554
1555occAnalUnfolding :: OccEnv
1556                 -> RecFlag
1557                 -> Id
1558                 -> Maybe UsageDetails
1559                      -- Just the analysis, not a new unfolding. The unfolding
1560                      -- got analysed when it was created and we don't need to
1561                      -- update it.
1562occAnalUnfolding env rec_flag id
1563  = case realIdUnfolding id of -- ignore previous loop-breaker flag
1564      CoreUnfolding { uf_tmpl = rhs, uf_src = src }
1565        | not (isStableSource src)
1566        -> Nothing
1567        | otherwise
1568        -> Just $ markAllMany usage
1569        where
1570          (bndrs, body) = collectBinders rhs
1571          (usage, _, _) = occAnalRhs env rec_flag id bndrs body
1572
1573      DFunUnfolding { df_bndrs = bndrs, df_args = args }
1574        -> Just $ zapDetails (delDetailsList usage bndrs)
1575        where
1576          usage = andUDsList (map (fst . occAnal env) args)
1577
1578      _ -> Nothing
1579
1580occAnalRules :: OccEnv
1581             -> Maybe JoinArity -- If the binder is (or MAY become) a join
1582                                -- point, what its join arity is (or WOULD
1583                                -- become). See Note [Rules and join points].
1584             -> RecFlag
1585             -> Id
1586             -> [(CoreRule,      -- Each (non-built-in) rule
1587                  UsageDetails,  -- Usage details for LHS
1588                  UsageDetails)] -- Usage details for RHS
1589occAnalRules env mb_expected_join_arity rec_flag id
1590  = [ (rule, lhs_uds, rhs_uds) | rule@Rule {} <- idCoreRules id
1591                               , let (lhs_uds, rhs_uds) = occ_anal_rule rule ]
1592  where
1593    occ_anal_rule (Rule { ru_bndrs = bndrs, ru_args = args, ru_rhs = rhs })
1594      = (lhs_uds, final_rhs_uds)
1595      where
1596        lhs_uds = addManyOccsSet emptyDetails $
1597                    (exprsFreeVars args `delVarSetList` bndrs)
1598        (rhs_bndrs, rhs_body) = collectBinders rhs
1599        (rhs_uds, _, _) = occAnalRhs env rec_flag id rhs_bndrs rhs_body
1600                            -- Note [Rules are extra RHSs]
1601                            -- Note [Rule dependency info]
1602        final_rhs_uds = adjust_tail_info args $ markAllMany $
1603                          (rhs_uds `delDetailsList` bndrs)
1604    occ_anal_rule _
1605      = (emptyDetails, emptyDetails)
1606
1607    adjust_tail_info args uds -- see Note [Rules and join points]
1608      = case mb_expected_join_arity of
1609          Just ar | args `lengthIs` ar -> uds
1610          _                            -> markAllNonTailCalled uds
1611{- Note [Join point RHSs]
1612~~~~~~~~~~~~~~~~~~~~~~~~~
1613Consider
1614   x = e
1615   join j = Just x
1616
1617We want to inline x into j right away, so we don't want to give
1618the join point a RhsCtxt (#14137).  It's not a huge deal, because
1619the FloatIn pass knows to float into join point RHSs; and the simplifier
1620does not float things out of join point RHSs.  But it's a simple, cheap
1621thing to do.  See #14137.
1622
1623Note [Cascading inlines]
1624~~~~~~~~~~~~~~~~~~~~~~~~
1625By default we use an rhsCtxt for the RHS of a binding.  This tells the
1626occ anal n that it's looking at an RHS, which has an effect in
1627occAnalApp.  In particular, for constructor applications, it makes
1628the arguments appear to have NoOccInfo, so that we don't inline into
1629them. Thus    x = f y
1630              k = Just x
1631we do not want to inline x.
1632
1633But there's a problem.  Consider
1634     x1 = a0 : []
1635     x2 = a1 : x1
1636     x3 = a2 : x2
1637     g  = f x3
1638First time round, it looks as if x1 and x2 occur as an arg of a
1639let-bound constructor ==> give them a many-occurrence.
1640But then x3 is inlined (unconditionally as it happens) and
1641next time round, x2 will be, and the next time round x1 will be
1642Result: multiple simplifier iterations.  Sigh.
1643
1644So, when analysing the RHS of x3 we notice that x3 will itself
1645definitely inline the next time round, and so we analyse x3's rhs in
1646an ordinary context, not rhsCtxt.  Hence the "certainly_inline" stuff.
1647
1648Annoyingly, we have to approximate SimplUtils.preInlineUnconditionally.
1649If (a) the RHS is expandable (see isExpandableApp in occAnalApp), and
1650   (b) certainly_inline says "yes" when preInlineUnconditionally says "no"
1651then the simplifier iterates indefinitely:
1652        x = f y
1653        k = Just x   -- We decide that k is 'certainly_inline'
1654        v = ...k...  -- but preInlineUnconditionally doesn't inline it
1655inline ==>
1656        k = Just (f y)
1657        v = ...k...
1658float ==>
1659        x1 = f y
1660        k = Just x1
1661        v = ...k...
1662
1663This is worse than the slow cascade, so we only want to say "certainly_inline"
1664if it really is certain.  Look at the note with preInlineUnconditionally
1665for the various clauses.
1666
1667
1668************************************************************************
1669*                                                                      *
1670                Expressions
1671*                                                                      *
1672************************************************************************
1673-}
1674
1675occAnal :: OccEnv
1676        -> CoreExpr
1677        -> (UsageDetails,       -- Gives info only about the "interesting" Ids
1678            CoreExpr)
1679
1680occAnal _   expr@(Type _) = (emptyDetails,         expr)
1681occAnal _   expr@(Lit _)  = (emptyDetails,         expr)
1682occAnal env expr@(Var _)  = occAnalApp env (expr, [], [])
1683    -- At one stage, I gathered the idRuleVars for the variable here too,
1684    -- which in a way is the right thing to do.
1685    -- But that went wrong right after specialisation, when
1686    -- the *occurrences* of the overloaded function didn't have any
1687    -- rules in them, so the *specialised* versions looked as if they
1688    -- weren't used at all.
1689
1690occAnal _ (Coercion co)
1691  = (addManyOccsSet emptyDetails (coVarsOfCo co), Coercion co)
1692        -- See Note [Gather occurrences of coercion variables]
1693
1694{-
1695Note [Gather occurrences of coercion variables]
1696~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1697We need to gather info about what coercion variables appear, so that
1698we can sort them into the right place when doing dependency analysis.
1699-}
1700
1701occAnal env (Tick tickish body)
1702  | SourceNote{} <- tickish
1703  = (usage, Tick tickish body')
1704                  -- SourceNotes are best-effort; so we just proceed as usual.
1705                  -- If we drop a tick due to the issues described below it's
1706                  -- not the end of the world.
1707
1708  | tickish `tickishScopesLike` SoftScope
1709  = (markAllNonTailCalled usage, Tick tickish body')
1710
1711  | Breakpoint _ ids <- tickish
1712  = (usage_lam `andUDs` foldr addManyOccs emptyDetails ids, Tick tickish body')
1713    -- never substitute for any of the Ids in a Breakpoint
1714
1715  | otherwise
1716  = (usage_lam, Tick tickish body')
1717  where
1718    !(usage,body') = occAnal env body
1719    -- for a non-soft tick scope, we can inline lambdas only
1720    usage_lam = markAllNonTailCalled (markAllInsideLam usage)
1721                  -- TODO There may be ways to make ticks and join points play
1722                  -- nicer together, but right now there are problems:
1723                  --   let j x = ... in tick<t> (j 1)
1724                  -- Making j a join point may cause the simplifier to drop t
1725                  -- (if the tick is put into the continuation). So we don't
1726                  -- count j 1 as a tail call.
1727                  -- See #14242.
1728
1729occAnal env (Cast expr co)
1730  = case occAnal env expr of { (usage, expr') ->
1731    let usage1 = zapDetailsIf (isRhsEnv env) usage
1732          -- usage1: if we see let x = y `cast` co
1733          -- then mark y as 'Many' so that we don't
1734          -- immediately inline y again.
1735        usage2 = addManyOccsSet usage1 (coVarsOfCo co)
1736          -- usage2: see Note [Gather occurrences of coercion variables]
1737    in (markAllNonTailCalled usage2, Cast expr' co)
1738    }
1739
1740occAnal env app@(App _ _)
1741  = occAnalApp env (collectArgsTicks tickishFloatable app)
1742
1743-- Ignore type variables altogether
1744--   (a) occurrences inside type lambdas only not marked as InsideLam
1745--   (b) type variables not in environment
1746
1747occAnal env (Lam x body)
1748  | isTyVar x
1749  = case occAnal env body of { (body_usage, body') ->
1750    (markAllNonTailCalled body_usage, Lam x body')
1751    }
1752
1753-- For value lambdas we do a special hack.  Consider
1754--      (\x. \y. ...x...)
1755-- If we did nothing, x is used inside the \y, so would be marked
1756-- as dangerous to dup.  But in the common case where the abstraction
1757-- is applied to two arguments this is over-pessimistic.
1758-- So instead, we just mark each binder with its occurrence
1759-- info in the *body* of the multiple lambda.
1760-- Then, the simplifier is careful when partially applying lambdas.
1761
1762occAnal env expr@(Lam _ _)
1763  = case occAnalLamOrRhs env binders body of { (usage, tagged_binders, body') ->
1764    let
1765        expr'       = mkLams tagged_binders body'
1766        usage1      = markAllNonTailCalled usage
1767        one_shot_gp = all isOneShotBndr tagged_binders
1768        final_usage | one_shot_gp = usage1
1769                    | otherwise   = markAllInsideLam usage1
1770    in
1771    (final_usage, expr') }
1772  where
1773    (binders, body) = collectBinders expr
1774
1775occAnal env (Case scrut bndr ty alts)
1776  = case occ_anal_scrut scrut alts     of { (scrut_usage, scrut') ->
1777    case mapAndUnzip occ_anal_alt alts of { (alts_usage_s, alts')   ->
1778    let
1779        alts_usage  = foldr orUDs emptyDetails alts_usage_s
1780        (alts_usage1, tagged_bndr) = tagLamBinder alts_usage bndr
1781        total_usage = markAllNonTailCalled scrut_usage `andUDs` alts_usage1
1782                        -- Alts can have tail calls, but the scrutinee can't
1783    in
1784    total_usage `seq` (total_usage, Case scrut' tagged_bndr ty alts') }}
1785  where
1786    alt_env = mkAltEnv env scrut bndr
1787    occ_anal_alt = occAnalAlt alt_env
1788
1789    occ_anal_scrut (Var v) (alt1 : other_alts)
1790        | not (null other_alts) || not (isDefaultAlt alt1)
1791        = (mkOneOcc env v True 0, Var v)
1792            -- The 'True' says that the variable occurs in an interesting
1793            -- context; the case has at least one non-default alternative
1794    occ_anal_scrut (Tick t e) alts
1795        | t `tickishScopesLike` SoftScope
1796          -- No reason to not look through all ticks here, but only
1797          -- for soft-scoped ticks we can do so without having to
1798          -- update returned occurance info (see occAnal)
1799        = second (Tick t) $ occ_anal_scrut e alts
1800
1801    occ_anal_scrut scrut _alts
1802        = occAnal (vanillaCtxt env) scrut    -- No need for rhsCtxt
1803
1804occAnal env (Let bind body)
1805  = case occAnal env body                of { (body_usage, body') ->
1806    case occAnalBind env NotTopLevel
1807                     noImpRuleEdges bind
1808                     body_usage          of { (final_usage, new_binds) ->
1809       (final_usage, mkLets new_binds body') }}
1810
1811occAnalArgs :: OccEnv -> [CoreExpr] -> [OneShots] -> (UsageDetails, [CoreExpr])
1812occAnalArgs _ [] _
1813  = (emptyDetails, [])
1814
1815occAnalArgs env (arg:args) one_shots
1816  | isTypeArg arg
1817  = case occAnalArgs env args one_shots of { (uds, args') ->
1818    (uds, arg:args') }
1819
1820  | otherwise
1821  = case argCtxt env one_shots           of { (arg_env, one_shots') ->
1822    case occAnal arg_env arg             of { (uds1, arg') ->
1823    case occAnalArgs env args one_shots' of { (uds2, args') ->
1824    (uds1 `andUDs` uds2, arg':args') }}}
1825
1826{-
1827Applications are dealt with specially because we want
1828the "build hack" to work.
1829
1830Note [Arguments of let-bound constructors]
1831~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1832Consider
1833    f x = let y = expensive x in
1834          let z = (True,y) in
1835          (case z of {(p,q)->q}, case z of {(p,q)->q})
1836We feel free to duplicate the WHNF (True,y), but that means
1837that y may be duplicated thereby.
1838
1839If we aren't careful we duplicate the (expensive x) call!
1840Constructors are rather like lambdas in this way.
1841-}
1842
1843occAnalApp :: OccEnv
1844           -> (Expr CoreBndr, [Arg CoreBndr], [Tickish Id])
1845           -> (UsageDetails, Expr CoreBndr)
1846occAnalApp env (Var fun, args, ticks)
1847  | null ticks = (uds, mkApps (Var fun) args')
1848  | otherwise  = (uds, mkTicks ticks $ mkApps (Var fun) args')
1849  where
1850    uds = fun_uds `andUDs` final_args_uds
1851
1852    !(args_uds, args') = occAnalArgs env args one_shots
1853    !final_args_uds
1854       | isRhsEnv env && is_exp = markAllNonTailCalled $
1855                                  markAllInsideLam args_uds
1856       | otherwise              = markAllNonTailCalled args_uds
1857       -- We mark the free vars of the argument of a constructor or PAP
1858       -- as "inside-lambda", if it is the RHS of a let(rec).
1859       -- This means that nothing gets inlined into a constructor or PAP
1860       -- argument position, which is what we want.  Typically those
1861       -- constructor arguments are just variables, or trivial expressions.
1862       -- We use inside-lam because it's like eta-expanding the PAP.
1863       --
1864       -- This is the *whole point* of the isRhsEnv predicate
1865       -- See Note [Arguments of let-bound constructors]
1866
1867    n_val_args = valArgCount args
1868    n_args     = length args
1869    fun_uds    = mkOneOcc env fun (n_val_args > 0) n_args
1870    is_exp     = isExpandableApp fun n_val_args
1871        -- See Note [CONLIKE pragma] in BasicTypes
1872        -- The definition of is_exp should match that in Simplify.prepareRhs
1873
1874    one_shots  = argsOneShots (idStrictness fun) guaranteed_val_args
1875    guaranteed_val_args = n_val_args + length (takeWhile isOneShotInfo
1876                                                         (occ_one_shots env))
1877        -- See Note [Sources of one-shot information], bullet point A']
1878
1879occAnalApp env (fun, args, ticks)
1880  = (markAllNonTailCalled (fun_uds `andUDs` args_uds),
1881     mkTicks ticks $ mkApps fun' args')
1882  where
1883    !(fun_uds, fun') = occAnal (addAppCtxt env args) fun
1884        -- The addAppCtxt is a bit cunning.  One iteration of the simplifier
1885        -- often leaves behind beta redexs like
1886        --      (\x y -> e) a1 a2
1887        -- Here we would like to mark x,y as one-shot, and treat the whole
1888        -- thing much like a let.  We do this by pushing some True items
1889        -- onto the context stack.
1890    !(args_uds, args') = occAnalArgs env args []
1891
1892zapDetailsIf :: Bool              -- If this is true
1893             -> UsageDetails      -- Then do zapDetails on this
1894             -> UsageDetails
1895zapDetailsIf True  uds = zapDetails uds
1896zapDetailsIf False uds = uds
1897
1898{-
1899Note [Sources of one-shot information]
1900~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1901The occurrence analyser obtains one-shot-lambda information from two sources:
1902
1903A:  Saturated applications:  eg   f e1 .. en
1904
1905    In general, given a call (f e1 .. en) we can propagate one-shot info from
1906    f's strictness signature into e1 .. en, but /only/ if n is enough to
1907    saturate the strictness signature. A strictness signature like
1908
1909          f :: C1(C1(L))LS
1910
1911    means that *if f is applied to three arguments* then it will guarantee to
1912    call its first argument at most once, and to call the result of that at
1913    most once. But if f has fewer than three arguments, all bets are off; e.g.
1914
1915          map (f (\x y. expensive) e2) xs
1916
1917    Here the \x y abstraction may be called many times (once for each element of
1918    xs) so we should not mark x and y as one-shot. But if it was
1919
1920          map (f (\x y. expensive) 3 2) xs
1921
1922    then the first argument of f will be called at most once.
1923
1924    The one-shot info, derived from f's strictness signature, is
1925    computed by 'argsOneShots', called in occAnalApp.
1926
1927A': Non-obviously saturated applications: eg    build (f (\x y -> expensive))
1928    where f is as above.
1929
1930    In this case, f is only manifestly applied to one argument, so it does not
1931    look saturated. So by the previous point, we should not use its strictness
1932    signature to learn about the one-shotness of \x y. But in this case we can:
1933    build is fully applied, so we may use its strictness signature; and from
1934    that we learn that build calls its argument with two arguments *at most once*.
1935
1936    So there is really only one call to f, and it will have three arguments. In
1937    that sense, f is saturated, and we may proceed as described above.
1938
1939    Hence the computation of 'guaranteed_val_args' in occAnalApp, using
1940    '(occ_one_shots env)'.  See also #13227, comment:9
1941
1942B:  Let-bindings:  eg   let f = \c. let ... in \n -> blah
1943                        in (build f, build f)
1944
1945    Propagate one-shot info from the demanand-info on 'f' to the
1946    lambdas in its RHS (which may not be syntactically at the top)
1947
1948    This information must have come from a previous run of the demanand
1949    analyser.
1950
1951Previously, the demand analyser would *also* set the one-shot information, but
1952that code was buggy (see #11770), so doing it only in on place, namely here, is
1953saner.
1954
1955Note [OneShots]
1956~~~~~~~~~~~~~~~
1957When analysing an expression, the occ_one_shots argument contains information
1958about how the function is being used. The length of the list indicates
1959how many arguments will eventually be passed to the analysed expression,
1960and the OneShotInfo indicates whether this application is once or multiple times.
1961
1962Example:
1963
1964 Context of f                occ_one_shots when analysing f
1965
1966 f 1 2                       [OneShot, OneShot]
1967 map (f 1)                   [OneShot, NoOneShotInfo]
1968 build f                     [OneShot, OneShot]
1969 f 1 2 `seq` f 2 1           [NoOneShotInfo, OneShot]
1970
1971Note [Binders in case alternatives]
1972~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1973Consider
1974    case x of y { (a,b) -> f y }
1975We treat 'a', 'b' as dead, because they don't physically occur in the
1976case alternative.  (Indeed, a variable is dead iff it doesn't occur in
1977its scope in the output of OccAnal.)  It really helps to know when
1978binders are unused.  See esp the call to isDeadBinder in
1979Simplify.mkDupableAlt
1980
1981In this example, though, the Simplifier will bring 'a' and 'b' back to
1982life, beause it binds 'y' to (a,b) (imagine got inlined and
1983scrutinised y).
1984-}
1985
1986occAnalLamOrRhs :: OccEnv -> [CoreBndr] -> CoreExpr
1987                -> (UsageDetails, [CoreBndr], CoreExpr)
1988occAnalLamOrRhs env [] body
1989  = case occAnal env body of (body_usage, body') -> (body_usage, [], body')
1990      -- RHS of thunk or nullary join point
1991occAnalLamOrRhs env (bndr:bndrs) body
1992  | isTyVar bndr
1993  = -- Important: Keep the environment so that we don't inline into an RHS like
1994    --   \(@ x) -> C @x (f @x)
1995    -- (see the beginning of Note [Cascading inlines]).
1996    case occAnalLamOrRhs env bndrs body of
1997      (body_usage, bndrs', body') -> (body_usage, bndr:bndrs', body')
1998occAnalLamOrRhs env binders body
1999  = case occAnal env_body body of { (body_usage, body') ->
2000    let
2001        (final_usage, tagged_binders) = tagLamBinders body_usage binders'
2002                      -- Use binders' to put one-shot info on the lambdas
2003    in
2004    (final_usage, tagged_binders, body') }
2005  where
2006    (env_body, binders') = oneShotGroup env binders
2007
2008occAnalAlt :: (OccEnv, Maybe (Id, CoreExpr))
2009           -> CoreAlt
2010           -> (UsageDetails, Alt IdWithOccInfo)
2011occAnalAlt (env, scrut_bind) (con, bndrs, rhs)
2012  = case occAnal env rhs of { (rhs_usage1, rhs1) ->
2013    let
2014      (alt_usg, tagged_bndrs) = tagLamBinders rhs_usage1 bndrs
2015                                -- See Note [Binders in case alternatives]
2016      (alt_usg', rhs2) = wrapAltRHS env scrut_bind alt_usg tagged_bndrs rhs1
2017    in
2018    (alt_usg', (con, tagged_bndrs, rhs2)) }
2019
2020wrapAltRHS :: OccEnv
2021           -> Maybe (Id, CoreExpr)      -- proxy mapping generated by mkAltEnv
2022           -> UsageDetails              -- usage for entire alt (p -> rhs)
2023           -> [Var]                     -- alt binders
2024           -> CoreExpr                  -- alt RHS
2025           -> (UsageDetails, CoreExpr)
2026wrapAltRHS env (Just (scrut_var, let_rhs)) alt_usg bndrs alt_rhs
2027  | occ_binder_swap env
2028  , scrut_var `usedIn` alt_usg -- bndrs are not be present in alt_usg so this
2029                               -- handles condition (a) in Note [Binder swap]
2030  , not captured               -- See condition (b) in Note [Binder swap]
2031  = ( alt_usg' `andUDs` let_rhs_usg
2032    , Let (NonRec tagged_scrut_var let_rhs') alt_rhs )
2033  where
2034    captured = any (`usedIn` let_rhs_usg) bndrs  -- Check condition (b)
2035
2036    -- The rhs of the let may include coercion variables
2037    -- if the scrutinee was a cast, so we must gather their
2038    -- usage. See Note [Gather occurrences of coercion variables]
2039    -- Moreover, the rhs of the let may mention the case-binder, and
2040    -- we want to gather its occ-info as well
2041    (let_rhs_usg, let_rhs') = occAnal env let_rhs
2042
2043    (alt_usg', tagged_scrut_var) = tagLamBinder alt_usg scrut_var
2044
2045wrapAltRHS _ _ alt_usg _ alt_rhs
2046  = (alt_usg, alt_rhs)
2047
2048{-
2049************************************************************************
2050*                                                                      *
2051                    OccEnv
2052*                                                                      *
2053************************************************************************
2054-}
2055
2056data OccEnv
2057  = OccEnv { occ_encl       :: !OccEncl      -- Enclosing context information
2058           , occ_one_shots  :: !OneShots     -- See Note [OneShots]
2059           , occ_gbl_scrut  :: GlobalScruts
2060
2061           , occ_unf_act   :: Id -> Bool   -- Which Id unfoldings are active
2062
2063           , occ_rule_act   :: Activation -> Bool   -- Which rules are active
2064             -- See Note [Finding rule RHS free vars]
2065
2066           , occ_binder_swap :: !Bool -- enable the binder_swap
2067             -- See CorePrep Note [Dead code in CorePrep]
2068    }
2069
2070type GlobalScruts = IdSet   -- See Note [Binder swap on GlobalId scrutinees]
2071
2072-----------------------------
2073-- OccEncl is used to control whether to inline into constructor arguments
2074-- For example:
2075--      x = (p,q)               -- Don't inline p or q
2076--      y = /\a -> (p a, q a)   -- Still don't inline p or q
2077--      z = f (p,q)             -- Do inline p,q; it may make a rule fire
2078-- So OccEncl tells enough about the context to know what to do when
2079-- we encounter a constructor application or PAP.
2080
2081data OccEncl
2082  = OccRhs              -- RHS of let(rec), albeit perhaps inside a type lambda
2083                        -- Don't inline into constructor args here
2084  | OccVanilla          -- Argument of function, body of lambda, scruintee of case etc.
2085                        -- Do inline into constructor args here
2086
2087instance Outputable OccEncl where
2088  ppr OccRhs     = text "occRhs"
2089  ppr OccVanilla = text "occVanilla"
2090
2091-- See note [OneShots]
2092type OneShots = [OneShotInfo]
2093
2094initOccEnv :: OccEnv
2095initOccEnv
2096  = OccEnv { occ_encl      = OccVanilla
2097           , occ_one_shots = []
2098           , occ_gbl_scrut = emptyVarSet
2099                 -- To be conservative, we say that all
2100                 -- inlines and rules are active
2101           , occ_unf_act   = \_ -> True
2102           , occ_rule_act  = \_ -> True
2103           , occ_binder_swap = True }
2104
2105vanillaCtxt :: OccEnv -> OccEnv
2106vanillaCtxt env = env { occ_encl = OccVanilla, occ_one_shots = [] }
2107
2108rhsCtxt :: OccEnv -> OccEnv
2109rhsCtxt env = env { occ_encl = OccRhs, occ_one_shots = [] }
2110
2111argCtxt :: OccEnv -> [OneShots] -> (OccEnv, [OneShots])
2112argCtxt env []
2113  = (env { occ_encl = OccVanilla, occ_one_shots = [] }, [])
2114argCtxt env (one_shots:one_shots_s)
2115  = (env { occ_encl = OccVanilla, occ_one_shots = one_shots }, one_shots_s)
2116
2117isRhsEnv :: OccEnv -> Bool
2118isRhsEnv (OccEnv { occ_encl = OccRhs })     = True
2119isRhsEnv (OccEnv { occ_encl = OccVanilla }) = False
2120
2121oneShotGroup :: OccEnv -> [CoreBndr]
2122             -> ( OccEnv
2123                , [CoreBndr] )
2124        -- The result binders have one-shot-ness set that they might not have had originally.
2125        -- This happens in (build (\c n -> e)).  Here the occurrence analyser
2126        -- linearity context knows that c,n are one-shot, and it records that fact in
2127        -- the binder. This is useful to guide subsequent float-in/float-out tranformations
2128
2129oneShotGroup env@(OccEnv { occ_one_shots = ctxt }) bndrs
2130  = go ctxt bndrs []
2131  where
2132    go ctxt [] rev_bndrs
2133      = ( env { occ_one_shots = ctxt, occ_encl = OccVanilla }
2134        , reverse rev_bndrs )
2135
2136    go [] bndrs rev_bndrs
2137      = ( env { occ_one_shots = [], occ_encl = OccVanilla }
2138        , reverse rev_bndrs ++ bndrs )
2139
2140    go ctxt@(one_shot : ctxt') (bndr : bndrs) rev_bndrs
2141      | isId bndr = go ctxt' bndrs (bndr': rev_bndrs)
2142      | otherwise = go ctxt  bndrs (bndr : rev_bndrs)
2143      where
2144        bndr' = updOneShotInfo bndr one_shot
2145               -- Use updOneShotInfo, not setOneShotInfo, as pre-existing
2146               -- one-shot info might be better than what we can infer, e.g.
2147               -- due to explicit use of the magic 'oneShot' function.
2148               -- See Note [The oneShot function]
2149
2150
2151markJoinOneShots :: Maybe JoinArity -> [Var] -> [Var]
2152-- Mark the lambdas of a non-recursive join point as one-shot.
2153-- This is good to prevent gratuitous float-out etc
2154markJoinOneShots mb_join_arity bndrs
2155  = case mb_join_arity of
2156      Nothing -> bndrs
2157      Just n  -> go n bndrs
2158 where
2159   go 0 bndrs  = bndrs
2160   go _ []     = [] -- This can legitimately happen.
2161                    -- e.g.    let j = case ... in j True
2162                    -- This will become an arity-1 join point after the
2163                    -- simplifier has eta-expanded it; but it may not have
2164                    -- enough lambdas /yet/. (Lint checks that JoinIds do
2165                    -- have enough lambdas.)
2166   go n (b:bs) = b' : go (n-1) bs
2167     where
2168       b' | isId b    = setOneShotLambda b
2169          | otherwise = b
2170
2171addAppCtxt :: OccEnv -> [Arg CoreBndr] -> OccEnv
2172addAppCtxt env@(OccEnv { occ_one_shots = ctxt }) args
2173  = env { occ_one_shots = replicate (valArgCount args) OneShotLam ++ ctxt }
2174
2175transClosureFV :: UniqFM VarSet -> UniqFM VarSet
2176-- If (f,g), (g,h) are in the input, then (f,h) is in the output
2177--                                   as well as (f,g), (g,h)
2178transClosureFV env
2179  | no_change = env
2180  | otherwise = transClosureFV (listToUFM new_fv_list)
2181  where
2182    (no_change, new_fv_list) = mapAccumL bump True (nonDetUFMToList env)
2183      -- It's OK to use nonDetUFMToList here because we'll forget the
2184      -- ordering by creating a new set with listToUFM
2185    bump no_change (b,fvs)
2186      | no_change_here = (no_change, (b,fvs))
2187      | otherwise      = (False,     (b,new_fvs))
2188      where
2189        (new_fvs, no_change_here) = extendFvs env fvs
2190
2191-------------
2192extendFvs_ :: UniqFM VarSet -> VarSet -> VarSet
2193extendFvs_ env s = fst (extendFvs env s)   -- Discard the Bool flag
2194
2195extendFvs :: UniqFM VarSet -> VarSet -> (VarSet, Bool)
2196-- (extendFVs env s) returns
2197--     (s `union` env(s), env(s) `subset` s)
2198extendFvs env s
2199  | isNullUFM env
2200  = (s, True)
2201  | otherwise
2202  = (s `unionVarSet` extras, extras `subVarSet` s)
2203  where
2204    extras :: VarSet    -- env(s)
2205    extras = nonDetFoldUFM unionVarSet emptyVarSet $
2206      -- It's OK to use nonDetFoldUFM here because unionVarSet commutes
2207             intersectUFM_C (\x _ -> x) env (getUniqSet s)
2208
2209{-
2210************************************************************************
2211*                                                                      *
2212                    Binder swap
2213*                                                                      *
2214************************************************************************
2215
2216Note [Binder swap]
2217~~~~~~~~~~~~~~~~~~
2218The "binder swap" tranformation swaps occurence of the
2219scrutinee of a case for occurrences of the case-binder:
2220
2221 (1)  case x of b { pi -> ri }
2222         ==>
2223      case x of b { pi -> let x=b in ri }
2224
2225 (2)  case (x |> co) of b { pi -> ri }
2226        ==>
2227      case (x |> co) of b { pi -> let x = b |> sym co in ri }
2228
2229In both cases, the trivial 'let' can be eliminated by the
2230immediately following simplifier pass.
2231
2232There are two reasons for making this swap:
2233
2234(A) It reduces the number of occurrences of the scrutinee, x.
2235    That in turn might reduce its occurrences to one, so we
2236    can inline it and save an allocation.  E.g.
2237      let x = factorial y in case x of b { I# v -> ...x... }
2238    If we replace 'x' by 'b' in the alternative we get
2239      let x = factorial y in case x of b { I# v -> ...b... }
2240    and now we can inline 'x', thus
2241      case (factorial y) of b { I# v -> ...b... }
2242
2243(B) The case-binder b has unfolding information; in the
2244    example above we know that b = I# v. That in turn allows
2245    nested cases to simplify.  Consider
2246       case x of b { I# v ->
2247       ...(case x of b2 { I# v2 -> rhs })...
2248    If we replace 'x' by 'b' in the alternative we get
2249       case x of b { I# v ->
2250       ...(case b of b2 { I# v2 -> rhs })...
2251    and now it is trivial to simplify the inner case:
2252       case x of b { I# v ->
2253       ...(let b2 = b in rhs)...
2254
2255    The same can happen even if the scrutinee is a variable
2256    with a cast: see Note [Case of cast]
2257
2258In both cases, in a particular alternative (pi -> ri), we only
2259add the binding if
2260  (a) x occurs free in (pi -> ri)
2261        (ie it occurs in ri, but is not bound in pi)
2262  (b) the pi does not bind b (or the free vars of co)
2263We need (a) and (b) for the inserted binding to be correct.
2264
2265For the alternatives where we inject the binding, we can transfer
2266all x's OccInfo to b.  And that is the point.
2267
2268Notice that
2269  * The deliberate shadowing of 'x'.
2270  * That (a) rapidly becomes false, so no bindings are injected.
2271
2272The reason for doing these transformations /here in the occurrence
2273analyser/ is because it allows us to adjust the OccInfo for 'x' and
2274'b' as we go.
2275
2276  * Suppose the only occurrences of 'x' are the scrutinee and in the
2277    ri; then this transformation makes it occur just once, and hence
2278    get inlined right away.
2279
2280  * If instead we do this in the Simplifier, we don't know whether 'x'
2281    is used in ri, so we are forced to pessimistically zap b's OccInfo
2282    even though it is typically dead (ie neither it nor x appear in
2283    the ri).  There's nothing actually wrong with zapping it, except
2284    that it's kind of nice to know which variables are dead.  My nose
2285    tells me to keep this information as robustly as possible.
2286
2287The Maybe (Id,CoreExpr) passed to occAnalAlt is the extra let-binding
2288{x=b}; it's Nothing if the binder-swap doesn't happen.
2289
2290There is a danger though.  Consider
2291      let v = x +# y
2292      in case (f v) of w -> ...v...v...
2293And suppose that (f v) expands to just v.  Then we'd like to
2294use 'w' instead of 'v' in the alternative.  But it may be too
2295late; we may have substituted the (cheap) x+#y for v in the
2296same simplifier pass that reduced (f v) to v.
2297
2298I think this is just too bad.  CSE will recover some of it.
2299
2300Note [Case of cast]
2301~~~~~~~~~~~~~~~~~~~
2302Consider        case (x `cast` co) of b { I# ->
2303                ... (case (x `cast` co) of {...}) ...
2304We'd like to eliminate the inner case.  That is the motivation for
2305equation (2) in Note [Binder swap].  When we get to the inner case, we
2306inline x, cancel the casts, and away we go.
2307
2308Note [Binder swap on GlobalId scrutinees]
2309~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2310When the scrutinee is a GlobalId we must take care in two ways
2311
2312 i) In order to *know* whether 'x' occurs free in the RHS, we need its
2313    occurrence info. BUT, we don't gather occurrence info for
2314    GlobalIds.  That's the reason for the (small) occ_gbl_scrut env in
2315    OccEnv is for: it says "gather occurrence info for these".
2316
2317 ii) We must call localiseId on 'x' first, in case it's a GlobalId, or
2318     has an External Name. See, for example, SimplEnv Note [Global Ids in
2319     the substitution].
2320
2321Note [Zap case binders in proxy bindings]
2322~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2323From the original
2324     case x of cb(dead) { p -> ...x... }
2325we will get
2326     case x of cb(live) { p -> let x = cb in ...x... }
2327
2328Core Lint never expects to find an *occurrence* of an Id marked
2329as Dead, so we must zap the OccInfo on cb before making the
2330binding x = cb.  See #5028.
2331
2332NB: the OccInfo on /occurrences/ really doesn't matter much; the simplifier
2333doesn't use it. So this is only to satisfy the perhpas-over-picky Lint.
2334
2335Historical note [no-case-of-case]
2336~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2337We *used* to suppress the binder-swap in case expressions when
2338-fno-case-of-case is on.  Old remarks:
2339    "This happens in the first simplifier pass,
2340    and enhances full laziness.  Here's the bad case:
2341            f = \ y -> ...(case x of I# v -> ...(case x of ...) ... )
2342    If we eliminate the inner case, we trap it inside the I# v -> arm,
2343    which might prevent some full laziness happening.  I've seen this
2344    in action in spectral/cichelli/Prog.hs:
2345             [(m,n) | m <- [1..max], n <- [1..max]]
2346    Hence the check for NoCaseOfCase."
2347However, now the full-laziness pass itself reverses the binder-swap, so this
2348check is no longer necessary.
2349
2350Historical note [Suppressing the case binder-swap]
2351~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2352This old note describes a problem that is also fixed by doing the
2353binder-swap in OccAnal:
2354
2355    There is another situation when it might make sense to suppress the
2356    case-expression binde-swap. If we have
2357
2358        case x of w1 { DEFAULT -> case x of w2 { A -> e1; B -> e2 }
2359                       ...other cases .... }
2360
2361    We'll perform the binder-swap for the outer case, giving
2362
2363        case x of w1 { DEFAULT -> case w1 of w2 { A -> e1; B -> e2 }
2364                       ...other cases .... }
2365
2366    But there is no point in doing it for the inner case, because w1 can't
2367    be inlined anyway.  Furthermore, doing the case-swapping involves
2368    zapping w2's occurrence info (see paragraphs that follow), and that
2369    forces us to bind w2 when doing case merging.  So we get
2370
2371        case x of w1 { A -> let w2 = w1 in e1
2372                       B -> let w2 = w1 in e2
2373                       ...other cases .... }
2374
2375    This is plain silly in the common case where w2 is dead.
2376
2377    Even so, I can't see a good way to implement this idea.  I tried
2378    not doing the binder-swap if the scrutinee was already evaluated
2379    but that failed big-time:
2380
2381            data T = MkT !Int
2382
2383            case v of w  { MkT x ->
2384            case x of x1 { I# y1 ->
2385            case x of x2 { I# y2 -> ...
2386
2387    Notice that because MkT is strict, x is marked "evaluated".  But to
2388    eliminate the last case, we must either make sure that x (as well as
2389    x1) has unfolding MkT y1.  The straightforward thing to do is to do
2390    the binder-swap.  So this whole note is a no-op.
2391
2392It's fixed by doing the binder-swap in OccAnal because we can do the
2393binder-swap unconditionally and still get occurrence analysis
2394information right.
2395-}
2396
2397mkAltEnv :: OccEnv -> CoreExpr -> Id -> (OccEnv, Maybe (Id, CoreExpr))
2398-- Does three things: a) makes the occ_one_shots = OccVanilla
2399--                    b) extends the GlobalScruts if possible
2400--                    c) returns a proxy mapping, binding the scrutinee
2401--                       to the case binder, if possible
2402mkAltEnv env@(OccEnv { occ_gbl_scrut = pe }) scrut case_bndr
2403  = case stripTicksTopE (const True) scrut of
2404      Var v           -> add_scrut v case_bndr'
2405      Cast (Var v) co -> add_scrut v (Cast case_bndr' (mkSymCo co))
2406                          -- See Note [Case of cast]
2407      _               -> (env { occ_encl = OccVanilla }, Nothing)
2408
2409  where
2410    add_scrut v rhs
2411      | isGlobalId v = (env { occ_encl = OccVanilla }, Nothing)
2412      | otherwise    = ( env { occ_encl = OccVanilla
2413                             , occ_gbl_scrut = pe `extendVarSet` v }
2414                       , Just (localise v, rhs) )
2415      -- ToDO: this isGlobalId stuff is a TEMPORARY FIX
2416      --       to avoid the binder-swap for GlobalIds
2417      --       See #16346
2418
2419    case_bndr' = Var (zapIdOccInfo case_bndr)
2420                   -- See Note [Zap case binders in proxy bindings]
2421
2422    -- Localise the scrut_var before shadowing it; we're making a
2423    -- new binding for it, and it might have an External Name, or
2424    -- even be a GlobalId; Note [Binder swap on GlobalId scrutinees]
2425    -- Also we don't want any INLINE or NOINLINE pragmas!
2426    localise scrut_var = mkLocalIdOrCoVar (localiseName (idName scrut_var))
2427                                          (idType scrut_var)
2428
2429{-
2430************************************************************************
2431*                                                                      *
2432\subsection[OccurAnal-types]{OccEnv}
2433*                                                                      *
2434************************************************************************
2435
2436Note [UsageDetails and zapping]
2437~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2438
2439On many occasions, we must modify all gathered occurrence data at once. For
2440instance, all occurrences underneath a (non-one-shot) lambda set the
2441'occ_in_lam' flag to become 'True'. We could use 'mapVarEnv' to do this, but
2442that takes O(n) time and we will do this often---in particular, there are many
2443places where tail calls are not allowed, and each of these causes all variables
2444to get marked with 'NoTailCallInfo'.
2445
2446Instead of relying on `mapVarEnv`, then, we carry three 'IdEnv's around along
2447with the 'OccInfoEnv'. Each of these extra environments is a "zapped set"
2448recording which variables have been zapped in some way. Zapping all occurrence
2449info then simply means setting the corresponding zapped set to the whole
2450'OccInfoEnv', a fast O(1) operation.
2451-}
2452
2453type OccInfoEnv = IdEnv OccInfo -- A finite map from ids to their usage
2454                -- INVARIANT: never IAmDead
2455                -- (Deadness is signalled by not being in the map at all)
2456
2457type ZappedSet = OccInfoEnv -- Values are ignored
2458
2459data UsageDetails
2460  = UD { ud_env       :: !OccInfoEnv
2461       , ud_z_many    :: ZappedSet   -- apply 'markMany' to these
2462       , ud_z_in_lam  :: ZappedSet   -- apply 'markInsideLam' to these
2463       , ud_z_no_tail :: ZappedSet } -- apply 'markNonTailCalled' to these
2464  -- INVARIANT: All three zapped sets are subsets of the OccInfoEnv
2465
2466instance Outputable UsageDetails where
2467  ppr ud = ppr (ud_env (flattenUsageDetails ud))
2468
2469-------------------
2470-- UsageDetails API
2471
2472andUDs, orUDs
2473        :: UsageDetails -> UsageDetails -> UsageDetails
2474andUDs = combineUsageDetailsWith addOccInfo
2475orUDs  = combineUsageDetailsWith orOccInfo
2476
2477andUDsList :: [UsageDetails] -> UsageDetails
2478andUDsList = foldl' andUDs emptyDetails
2479
2480mkOneOcc :: OccEnv -> Id -> InterestingCxt -> JoinArity -> UsageDetails
2481mkOneOcc env id int_cxt arity
2482  | isLocalId id
2483  = singleton $ OneOcc { occ_in_lam  = False
2484                       , occ_n_br  = 1
2485                       , occ_int_cxt = int_cxt
2486                       , occ_tail    = AlwaysTailCalled arity }
2487  | id `elemVarSet` occ_gbl_scrut env
2488  = singleton noOccInfo
2489
2490  | otherwise
2491  = emptyDetails
2492  where
2493    singleton info = emptyDetails { ud_env = unitVarEnv id info }
2494
2495addOneOcc :: UsageDetails -> Id -> OccInfo -> UsageDetails
2496addOneOcc ud id info
2497  = ud { ud_env = extendVarEnv_C plus_zapped (ud_env ud) id info }
2498      `alterZappedSets` (`delVarEnv` id)
2499  where
2500    plus_zapped old new = doZapping ud id old `addOccInfo` new
2501
2502addManyOccsSet :: UsageDetails -> VarSet -> UsageDetails
2503addManyOccsSet usage id_set = nonDetFoldUniqSet addManyOccs usage id_set
2504  -- It's OK to use nonDetFoldUFM here because addManyOccs commutes
2505
2506-- Add several occurrences, assumed not to be tail calls
2507addManyOccs :: Var -> UsageDetails -> UsageDetails
2508addManyOccs v u | isId v    = addOneOcc u v noOccInfo
2509                | otherwise = u
2510        -- Give a non-committal binder info (i.e noOccInfo) because
2511        --   a) Many copies of the specialised thing can appear
2512        --   b) We don't want to substitute a BIG expression inside a RULE
2513        --      even if that's the only occurrence of the thing
2514        --      (Same goes for INLINE.)
2515
2516delDetails :: UsageDetails -> Id -> UsageDetails
2517delDetails ud bndr
2518  = ud `alterUsageDetails` (`delVarEnv` bndr)
2519
2520delDetailsList :: UsageDetails -> [Id] -> UsageDetails
2521delDetailsList ud bndrs
2522  = ud `alterUsageDetails` (`delVarEnvList` bndrs)
2523
2524emptyDetails :: UsageDetails
2525emptyDetails = UD { ud_env       = emptyVarEnv
2526                  , ud_z_many    = emptyVarEnv
2527                  , ud_z_in_lam  = emptyVarEnv
2528                  , ud_z_no_tail = emptyVarEnv }
2529
2530isEmptyDetails :: UsageDetails -> Bool
2531isEmptyDetails = isEmptyVarEnv . ud_env
2532
2533markAllMany, markAllInsideLam, markAllNonTailCalled, zapDetails
2534  :: UsageDetails -> UsageDetails
2535markAllMany          ud = ud { ud_z_many    = ud_env ud }
2536markAllInsideLam     ud = ud { ud_z_in_lam  = ud_env ud }
2537markAllNonTailCalled ud = ud { ud_z_no_tail = ud_env ud }
2538
2539zapDetails = markAllMany . markAllNonTailCalled -- effectively sets to noOccInfo
2540
2541lookupDetails :: UsageDetails -> Id -> OccInfo
2542lookupDetails ud id
2543  | isCoVar id  -- We do not currenly gather occurrence info (from types)
2544  = noOccInfo   -- for CoVars, so we must conservatively mark them as used
2545                -- See Note [DoO not mark CoVars as dead]
2546  | otherwise
2547  = case lookupVarEnv (ud_env ud) id of
2548      Just occ -> doZapping ud id occ
2549      Nothing  -> IAmDead
2550
2551usedIn :: Id -> UsageDetails -> Bool
2552v `usedIn` ud = isExportedId v || v `elemVarEnv` ud_env ud
2553
2554udFreeVars :: VarSet -> UsageDetails -> VarSet
2555-- Find the subset of bndrs that are mentioned in uds
2556udFreeVars bndrs ud = restrictUniqSetToUFM bndrs (ud_env ud)
2557
2558{- Note [Do not mark CoVars as dead]
2559~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2560It's obviously wrong to mark CoVars as dead if they are used.
2561Currently we don't traverse types to gather usase info for CoVars,
2562so we had better treat them as having noOccInfo.
2563
2564This showed up in #15696 we had something like
2565  case eq_sel d of co -> ...(typeError @(...co...) "urk")...
2566
2567Then 'd' was substitued by a dictionary, so the expression
2568simpified to
2569  case (Coercion <blah>) of co -> ...(typeError @(...co...) "urk")...
2570
2571But then the "drop the case altogether" equation of rebuildCase
2572thought that 'co' was dead, and discarded the entire case. Urk!
2573
2574I have no idea how we managed to avoid this pitfall for so long!
2575-}
2576
2577-------------------
2578-- Auxiliary functions for UsageDetails implementation
2579
2580combineUsageDetailsWith :: (OccInfo -> OccInfo -> OccInfo)
2581                        -> UsageDetails -> UsageDetails -> UsageDetails
2582combineUsageDetailsWith plus_occ_info ud1 ud2
2583  | isEmptyDetails ud1 = ud2
2584  | isEmptyDetails ud2 = ud1
2585  | otherwise
2586  = UD { ud_env       = plusVarEnv_C plus_occ_info (ud_env ud1) (ud_env ud2)
2587       , ud_z_many    = plusVarEnv (ud_z_many    ud1) (ud_z_many    ud2)
2588       , ud_z_in_lam  = plusVarEnv (ud_z_in_lam  ud1) (ud_z_in_lam  ud2)
2589       , ud_z_no_tail = plusVarEnv (ud_z_no_tail ud1) (ud_z_no_tail ud2) }
2590
2591doZapping :: UsageDetails -> Var -> OccInfo -> OccInfo
2592doZapping ud var occ
2593  = doZappingByUnique ud (varUnique var) occ
2594
2595doZappingByUnique :: UsageDetails -> Unique -> OccInfo -> OccInfo
2596doZappingByUnique ud uniq
2597  = (if | in_subset ud_z_many    -> markMany
2598        | in_subset ud_z_in_lam  -> markInsideLam
2599        | otherwise              -> id) .
2600    (if | in_subset ud_z_no_tail -> markNonTailCalled
2601        | otherwise              -> id)
2602  where
2603    in_subset field = uniq `elemVarEnvByKey` field ud
2604
2605alterZappedSets :: UsageDetails -> (ZappedSet -> ZappedSet) -> UsageDetails
2606alterZappedSets ud f
2607  = ud { ud_z_many    = f (ud_z_many    ud)
2608       , ud_z_in_lam  = f (ud_z_in_lam  ud)
2609       , ud_z_no_tail = f (ud_z_no_tail ud) }
2610
2611alterUsageDetails :: UsageDetails -> (OccInfoEnv -> OccInfoEnv) -> UsageDetails
2612alterUsageDetails ud f
2613  = ud { ud_env = f (ud_env ud) }
2614      `alterZappedSets` f
2615
2616flattenUsageDetails :: UsageDetails -> UsageDetails
2617flattenUsageDetails ud
2618  = ud { ud_env = mapUFM_Directly (doZappingByUnique ud) (ud_env ud) }
2619      `alterZappedSets` const emptyVarEnv
2620
2621-------------------
2622-- See Note [Adjusting right-hand sides]
2623adjustRhsUsage :: Maybe JoinArity -> RecFlag
2624               -> [CoreBndr] -- Outer lambdas, AFTER occ anal
2625               -> UsageDetails -> UsageDetails
2626adjustRhsUsage mb_join_arity rec_flag bndrs usage
2627  = maybe_mark_lam (maybe_drop_tails usage)
2628  where
2629    maybe_mark_lam ud   | one_shot   = ud
2630                        | otherwise  = markAllInsideLam ud
2631    maybe_drop_tails ud | exact_join = ud
2632                        | otherwise  = markAllNonTailCalled ud
2633
2634    one_shot = case mb_join_arity of
2635                 Just join_arity
2636                   | isRec rec_flag -> False
2637                   | otherwise      -> all isOneShotBndr (drop join_arity bndrs)
2638                 Nothing            -> all isOneShotBndr bndrs
2639
2640    exact_join = case mb_join_arity of
2641                   Just join_arity -> bndrs `lengthIs` join_arity
2642                   _               -> False
2643
2644type IdWithOccInfo = Id
2645
2646tagLamBinders :: UsageDetails          -- Of scope
2647              -> [Id]                  -- Binders
2648              -> (UsageDetails,        -- Details with binders removed
2649                 [IdWithOccInfo])    -- Tagged binders
2650tagLamBinders usage binders
2651  = usage' `seq` (usage', bndrs')
2652  where
2653    (usage', bndrs') = mapAccumR tagLamBinder usage binders
2654
2655tagLamBinder :: UsageDetails       -- Of scope
2656             -> Id                 -- Binder
2657             -> (UsageDetails,     -- Details with binder removed
2658                 IdWithOccInfo)    -- Tagged binders
2659-- Used for lambda and case binders
2660-- It copes with the fact that lambda bindings can have a
2661-- stable unfolding, used for join points
2662tagLamBinder usage bndr
2663  = (usage2, bndr')
2664  where
2665        occ    = lookupDetails usage bndr
2666        bndr'  = setBinderOcc (markNonTailCalled occ) bndr
2667                   -- Don't try to make an argument into a join point
2668        usage1 = usage `delDetails` bndr
2669        usage2 | isId bndr = addManyOccsSet usage1 (idUnfoldingVars bndr)
2670                               -- This is effectively the RHS of a
2671                               -- non-join-point binding, so it's okay to use
2672                               -- addManyOccsSet, which assumes no tail calls
2673               | otherwise = usage1
2674
2675tagNonRecBinder :: TopLevelFlag           -- At top level?
2676                -> UsageDetails           -- Of scope
2677                -> CoreBndr               -- Binder
2678                -> (UsageDetails,         -- Details with binder removed
2679                    IdWithOccInfo)        -- Tagged binder
2680
2681tagNonRecBinder lvl usage binder
2682 = let
2683     occ     = lookupDetails usage binder
2684     will_be_join = decideJoinPointHood lvl usage [binder]
2685     occ'    | will_be_join = -- must already be marked AlwaysTailCalled
2686                              ASSERT(isAlwaysTailCalled occ) occ
2687             | otherwise    = markNonTailCalled occ
2688     binder' = setBinderOcc occ' binder
2689     usage'  = usage `delDetails` binder
2690   in
2691   usage' `seq` (usage', binder')
2692
2693tagRecBinders :: TopLevelFlag           -- At top level?
2694              -> UsageDetails           -- Of body of let ONLY
2695              -> [(CoreBndr,            -- Binder
2696                   UsageDetails,        -- RHS usage details
2697                   [CoreBndr])]         -- Lambdas in new RHS
2698              -> (UsageDetails,         -- Adjusted details for whole scope,
2699                                        -- with binders removed
2700                  [IdWithOccInfo])      -- Tagged binders
2701-- Substantially more complicated than non-recursive case. Need to adjust RHS
2702-- details *before* tagging binders (because the tags depend on the RHSes).
2703tagRecBinders lvl body_uds triples
2704 = let
2705     (bndrs, rhs_udss, _) = unzip3 triples
2706
2707     -- 1. Determine join-point-hood of whole group, as determined by
2708     --    the *unadjusted* usage details
2709     unadj_uds     = foldr andUDs body_uds rhs_udss
2710     will_be_joins = decideJoinPointHood lvl unadj_uds bndrs
2711
2712     -- 2. Adjust usage details of each RHS, taking into account the
2713     --    join-point-hood decision
2714     rhs_udss' = map adjust triples
2715     adjust (bndr, rhs_uds, rhs_bndrs)
2716       = adjustRhsUsage mb_join_arity Recursive rhs_bndrs rhs_uds
2717       where
2718         -- Can't use willBeJoinId_maybe here because we haven't tagged the
2719         -- binder yet (the tag depends on these adjustments!)
2720         mb_join_arity
2721           | will_be_joins
2722           , let occ = lookupDetails unadj_uds bndr
2723           , AlwaysTailCalled arity <- tailCallInfo occ
2724           = Just arity
2725           | otherwise
2726           = ASSERT(not will_be_joins) -- Should be AlwaysTailCalled if
2727             Nothing                   -- we are making join points!
2728
2729     -- 3. Compute final usage details from adjusted RHS details
2730     adj_uds   = foldr andUDs body_uds rhs_udss'
2731
2732     -- 4. Tag each binder with its adjusted details
2733     bndrs'    = [ setBinderOcc (lookupDetails adj_uds bndr) bndr
2734                 | bndr <- bndrs ]
2735
2736     -- 5. Drop the binders from the adjusted details and return
2737     usage'    = adj_uds `delDetailsList` bndrs
2738   in
2739   (usage', bndrs')
2740
2741setBinderOcc :: OccInfo -> CoreBndr -> CoreBndr
2742setBinderOcc occ_info bndr
2743  | isTyVar bndr      = bndr
2744  | isExportedId bndr = if isManyOccs (idOccInfo bndr)
2745                          then bndr
2746                          else setIdOccInfo bndr noOccInfo
2747            -- Don't use local usage info for visible-elsewhere things
2748            -- BUT *do* erase any IAmALoopBreaker annotation, because we're
2749            -- about to re-generate it and it shouldn't be "sticky"
2750
2751  | otherwise = setIdOccInfo bndr occ_info
2752
2753-- | Decide whether some bindings should be made into join points or not.
2754-- Returns `False` if they can't be join points. Note that it's an
2755-- all-or-nothing decision, as if multiple binders are given, they're
2756-- assumed to be mutually recursive.
2757--
2758-- It must, however, be a final decision. If we say "True" for 'f',
2759-- and then subsequently decide /not/ make 'f' into a join point, then
2760-- the decision about another binding 'g' might be invalidated if (say)
2761-- 'f' tail-calls 'g'.
2762--
2763-- See Note [Invariants on join points] in CoreSyn.
2764decideJoinPointHood :: TopLevelFlag -> UsageDetails
2765                    -> [CoreBndr]
2766                    -> Bool
2767decideJoinPointHood TopLevel _ _
2768  = False
2769decideJoinPointHood NotTopLevel usage bndrs
2770  | isJoinId (head bndrs)
2771  = WARN(not all_ok, text "OccurAnal failed to rediscover join point(s):" <+>
2772                       ppr bndrs)
2773    all_ok
2774  | otherwise
2775  = all_ok
2776  where
2777    -- See Note [Invariants on join points]; invariants cited by number below.
2778    -- Invariant 2 is always satisfiable by the simplifier by eta expansion.
2779    all_ok = -- Invariant 3: Either all are join points or none are
2780             all ok bndrs
2781
2782    ok bndr
2783      | -- Invariant 1: Only tail calls, all same join arity
2784        AlwaysTailCalled arity <- tailCallInfo (lookupDetails usage bndr)
2785
2786      , -- Invariant 1 as applied to LHSes of rules
2787        all (ok_rule arity) (idCoreRules bndr)
2788
2789        -- Invariant 2a: stable unfoldings
2790        -- See Note [Join points and INLINE pragmas]
2791      , ok_unfolding arity (realIdUnfolding bndr)
2792
2793        -- Invariant 4: Satisfies polymorphism rule
2794      , isValidJoinPointType arity (idType bndr)
2795      = True
2796
2797      | otherwise
2798      = False
2799
2800    ok_rule _ BuiltinRule{} = False -- only possible with plugin shenanigans
2801    ok_rule join_arity (Rule { ru_args = args })
2802      = args `lengthIs` join_arity
2803        -- Invariant 1 as applied to LHSes of rules
2804
2805    -- ok_unfolding returns False if we should /not/ convert a non-join-id
2806    -- into a join-id, even though it is AlwaysTailCalled
2807    ok_unfolding join_arity (CoreUnfolding { uf_src = src, uf_tmpl = rhs })
2808      = not (isStableSource src && join_arity > joinRhsArity rhs)
2809    ok_unfolding _ (DFunUnfolding {})
2810      = False
2811    ok_unfolding _ _
2812      = True
2813
2814willBeJoinId_maybe :: CoreBndr -> Maybe JoinArity
2815willBeJoinId_maybe bndr
2816  = case tailCallInfo (idOccInfo bndr) of
2817      AlwaysTailCalled arity -> Just arity
2818      _                      -> isJoinId_maybe bndr
2819
2820
2821{- Note [Join points and INLINE pragmas]
2822~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
2823Consider
2824   f x = let g = \x. not  -- Arity 1
2825             {-# INLINE g #-}
2826         in case x of
2827              A -> g True True
2828              B -> g True False
2829              C -> blah2
2830
2831Here 'g' is always tail-called applied to 2 args, but the stable
2832unfolding captured by the INLINE pragma has arity 1.  If we try to
2833convert g to be a join point, its unfolding will still have arity 1
2834(since it is stable, and we don't meddle with stable unfoldings), and
2835Lint will complain (see Note [Invariants on join points], (2a), in
2836CoreSyn.  #13413.
2837
2838Moreover, since g is going to be inlined anyway, there is no benefit
2839from making it a join point.
2840
2841If it is recursive, and uselessly marked INLINE, this will stop us
2842making it a join point, which is annoying.  But occasionally
2843(notably in class methods; see Note [Instances and loop breakers] in
2844TcInstDcls) we mark recursive things as INLINE but the recursion
2845unravels; so ignoring INLINE pragmas on recursive things isn't good
2846either.
2847
2848See Invariant 2a of Note [Invariants on join points] in CoreSyn
2849
2850
2851************************************************************************
2852*                                                                      *
2853\subsection{Operations over OccInfo}
2854*                                                                      *
2855************************************************************************
2856-}
2857
2858markMany, markInsideLam, markNonTailCalled :: OccInfo -> OccInfo
2859
2860markMany IAmDead = IAmDead
2861markMany occ     = ManyOccs { occ_tail = occ_tail occ }
2862
2863markInsideLam occ@(OneOcc {}) = occ { occ_in_lam = True }
2864markInsideLam occ             = occ
2865
2866markNonTailCalled IAmDead = IAmDead
2867markNonTailCalled occ     = occ { occ_tail = NoTailCallInfo }
2868
2869addOccInfo, orOccInfo :: OccInfo -> OccInfo -> OccInfo
2870
2871addOccInfo a1 a2  = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2872                    ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2873                                          tailCallInfo a2 }
2874                                -- Both branches are at least One
2875                                -- (Argument is never IAmDead)
2876
2877-- (orOccInfo orig new) is used
2878-- when combining occurrence info from branches of a case
2879
2880orOccInfo (OneOcc { occ_in_lam  = in_lam1
2881                  , occ_n_br    = nbr1
2882                  , occ_int_cxt = int_cxt1
2883                  , occ_tail    = tail1 })
2884          (OneOcc { occ_in_lam  = in_lam2
2885                  , occ_n_br    = nbr2
2886                  , occ_int_cxt = int_cxt2
2887                  , occ_tail    = tail2 })
2888  = OneOcc { occ_n_br  = nbr1 + nbr2
2889           , occ_in_lam  = in_lam1 || in_lam2
2890           , occ_int_cxt = int_cxt1 && int_cxt2
2891           , occ_tail    = tail1 `andTailCallInfo` tail2 }
2892
2893orOccInfo a1 a2 = ASSERT( not (isDeadOcc a1 || isDeadOcc a2) )
2894                  ManyOccs { occ_tail = tailCallInfo a1 `andTailCallInfo`
2895                                        tailCallInfo a2 }
2896
2897andTailCallInfo :: TailCallInfo -> TailCallInfo -> TailCallInfo
2898andTailCallInfo info@(AlwaysTailCalled arity1) (AlwaysTailCalled arity2)
2899  | arity1 == arity2 = info
2900andTailCallInfo _ _  = NoTailCallInfo
2901