1{-# LANGUAGE GADTs #-}
2{-# OPTIONS_GHC -Wno-incomplete-uni-patterns #-}
3module GHC.Cmm.Switch (
4     SwitchTargets,
5     mkSwitchTargets,
6     switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned,
7     mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough,
8     switchTargetsToList, eqSwitchTargetWith,
9
10     SwitchPlan(..),
11     backendSupportsSwitch,
12     createSwitchPlan,
13  ) where
14
15import GHC.Prelude
16
17import GHC.Utils.Outputable
18import GHC.Driver.Backend
19import GHC.Utils.Panic
20import GHC.Cmm.Dataflow.Label (Label)
21
22import Data.Maybe
23import Data.List (groupBy)
24import Data.Function (on)
25import qualified Data.Map as M
26
27-- Note [Cmm Switches, the general plan]
28-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
29--
30-- Compiling a high-level switch statement, as it comes out of a STG case
31-- expression, for example, allows for a surprising amount of design decisions.
32-- Therefore, we cleanly separated this from the Stg → Cmm transformation, as
33-- well as from the actual code generation.
34--
35-- The overall plan is:
36--  * The Stg → Cmm transformation creates a single `SwitchTargets` in
37--    emitSwitch and emitCmmLitSwitch in GHC.StgToCmm.Utils.
38--    At this stage, they are unsuitable for code generation.
39--  * A dedicated Cmm transformation (GHC.Cmm.Switch.Implement) replaces these
40--    switch statements with code that is suitable for code generation, i.e.
41--    a nice balanced tree of decisions with dense jump tables in the leafs.
42--    The actual planning of this tree is performed in pure code in createSwitchPlan
43--    in this module. See Note [createSwitchPlan].
44--  * The actual code generation will not do any further processing and
45--    implement each CmmSwitch with a jump tables.
46--
47-- When compiling to LLVM or C, GHC.Cmm.Switch.Implement leaves the switch
48-- statements alone, as we can turn a SwitchTargets value into a nice
49-- switch-statement in LLVM resp. C, and leave the rest to the compiler.
50--
51-- See Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement] why the two module are
52-- separated.
53
54-----------------------------------------------------------------------------
55-- Note [Magic Constants in GHC.Cmm.Switch]
56-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
57--
58-- There are a lot of heuristics here that depend on magic values where it is
59-- hard to determine the "best" value (for whatever that means). These are the
60-- magic values:
61
62-- | Number of consecutive default values allowed in a jump table. If there are
63-- more of them, the jump tables are split.
64--
65-- Currently 7, as it costs 7 words of additional code when a jump table is
66-- split (at least on x64, determined experimentally).
67maxJumpTableHole :: Integer
68maxJumpTableHole = 7
69
70-- | Minimum size of a jump table. If the number is smaller, the switch is
71-- implemented using conditionals.
72-- Currently 5, because an if-then-else tree of 4 values is nice and compact.
73minJumpTableSize :: Int
74minJumpTableSize = 5
75
76-- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset].
77minJumpTableOffset :: Integer
78minJumpTableOffset = 2
79
80
81-----------------------------------------------------------------------------
82-- Switch Targets
83
84-- Note [SwitchTargets]
85-- ~~~~~~~~~~~~~~~~~~~~
86--
87-- The branches of a switch are stored in a SwitchTargets, which consists of an
88-- (optional) default jump target, and a map from values to jump targets.
89--
90-- If the default jump target is absent, the behaviour of the switch outside the
91-- values of the map is undefined.
92--
93-- We use an Integer for the keys the map so that it can be used in switches on
94-- unsigned as well as signed integers.
95--
96-- The map may be empty (we prune out-of-range branches here, so it could be us
97-- emptying it).
98--
99-- Before code generation, the table needs to be brought into a form where all
100-- entries are non-negative, so that it can be compiled into a jump table.
101-- See switchTargetsToTable.
102
103
104-- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch'
105-- value, and knows whether the value is signed, the possible range, an
106-- optional default value and a map from values to jump labels.
107data SwitchTargets =
108    SwitchTargets
109        Bool                       -- Signed values
110        (Integer, Integer)         -- Range
111        (Maybe Label)              -- Default value
112        (M.Map Integer Label)      -- The branches
113    deriving (Show, Eq)
114
115-- | The smart constructor mkSwitchTargets normalises the map a bit:
116--  * No entries outside the range
117--  * No entries equal to the default
118--  * No default if all elements have explicit values
119mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets
120mkSwitchTargets signed range@(lo,hi) mbdef ids
121    = SwitchTargets signed range mbdef' ids'
122  where
123    ids' = dropDefault $ restrict ids
124    mbdef' | defaultNeeded = mbdef
125           | otherwise     = Nothing
126
127    -- Drop entries outside the range, if there is a range
128    restrict = restrictMap (lo,hi)
129
130    -- Drop entries that equal the default, if there is a default
131    dropDefault | Just l <- mbdef = M.filter (/= l)
132                | otherwise       = id
133
134    -- Check if the default is still needed
135    defaultNeeded = fromIntegral (M.size ids') /= hi-lo+1
136
137
138-- | Changes all labels mentioned in the SwitchTargets value
139mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets
140mapSwitchTargets f (SwitchTargets signed range mbdef branches)
141    = SwitchTargets signed range (fmap f mbdef) (fmap f branches)
142
143-- | Returns the list of non-default branches of the SwitchTargets value
144switchTargetsCases :: SwitchTargets -> [(Integer, Label)]
145switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches
146
147-- | Return the default label of the SwitchTargets value
148switchTargetsDefault :: SwitchTargets -> Maybe Label
149switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef
150
151-- | Return the range of the SwitchTargets value
152switchTargetsRange :: SwitchTargets -> (Integer, Integer)
153switchTargetsRange (SwitchTargets _ range _ _) = range
154
155-- | Return whether this is used for a signed value
156switchTargetsSigned :: SwitchTargets -> Bool
157switchTargetsSigned (SwitchTargets signed _ _ _) = signed
158
159-- | switchTargetsToTable creates a dense jump table, usable for code generation.
160--
161-- Also returns an offset to add to the value; the list is 0-based on the
162-- result of that addition.
163--
164-- The conversion from Integer to Int is a bit of a wart, as the actual
165-- scrutinee might be an unsigned word, but it just works, due to wrap-around
166-- arithmetic (as verified by the CmmSwitchTest test case).
167switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label])
168switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches)
169    = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ])
170  where
171    labelFor i = case M.lookup i branches of Just l -> Just l
172                                             Nothing -> mbdef
173    start | lo >= 0 && lo < minJumpTableOffset  = 0  -- See Note [Jump Table Offset]
174          | otherwise                           = lo
175
176-- Note [Jump Table Offset]
177-- ~~~~~~~~~~~~~~~~~~~~~~~~
178--
179-- Usually, the code for a jump table starting at x will first subtract x from
180-- the value, to avoid a large amount of empty entries. But if x is very small,
181-- the extra entries are no worse than the subtraction in terms of code size, and
182-- not having to do the subtraction is quicker.
183--
184-- I.e. instead of
185--     _u20N:
186--             leaq -1(%r14),%rax
187--             jmp *_n20R(,%rax,8)
188--     _n20R:
189--             .quad   _c20p
190--             .quad   _c20q
191-- do
192--     _u20N:
193--             jmp *_n20Q(,%r14,8)
194--
195--     _n20Q:
196--             .quad   0
197--             .quad   _c20p
198--             .quad   _c20q
199--             .quad   _c20r
200
201-- | The list of all labels occurring in the SwitchTargets value.
202switchTargetsToList :: SwitchTargets -> [Label]
203switchTargetsToList (SwitchTargets _ _ mbdef branches)
204    = maybeToList mbdef ++ M.elems branches
205
206-- | Groups cases with equal targets, suitable for pretty-printing to a
207-- c-like switch statement with fall-through semantics.
208switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label)
209switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef)
210  where
211    groups = map (\xs -> (map fst xs, snd (head xs))) $
212             groupBy ((==) `on` snd) $
213             M.toList branches
214
215-- | Custom equality helper, needed for "GHC.Cmm.CommonBlockElim"
216eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool
217eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) =
218    signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2)
219  where
220    goMB Nothing Nothing = True
221    goMB (Just l1) (Just l2) = l1 `eq` l2
222    goMB _ _ = False
223    goList [] [] = True
224    goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2
225    goList _ _ = False
226
227-----------------------------------------------------------------------------
228-- Code generation for Switches
229
230
231-- | A SwitchPlan abstractly describes how a Switch statement ought to be
232-- implemented. See Note [createSwitchPlan]
233data SwitchPlan
234    = Unconditionally Label
235    | IfEqual Integer Label SwitchPlan
236    | IfLT Bool Integer SwitchPlan SwitchPlan
237    | JumpTable SwitchTargets
238  deriving Show
239--
240-- Note [createSwitchPlan]
241-- ~~~~~~~~~~~~~~~~~~~~~~~
242--
243-- A SwitchPlan describes how a Switch statement is to be broken down into
244-- smaller pieces suitable for code generation.
245--
246-- createSwitchPlan creates such a switch plan, in these steps:
247--  1. It splits the switch statement at segments of non-default values that
248--     are too large. See splitAtHoles and Note [Magic Constants in GHC.Cmm.Switch]
249--  2. Too small jump tables should be avoided, so we break up smaller pieces
250--     in breakTooSmall.
251--  3. We fill in the segments between those pieces with a jump to the default
252--     label (if there is one), returning a SeparatedList in mkFlatSwitchPlan
253--  4. We find and replace two less-than branches by a single equal-to-test in
254--     findSingleValues
255--  5. The thus collected pieces are assembled to a balanced binary tree.
256
257{-
258  Note [Two alts + default]
259  ~~~~~~~~~~~~~~~~~~~~~~~~~
260
261Discussion and a bit more info at #14644
262
263When dealing with a switch of the form:
264switch(e) {
265  case 1: goto l1;
266  case 3000: goto l2;
267  default: goto ldef;
268}
269
270If we treat it as a sparse jump table we would generate:
271
272if (e > 3000) //Check if value is outside of the jump table.
273    goto ldef;
274else {
275    if (e < 3000) { //Compare to upper value
276        if(e != 1) //Compare to remaining value
277            goto ldef;
278          else
279            goto l2;
280    }
281    else
282        goto l1;
283}
284
285Instead we special case this to :
286
287if (e==1) goto l1;
288else if (e==3000) goto l2;
289else goto l3;
290
291This means we have:
292* Less comparisons for: 1,<3000
293* Unchanged for 3000
294* One more for >3000
295
296This improves code in a few ways:
297* One comparison less means smaller code which helps with cache.
298* It exchanges a taken jump for two jumps no taken in the >range case.
299  Jumps not taken are cheaper (See Agner guides) making this about as fast.
300* For all other cases the first range check is removed making it faster.
301
302The end result is that the change is not measurably slower for the case
303>3000 and faster for the other cases.
304
305This makes running this kind of match in an inner loop cheaper by 10-20%
306depending on the data.
307In nofib this improves wheel-sieve1 by 4-9% depending on problem
308size.
309
310We could also add a second conditional jump after the comparison to
311keep the range check like this:
312    cmp 3000, rArgument
313    jg <default>
314    je <branch 2>
315While this is fairly cheap it made no big difference for the >3000 case
316and slowed down all other cases making it not worthwhile.
317-}
318
319
320-- | Does the backend support switch out of the box? Then leave this to the
321-- backend!
322backendSupportsSwitch :: Backend -> Bool
323backendSupportsSwitch ViaC = True
324backendSupportsSwitch LLVM = True
325backendSupportsSwitch _    = False
326
327-- | This function creates a SwitchPlan from a SwitchTargets value, breaking it
328-- down into smaller pieces suitable for code generation.
329createSwitchPlan :: SwitchTargets -> SwitchPlan
330-- Lets do the common case of a singleton map quickly and efficiently (#10677)
331createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
332    | [(x, l)] <- M.toList m
333    = IfEqual x l (Unconditionally defLabel)
334-- And another common case, matching "booleans"
335createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m)
336    | [(x1, l1), (_x2,l2)] <- M.toAscList m
337    --Checking If |range| = 2 is enough if we have two unique literals
338    , hi - lo == 1
339    = IfEqual x1 l1 (Unconditionally l2)
340-- See Note [Two alts + default]
341createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
342    | [(x1, l1), (x2,l2)] <- M.toAscList m
343    = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel))
344createSwitchPlan (SwitchTargets signed range mbdef m) =
345    -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $
346    plan
347  where
348    pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m
349    flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces
350    plan = buildTree signed $ flatPlan
351
352
353---
354--- Step 1: Splitting at large holes
355---
356splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a]
357splitAtHoles _        m | M.null m = []
358splitAtHoles holeSize m = map (\range -> restrictMap range m) nonHoles
359  where
360    holes = filter (\(l,h) -> h - l > holeSize) $ zip (M.keys m) (tail (M.keys m))
361    nonHoles = reassocTuples lo holes hi
362
363    (lo,_) = M.findMin m
364    (hi,_) = M.findMax m
365
366---
367--- Step 2: Avoid small jump tables
368---
369-- We do not want jump tables below a certain size. This breaks them up
370-- (into singleton maps, for now).
371breakTooSmall :: M.Map Integer a -> [M.Map Integer a]
372breakTooSmall m
373  | M.size m > minJumpTableSize = [m]
374  | otherwise                   = [M.singleton k v | (k,v) <- M.toList m]
375
376---
377---  Step 3: Fill in the blanks
378---
379
380-- | A FlatSwitchPlan is a list of SwitchPlans, with an integer inbetween every
381-- two entries, dividing the range.
382-- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if
383-- the expression is < n, and plan2 otherwise.
384
385type FlatSwitchPlan = SeparatedList Integer SwitchPlan
386
387mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan
388
389-- If we have no default (i.e. undefined where there is no entry), we can
390-- branch at the minimum of each map
391mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty
392mkFlatSwitchPlan signed  Nothing _ (m:ms)
393  = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ])
394
395-- If we have a default, we have to interleave segments that jump
396-- to the default between the maps
397mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps)
398  where
399    go (lo,hi) []
400        | lo > hi = []
401        | otherwise = [(lo, Unconditionally l)]
402    go (lo,hi) (m:ms)
403        | lo < min
404        = (lo, Unconditionally l) : go (min,hi) (m:ms)
405        | lo == min
406        = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms
407        | otherwise
408        = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min)
409      where
410        min = fst (M.findMin m)
411        max = fst (M.findMax m)
412
413
414mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan
415mkLeafPlan signed mbdef m
416    | [(_,l)] <- M.toList m -- singleton map
417    = Unconditionally l
418    | otherwise
419    = JumpTable $ mkSwitchTargets signed (min,max) mbdef m
420  where
421    min = fst (M.findMin m)
422    max = fst (M.findMax m)
423
424---
425---  Step 4: Reduce the number of branches using ==
426---
427
428-- A sequence of three unconditional jumps, with the outer two pointing to the
429-- same value and the bounds off by exactly one can be improved
430findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan
431findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs)
432  | l == l3 && i + 1 == i'
433  = findSingleValues (IfEqual i l2 (Unconditionally l), xs)
434findSingleValues (p, (i,p'):xs)
435  = (p,i) `consSL` findSingleValues (p', xs)
436findSingleValues (p, [])
437  = (p, [])
438
439---
440---  Step 5: Actually build the tree
441---
442
443-- Build a balanced tree from a separated list
444buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan
445buildTree _ (p,[]) = p
446buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2)
447  where
448    (sl1, m, sl2) = divideSL sl
449
450
451
452--
453-- Utility data type: Non-empty lists with extra markers in between each
454-- element:
455--
456
457type SeparatedList b a = (a, [(b,a)])
458
459consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a
460consSL (a, b) (a', xs) = (a, (b,a'):xs)
461
462divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a)
463divideSL (_,[]) = error "divideSL: Singleton SeparatedList"
464divideSL (p,xs) = ((p, xs1), m, (p', xs2))
465  where
466    (xs1, (m,p'):xs2) = splitAt (length xs `div` 2) xs
467
468--
469-- Other Utilities
470--
471
472restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b
473restrictMap (lo,hi) m = mid
474  where (_,   mid_hi) = M.split (lo-1) m
475        (mid, _) =      M.split (hi+1) mid_hi
476
477-- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)]
478reassocTuples :: a -> [(a,a)] -> a -> [(a,a)]
479reassocTuples initial [] last
480    = [(initial,last)]
481reassocTuples initial ((a,b):tuples) last
482    = (initial,a) : reassocTuples b tuples last
483
484-- Note [GHC.Cmm.Switch vs. GHC.Cmm.Switch.Implement]
485-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
486-- I (Joachim) separated the two somewhat closely related modules
487--
488--  - GHC.Cmm.Switch, which provides the CmmSwitchTargets type and contains the strategy
489--    for implementing a Cmm switch (createSwitchPlan), and
490--  - GHC.Cmm.Switch.Implement, which contains the actual Cmm graph modification,
491--
492-- for these reasons:
493--
494--  * GHC.Cmm.Switch is very low in the dependency tree, i.e. does not depend on any
495--    GHC specific modules at all (with the exception of Output and
496--    GHC.Cmm.Dataflow (Literal)).
497--  * GHC.Cmm.Switch.Implement is the Cmm transformation and hence very high in
498--    the dependency tree.
499--  * GHC.Cmm.Switch provides the CmmSwitchTargets data type, which is abstract, but
500--    used in GHC.Cmm.Node.
501--  * Because GHC.Cmm.Switch is low in the dependency tree, the separation allows
502--    for more parallelism when building GHC.
503--  * The interaction between the modules is very explicit and easy to
504--    understand, due to the small and simple interface.
505