1{-# LANGUAGE GADTs #-}
2module CmmSwitch (
3     SwitchTargets,
4     mkSwitchTargets,
5     switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned,
6     mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough,
7     switchTargetsToList, eqSwitchTargetWith,
8
9     SwitchPlan(..),
10     targetSupportsSwitch,
11     createSwitchPlan,
12  ) where
13
14import GhcPrelude
15
16import Outputable
17import DynFlags
18import Hoopl.Label (Label)
19
20import Data.Maybe
21import Data.List (groupBy)
22import Data.Function (on)
23import qualified Data.Map as M
24
25-- Note [Cmm Switches, the general plan]
26-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
27--
28-- Compiling a high-level switch statement, as it comes out of a STG case
29-- expression, for example, allows for a surprising amount of design decisions.
30-- Therefore, we cleanly separated this from the Stg → Cmm transformation, as
31-- well as from the actual code generation.
32--
33-- The overall plan is:
34--  * The Stg → Cmm transformation creates a single `SwitchTargets` in
35--    emitSwitch and emitCmmLitSwitch in GHC.StgToCmm/Utils.hs.
36--    At this stage, they are unsuitable for code generation.
37--  * A dedicated Cmm transformation (CmmImplementSwitchPlans) replaces these
38--    switch statements with code that is suitable for code generation, i.e.
39--    a nice balanced tree of decisions with dense jump tables in the leafs.
40--    The actual planning of this tree is performed in pure code in createSwitchPlan
41--    in this module. See Note [createSwitchPlan].
42--  * The actual code generation will not do any further processing and
43--    implement each CmmSwitch with a jump tables.
44--
45-- When compiling to LLVM or C, CmmImplementSwitchPlans leaves the switch
46-- statements alone, as we can turn a SwitchTargets value into a nice
47-- switch-statement in LLVM resp. C, and leave the rest to the compiler.
48--
49-- See Note [CmmSwitch vs. CmmImplementSwitchPlans] why the two module are
50-- separated.
51
52-----------------------------------------------------------------------------
53-- Note [Magic Constants in CmmSwitch]
54--
55-- There are a lot of heuristics here that depend on magic values where it is
56-- hard to determine the "best" value (for whatever that means). These are the
57-- magic values:
58
59-- | Number of consecutive default values allowed in a jump table. If there are
60-- more of them, the jump tables are split.
61--
62-- Currently 7, as it costs 7 words of additional code when a jump table is
63-- split (at least on x64, determined experimentally).
64maxJumpTableHole :: Integer
65maxJumpTableHole = 7
66
67-- | Minimum size of a jump table. If the number is smaller, the switch is
68-- implemented using conditionals.
69-- Currently 5, because an if-then-else tree of 4 values is nice and compact.
70minJumpTableSize :: Int
71minJumpTableSize = 5
72
73-- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset].
74minJumpTableOffset :: Integer
75minJumpTableOffset = 2
76
77
78-----------------------------------------------------------------------------
79-- Switch Targets
80
81-- Note [SwitchTargets]:
82-- ~~~~~~~~~~~~~~~~~~~~~
83--
84-- The branches of a switch are stored in a SwitchTargets, which consists of an
85-- (optional) default jump target, and a map from values to jump targets.
86--
87-- If the default jump target is absent, the behaviour of the switch outside the
88-- values of the map is undefined.
89--
90-- We use an Integer for the keys the map so that it can be used in switches on
91-- unsigned as well as signed integers.
92--
93-- The map may be empty (we prune out-of-range branches here, so it could be us
94-- emptying it).
95--
96-- Before code generation, the table needs to be brought into a form where all
97-- entries are non-negative, so that it can be compiled into a jump table.
98-- See switchTargetsToTable.
99
100
101-- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch'
102-- value, and knows whether the value is signed, the possible range, an
103-- optional default value and a map from values to jump labels.
104data SwitchTargets =
105    SwitchTargets
106        Bool                       -- Signed values
107        (Integer, Integer)         -- Range
108        (Maybe Label)              -- Default value
109        (M.Map Integer Label)      -- The branches
110    deriving (Show, Eq)
111
112-- | The smart constructor mkSwitchTargets normalises the map a bit:
113--  * No entries outside the range
114--  * No entries equal to the default
115--  * No default if all elements have explicit values
116mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets
117mkSwitchTargets signed range@(lo,hi) mbdef ids
118    = SwitchTargets signed range mbdef' ids'
119  where
120    ids' = dropDefault $ restrict ids
121    mbdef' | defaultNeeded = mbdef
122           | otherwise     = Nothing
123
124    -- Drop entries outside the range, if there is a range
125    restrict = restrictMap (lo,hi)
126
127    -- Drop entries that equal the default, if there is a default
128    dropDefault | Just l <- mbdef = M.filter (/= l)
129                | otherwise       = id
130
131    -- Check if the default is still needed
132    defaultNeeded = fromIntegral (M.size ids') /= hi-lo+1
133
134
135-- | Changes all labels mentioned in the SwitchTargets value
136mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets
137mapSwitchTargets f (SwitchTargets signed range mbdef branches)
138    = SwitchTargets signed range (fmap f mbdef) (fmap f branches)
139
140-- | Returns the list of non-default branches of the SwitchTargets value
141switchTargetsCases :: SwitchTargets -> [(Integer, Label)]
142switchTargetsCases (SwitchTargets _ _ _ branches) = M.toList branches
143
144-- | Return the default label of the SwitchTargets value
145switchTargetsDefault :: SwitchTargets -> Maybe Label
146switchTargetsDefault (SwitchTargets _ _ mbdef _) = mbdef
147
148-- | Return the range of the SwitchTargets value
149switchTargetsRange :: SwitchTargets -> (Integer, Integer)
150switchTargetsRange (SwitchTargets _ range _ _) = range
151
152-- | Return whether this is used for a signed value
153switchTargetsSigned :: SwitchTargets -> Bool
154switchTargetsSigned (SwitchTargets signed _ _ _) = signed
155
156-- | switchTargetsToTable creates a dense jump table, usable for code generation.
157--
158-- Also returns an offset to add to the value; the list is 0-based on the
159-- result of that addition.
160--
161-- The conversion from Integer to Int is a bit of a wart, as the actual
162-- scrutinee might be an unsigned word, but it just works, due to wrap-around
163-- arithmetic (as verified by the CmmSwitchTest test case).
164switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label])
165switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches)
166    = (fromIntegral (-start), [ labelFor i | i <- [start..hi] ])
167  where
168    labelFor i = case M.lookup i branches of Just l -> Just l
169                                             Nothing -> mbdef
170    start | lo >= 0 && lo < minJumpTableOffset  = 0  -- See Note [Jump Table Offset]
171          | otherwise                           = lo
172
173-- Note [Jump Table Offset]
174-- ~~~~~~~~~~~~~~~~~~~~~~~~
175--
176-- Usually, the code for a jump table starting at x will first subtract x from
177-- the value, to avoid a large amount of empty entries. But if x is very small,
178-- the extra entries are no worse than the subtraction in terms of code size, and
179-- not having to do the subtraction is quicker.
180--
181-- I.e. instead of
182--     _u20N:
183--             leaq -1(%r14),%rax
184--             jmp *_n20R(,%rax,8)
185--     _n20R:
186--             .quad   _c20p
187--             .quad   _c20q
188-- do
189--     _u20N:
190--             jmp *_n20Q(,%r14,8)
191--
192--     _n20Q:
193--             .quad   0
194--             .quad   _c20p
195--             .quad   _c20q
196--             .quad   _c20r
197
198-- | The list of all labels occuring in the SwitchTargets value.
199switchTargetsToList :: SwitchTargets -> [Label]
200switchTargetsToList (SwitchTargets _ _ mbdef branches)
201    = maybeToList mbdef ++ M.elems branches
202
203-- | Groups cases with equal targets, suitable for pretty-printing to a
204-- c-like switch statement with fall-through semantics.
205switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label)
206switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef)
207  where
208    groups = map (\xs -> (map fst xs, snd (head xs))) $
209             groupBy ((==) `on` snd) $
210             M.toList branches
211
212-- | Custom equality helper, needed for "CmmCommonBlockElim"
213eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool
214eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) =
215    signed1 == signed2 && range1 == range2 && goMB mbdef1 mbdef2 && goList (M.toList ids1) (M.toList ids2)
216  where
217    goMB Nothing Nothing = True
218    goMB (Just l1) (Just l2) = l1 `eq` l2
219    goMB _ _ = False
220    goList [] [] = True
221    goList ((i1,l1):ls1) ((i2,l2):ls2) = i1 == i2 && l1 `eq` l2 && goList ls1 ls2
222    goList _ _ = False
223
224-----------------------------------------------------------------------------
225-- Code generation for Switches
226
227
228-- | A SwitchPlan abstractly describes how a Switch statement ought to be
229-- implemented. See Note [createSwitchPlan]
230data SwitchPlan
231    = Unconditionally Label
232    | IfEqual Integer Label SwitchPlan
233    | IfLT Bool Integer SwitchPlan SwitchPlan
234    | JumpTable SwitchTargets
235  deriving Show
236--
237-- Note [createSwitchPlan]
238-- ~~~~~~~~~~~~~~~~~~~~~~~
239--
240-- A SwitchPlan describes how a Switch statement is to be broken down into
241-- smaller pieces suitable for code generation.
242--
243-- createSwitchPlan creates such a switch plan, in these steps:
244--  1. It splits the switch statement at segments of non-default values that
245--     are too large. See splitAtHoles and Note [Magic Constants in CmmSwitch]
246--  2. Too small jump tables should be avoided, so we break up smaller pieces
247--     in breakTooSmall.
248--  3. We fill in the segments between those pieces with a jump to the default
249--     label (if there is one), returning a SeparatedList in mkFlatSwitchPlan
250--  4. We find and replace two less-than branches by a single equal-to-test in
251--     findSingleValues
252--  5. The thus collected pieces are assembled to a balanced binary tree.
253
254{-
255  Note [Two alts + default]
256  ~~~~~~~~~~~~~~~~~~~~~~~~~
257
258Discussion and a bit more info at #14644
259
260When dealing with a switch of the form:
261switch(e) {
262  case 1: goto l1;
263  case 3000: goto l2;
264  default: goto ldef;
265}
266
267If we treat it as a sparse jump table we would generate:
268
269if (e > 3000) //Check if value is outside of the jump table.
270    goto ldef;
271else {
272    if (e < 3000) { //Compare to upper value
273        if(e != 1) //Compare to remaining value
274            goto ldef;
275          else
276            goto l2;
277    }
278    else
279        goto l1;
280}
281
282Instead we special case this to :
283
284if (e==1) goto l1;
285else if (e==3000) goto l2;
286else goto l3;
287
288This means we have:
289* Less comparisons for: 1,<3000
290* Unchanged for 3000
291* One more for >3000
292
293This improves code in a few ways:
294* One comparison less means smaller code which helps with cache.
295* It exchanges a taken jump for two jumps no taken in the >range case.
296  Jumps not taken are cheaper (See Agner guides) making this about as fast.
297* For all other cases the first range check is removed making it faster.
298
299The end result is that the change is not measurably slower for the case
300>3000 and faster for the other cases.
301
302This makes running this kind of match in an inner loop cheaper by 10-20%
303depending on the data.
304In nofib this improves wheel-sieve1 by 4-9% depending on problem
305size.
306
307We could also add a second conditional jump after the comparison to
308keep the range check like this:
309    cmp 3000, rArgument
310    jg <default>
311    je <branch 2>
312While this is fairly cheap it made no big difference for the >3000 case
313and slowed down all other cases making it not worthwhile.
314-}
315
316
317-- | Does the target support switch out of the box? Then leave this to the
318-- target!
319targetSupportsSwitch :: HscTarget -> Bool
320targetSupportsSwitch HscC = True
321targetSupportsSwitch HscLlvm = True
322targetSupportsSwitch _ = False
323
324-- | This function creates a SwitchPlan from a SwitchTargets value, breaking it
325-- down into smaller pieces suitable for code generation.
326createSwitchPlan :: SwitchTargets -> SwitchPlan
327-- Lets do the common case of a singleton map quicky and efficiently (#10677)
328createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
329    | [(x, l)] <- M.toList m
330    = IfEqual x l (Unconditionally defLabel)
331-- And another common case, matching "booleans"
332createSwitchPlan (SwitchTargets _signed (lo,hi) Nothing m)
333    | [(x1, l1), (_x2,l2)] <- M.toAscList m
334    --Checking If |range| = 2 is enough if we have two unique literals
335    , hi - lo == 1
336    = IfEqual x1 l1 (Unconditionally l2)
337-- See Note [Two alts + default]
338createSwitchPlan (SwitchTargets _signed _range (Just defLabel) m)
339    | [(x1, l1), (x2,l2)] <- M.toAscList m
340    = IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel))
341createSwitchPlan (SwitchTargets signed range mbdef m) =
342    -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $
343    plan
344  where
345    pieces = concatMap breakTooSmall $ splitAtHoles maxJumpTableHole m
346    flatPlan = findSingleValues $ mkFlatSwitchPlan signed mbdef range pieces
347    plan = buildTree signed $ flatPlan
348
349
350---
351--- Step 1: Splitting at large holes
352---
353splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a]
354splitAtHoles _        m | M.null m = []
355splitAtHoles holeSize m = map (\range -> restrictMap range m) nonHoles
356  where
357    holes = filter (\(l,h) -> h - l > holeSize) $ zip (M.keys m) (tail (M.keys m))
358    nonHoles = reassocTuples lo holes hi
359
360    (lo,_) = M.findMin m
361    (hi,_) = M.findMax m
362
363---
364--- Step 2: Avoid small jump tables
365---
366-- We do not want jump tables below a certain size. This breaks them up
367-- (into singleton maps, for now).
368breakTooSmall :: M.Map Integer a -> [M.Map Integer a]
369breakTooSmall m
370  | M.size m > minJumpTableSize = [m]
371  | otherwise                   = [M.singleton k v | (k,v) <- M.toList m]
372
373---
374---  Step 3: Fill in the blanks
375---
376
377-- | A FlatSwitchPlan is a list of SwitchPlans, with an integer inbetween every
378-- two entries, dividing the range.
379-- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if
380-- the expression is < n, and plan2 otherwise.
381
382type FlatSwitchPlan = SeparatedList Integer SwitchPlan
383
384mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan
385
386-- If we have no default (i.e. undefined where there is no entry), we can
387-- branch at the minimum of each map
388mkFlatSwitchPlan _ Nothing _ [] = pprPanic "mkFlatSwitchPlan with nothing left to do" empty
389mkFlatSwitchPlan signed  Nothing _ (m:ms)
390  = (mkLeafPlan signed Nothing m , [ (fst (M.findMin m'), mkLeafPlan signed Nothing m') | m' <- ms ])
391
392-- If we have a default, we have to interleave segments that jump
393-- to the default between the maps
394mkFlatSwitchPlan signed (Just l) r ms = let ((_,p1):ps) = go r ms in (p1, ps)
395  where
396    go (lo,hi) []
397        | lo > hi = []
398        | otherwise = [(lo, Unconditionally l)]
399    go (lo,hi) (m:ms)
400        | lo < min
401        = (lo, Unconditionally l) : go (min,hi) (m:ms)
402        | lo == min
403        = (lo, mkLeafPlan signed (Just l) m) : go (max+1,hi) ms
404        | otherwise
405        = pprPanic "mkFlatSwitchPlan" (integer lo <+> integer min)
406      where
407        min = fst (M.findMin m)
408        max = fst (M.findMax m)
409
410
411mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan
412mkLeafPlan signed mbdef m
413    | [(_,l)] <- M.toList m -- singleton map
414    = Unconditionally l
415    | otherwise
416    = JumpTable $ mkSwitchTargets signed (min,max) mbdef m
417  where
418    min = fst (M.findMin m)
419    max = fst (M.findMax m)
420
421---
422---  Step 4: Reduce the number of branches using ==
423---
424
425-- A sequence of three unconditional jumps, with the outer two pointing to the
426-- same value and the bounds off by exactly one can be improved
427findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan
428findSingleValues (Unconditionally l, (i, Unconditionally l2) : (i', Unconditionally l3) : xs)
429  | l == l3 && i + 1 == i'
430  = findSingleValues (IfEqual i l2 (Unconditionally l), xs)
431findSingleValues (p, (i,p'):xs)
432  = (p,i) `consSL` findSingleValues (p', xs)
433findSingleValues (p, [])
434  = (p, [])
435
436---
437---  Step 5: Actually build the tree
438---
439
440-- Build a balanced tree from a separated list
441buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan
442buildTree _ (p,[]) = p
443buildTree signed sl = IfLT signed m (buildTree signed sl1) (buildTree signed sl2)
444  where
445    (sl1, m, sl2) = divideSL sl
446
447
448
449--
450-- Utility data type: Non-empty lists with extra markers in between each
451-- element:
452--
453
454type SeparatedList b a = (a, [(b,a)])
455
456consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a
457consSL (a, b) (a', xs) = (a, (b,a'):xs)
458
459divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a)
460divideSL (_,[]) = error "divideSL: Singleton SeparatedList"
461divideSL (p,xs) = ((p, xs1), m, (p', xs2))
462  where
463    (xs1, (m,p'):xs2) = splitAt (length xs `div` 2) xs
464
465--
466-- Other Utilities
467--
468
469restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b
470restrictMap (lo,hi) m = mid
471  where (_,   mid_hi) = M.split (lo-1) m
472        (mid, _) =      M.split (hi+1) mid_hi
473
474-- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)]
475reassocTuples :: a -> [(a,a)] -> a -> [(a,a)]
476reassocTuples initial [] last
477    = [(initial,last)]
478reassocTuples initial ((a,b):tuples) last
479    = (initial,a) : reassocTuples b tuples last
480
481-- Note [CmmSwitch vs. CmmImplementSwitchPlans]
482-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
483-- I (Joachim) separated the two somewhat closely related modules
484--
485--  - CmmSwitch, which provides the CmmSwitchTargets type and contains the strategy
486--    for implementing a Cmm switch (createSwitchPlan), and
487--  - CmmImplementSwitchPlans, which contains the actuall Cmm graph modification,
488--
489-- for these reasons:
490--
491--  * CmmSwitch is very low in the dependency tree, i.e. does not depend on any
492--    GHC specific modules at all (with the exception of Output and Hoople
493--    (Literal)). CmmImplementSwitchPlans is the Cmm transformation and hence very
494--    high in the dependency tree.
495--  * CmmSwitch provides the CmmSwitchTargets data type, which is abstract, but
496--    used in CmmNodes.
497--  * Because CmmSwitch is low in the dependency tree, the separation allows
498--    for more parallelism when building GHC.
499--  * The interaction between the modules is very explicit and easy to
500--    understand, due to the small and simple interface.
501