1{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
2{-# LANGUAGE PatternGuards #-}
3module DFAMin (minimizeDFA) where
4
5import AbsSyn
6
7import Data.Map (Map)
8import qualified Data.Map as Map
9import Data.IntSet (IntSet)
10import qualified Data.IntSet as IS
11import Data.IntMap (IntMap)
12import qualified Data.IntMap as IM
13import Data.List as List
14
15
16-- Hopcroft's Algorithm for DFA minimization (cut/pasted from Wikipedia):
17
18-- P := {{all accepting states}, {all nonaccepting states}};
19-- Q := {{all accepting states}};
20-- while (Q is not empty) do
21--      choose and remove a set A from Q
22--      for each c in ∑ do
23--           let X be the set of states for which a transition on c leads to a state in A
24--           for each set Y in P for which X ∩ Y is nonempty do
25--                replace Y in P by the two sets X ∩ Y and Y \ X
26--                if Y is in Q
27--                     replace Y in Q by the same two sets
28--                else
29--                     add the smaller of the two sets to Q
30--           end;
31--      end;
32-- end;
33
34minimizeDFA :: Ord a => DFA Int a -> DFA Int a
35minimizeDFA dfa@(DFA { dfa_start_states = starts,
36                       dfa_states       = statemap
37                     })
38  = DFA { dfa_start_states = starts,
39          dfa_states       = Map.fromList states }
40  where
41      equiv_classes   = groupEquivStates dfa
42
43      numbered_states = number (length starts) equiv_classes
44
45      -- assign each state in the minimized DFA a number, making
46      -- sure that we assign the numbers [0..] to the start states.
47      number _ [] = []
48      number n (ss:sss) =
49        case filter (`IS.member` ss) starts of
50          []      -> (n,ss) : number (n+1) sss
51          starts' -> zip starts' (repeat ss) ++ number n sss
52          -- if one of the states of the minimized DFA corresponds
53          -- to multiple starts states, we just have to duplicate
54          -- that state.
55
56      states = [
57                let old_states = map (lookup statemap) (IS.toList equiv)
58                    accs = map fix_acc (state_acc (head old_states))
59                           -- accepts should all be the same
60                    out  = IM.fromList [ (b, get_new old)
61                                           | State _ out <- old_states,
62                                             (b,old) <- IM.toList out ]
63                in (n, State accs out)
64               | (n, equiv) <- numbered_states
65               ]
66
67      fix_acc acc = acc { accRightCtx = fix_rctxt (accRightCtx acc) }
68
69      fix_rctxt (RightContextRExp s) = RightContextRExp (get_new s)
70      fix_rctxt other = other
71
72      lookup m k = Map.findWithDefault (error "minimizeDFA") k m
73      get_new = lookup old_to_new
74
75      old_to_new :: Map Int Int
76      old_to_new = Map.fromList [ (s,n) | (n,ss) <- numbered_states,
77                                          s <- IS.toList ss ]
78
79
80groupEquivStates :: (Ord a) => DFA Int a -> [IntSet]
81groupEquivStates DFA { dfa_states = statemap }
82  = go init_p init_q
83  where
84    (accepting, nonaccepting) = Map.partition acc statemap
85       where acc (State as _) = not (List.null as)
86
87    nonaccepting_states = IS.fromList (Map.keys nonaccepting)
88
89    -- group the accepting states into equivalence classes
90    accept_map = {-# SCC "accept_map" #-}
91      foldl' (\m (n,s) -> Map.insertWith (++) (state_acc s) [n] m)
92             Map.empty
93             (Map.toList accepting)
94
95    -- accept_groups :: Ord s => [Set s]
96    accept_groups = map IS.fromList (Map.elems accept_map)
97
98    init_p = nonaccepting_states : accept_groups
99    init_q = accept_groups
100
101    -- map token T to
102    --   a map from state S to the list of states that transition to
103    --   S on token T
104    -- This is a cache of the information needed to compute x below
105    bigmap :: IntMap (IntMap [SNum])
106    bigmap = IM.fromListWith (IM.unionWith (++))
107                [ (i, IM.singleton to [from])
108                | (from, state) <- Map.toList statemap,
109                  (i,to) <- IM.toList (state_out state) ]
110
111    -- incoming I A = the set of states that transition to a state in
112    -- A on token I.
113    incoming :: Int -> IntSet -> IntSet
114    incoming i a = IS.fromList (concat ss)
115       where
116         map1 = IM.findWithDefault IM.empty i bigmap
117         ss = [ IM.findWithDefault [] s map1
118              | s <- IS.toList a ]
119
120    -- The outer loop: recurse on each set in Q
121    go p [] = p
122    go p (a:q) = go1 0 p q
123     where
124       -- recurse on each token (0..255)
125       go1 256 p q = go p q
126       go1 i   p q = go1 (i+1) p' q'
127          where
128            (p',q') = go2 p [] q
129
130            x = incoming i a
131
132            -- recurse on each set in P
133            go2 []    p' q = (p',q)
134            go2 (y:p) p' q
135              | IS.null i || IS.null d = go2 p (y:p') q
136              | otherwise              = go2 p (i:d:p') q1
137              where
138                    i = IS.intersection x y
139                    d = IS.difference y x
140
141                    q1 = replaceyin q
142                           where
143                             replaceyin [] =
144                                if IS.size i < IS.size d then [i] else [d]
145                             replaceyin (z:zs)
146                                | z == y    = i : d : zs
147                                | otherwise = z : replaceyin zs
148
149
150
151