1{-# OPTIONS_GHC -fno-warn-name-shadowing #-} 2{-# LANGUAGE PatternGuards #-} 3module DFAMin (minimizeDFA) where 4 5import AbsSyn 6 7import Data.Map (Map) 8import qualified Data.Map as Map 9import Data.IntSet (IntSet) 10import qualified Data.IntSet as IS 11import Data.IntMap (IntMap) 12import qualified Data.IntMap as IM 13import Data.List as List 14 15 16-- Hopcroft's Algorithm for DFA minimization (cut/pasted from Wikipedia): 17 18-- P := {{all accepting states}, {all nonaccepting states}}; 19-- Q := {{all accepting states}}; 20-- while (Q is not empty) do 21-- choose and remove a set A from Q 22-- for each c in ∑ do 23-- let X be the set of states for which a transition on c leads to a state in A 24-- for each set Y in P for which X ∩ Y is nonempty do 25-- replace Y in P by the two sets X ∩ Y and Y \ X 26-- if Y is in Q 27-- replace Y in Q by the same two sets 28-- else 29-- add the smaller of the two sets to Q 30-- end; 31-- end; 32-- end; 33 34minimizeDFA :: Ord a => DFA Int a -> DFA Int a 35minimizeDFA dfa@(DFA { dfa_start_states = starts, 36 dfa_states = statemap 37 }) 38 = DFA { dfa_start_states = starts, 39 dfa_states = Map.fromList states } 40 where 41 equiv_classes = groupEquivStates dfa 42 43 numbered_states = number (length starts) equiv_classes 44 45 -- assign each state in the minimized DFA a number, making 46 -- sure that we assign the numbers [0..] to the start states. 47 number _ [] = [] 48 number n (ss:sss) = 49 case filter (`IS.member` ss) starts of 50 [] -> (n,ss) : number (n+1) sss 51 starts' -> zip starts' (repeat ss) ++ number n sss 52 -- if one of the states of the minimized DFA corresponds 53 -- to multiple starts states, we just have to duplicate 54 -- that state. 55 56 states = [ 57 let old_states = map (lookup statemap) (IS.toList equiv) 58 accs = map fix_acc (state_acc (head old_states)) 59 -- accepts should all be the same 60 out = IM.fromList [ (b, get_new old) 61 | State _ out <- old_states, 62 (b,old) <- IM.toList out ] 63 in (n, State accs out) 64 | (n, equiv) <- numbered_states 65 ] 66 67 fix_acc acc = acc { accRightCtx = fix_rctxt (accRightCtx acc) } 68 69 fix_rctxt (RightContextRExp s) = RightContextRExp (get_new s) 70 fix_rctxt other = other 71 72 lookup m k = Map.findWithDefault (error "minimizeDFA") k m 73 get_new = lookup old_to_new 74 75 old_to_new :: Map Int Int 76 old_to_new = Map.fromList [ (s,n) | (n,ss) <- numbered_states, 77 s <- IS.toList ss ] 78 79 80groupEquivStates :: (Ord a) => DFA Int a -> [IntSet] 81groupEquivStates DFA { dfa_states = statemap } 82 = go init_p init_q 83 where 84 (accepting, nonaccepting) = Map.partition acc statemap 85 where acc (State as _) = not (List.null as) 86 87 nonaccepting_states = IS.fromList (Map.keys nonaccepting) 88 89 -- group the accepting states into equivalence classes 90 accept_map = {-# SCC "accept_map" #-} 91 foldl' (\m (n,s) -> Map.insertWith (++) (state_acc s) [n] m) 92 Map.empty 93 (Map.toList accepting) 94 95 -- accept_groups :: Ord s => [Set s] 96 accept_groups = map IS.fromList (Map.elems accept_map) 97 98 init_p = nonaccepting_states : accept_groups 99 init_q = accept_groups 100 101 -- map token T to 102 -- a map from state S to the list of states that transition to 103 -- S on token T 104 -- This is a cache of the information needed to compute x below 105 bigmap :: IntMap (IntMap [SNum]) 106 bigmap = IM.fromListWith (IM.unionWith (++)) 107 [ (i, IM.singleton to [from]) 108 | (from, state) <- Map.toList statemap, 109 (i,to) <- IM.toList (state_out state) ] 110 111 -- incoming I A = the set of states that transition to a state in 112 -- A on token I. 113 incoming :: Int -> IntSet -> IntSet 114 incoming i a = IS.fromList (concat ss) 115 where 116 map1 = IM.findWithDefault IM.empty i bigmap 117 ss = [ IM.findWithDefault [] s map1 118 | s <- IS.toList a ] 119 120 -- The outer loop: recurse on each set in Q 121 go p [] = p 122 go p (a:q) = go1 0 p q 123 where 124 -- recurse on each token (0..255) 125 go1 256 p q = go p q 126 go1 i p q = go1 (i+1) p' q' 127 where 128 (p',q') = go2 p [] q 129 130 x = incoming i a 131 132 -- recurse on each set in P 133 go2 [] p' q = (p',q) 134 go2 (y:p) p' q 135 | IS.null i || IS.null d = go2 p (y:p') q 136 | otherwise = go2 p (i:d:p') q1 137 where 138 i = IS.intersection x y 139 d = IS.difference y x 140 141 q1 = replaceyin q 142 where 143 replaceyin [] = 144 if IS.size i < IS.size d then [i] else [d] 145 replaceyin (z:zs) 146 | z == y = i : d : zs 147 | otherwise = z : replaceyin zs 148 149 150 151