1-- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String 2module Text.Regex.TDFA.NewDFA.Engine_NC(execMatch) where 3 4import Control.Monad(when,join,filterM) 5import Data.Array.Base(unsafeRead,unsafeWrite) 6import Prelude hiding ((!!)) 7 8import Data.Array.MArray(MArray(..)) 9import Data.Array.Unsafe(unsafeFreeze) 10import Data.Array.IArray(Ix) 11import Data.Array.ST(STArray,STUArray) 12import qualified Data.IntMap.CharMap2 as CMap(findWithDefault) 13import qualified Data.IntMap as IMap(null,toList,keys,member) 14import qualified Data.IntSet as ISet(toAscList) 15import Data.STRef(STRef,newSTRef,readSTRef,writeSTRef) 16import qualified Control.Monad.ST.Lazy as L(runST,strictToLazyST) 17import qualified Control.Monad.ST.Strict as S(ST) 18import Data.Sequence(Seq) 19import qualified Data.ByteString.Char8 as SBS(ByteString) 20import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString) 21 22import Text.Regex.Base(MatchArray,MatchOffset,MatchLength) 23import qualified Text.Regex.TDFA.IntArrTrieSet as Trie(lookupAsc) 24import Text.Regex.TDFA.Common hiding (indent) 25import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons)) 26import Text.Regex.TDFA.NewDFA.MakeTest(test_singleline,test_multiline) 27 28-- import Debug.Trace 29 30-- trace :: String -> a -> a 31-- trace _ a = a 32 33err :: String -> a 34err s = common_error "Text.Regex.TDFA.NewDFA.Engine_NC" s 35 36{-# INLINE (!!) #-} 37(!!) :: (MArray a e (S.ST s),Ix i) => a i e -> Int -> S.ST s e 38(!!) = unsafeRead 39{-# INLINE set #-} 40set :: (MArray a e (S.ST s),Ix i) => a i e -> Int -> e -> S.ST s () 41set = unsafeWrite 42 43{-# SPECIALIZE execMatch :: Regex -> Position -> Char -> ([] Char) -> [MatchArray] #-} 44{-# SPECIALIZE execMatch :: Regex -> Position -> Char -> (Seq Char) -> [MatchArray] #-} 45{-# SPECIALIZE execMatch :: Regex -> Position -> Char -> SBS.ByteString -> [MatchArray] #-} 46{-# SPECIALIZE execMatch :: Regex -> Position -> Char -> LBS.ByteString -> [MatchArray] #-} 47execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray] 48execMatch (Regex { regex_dfa = (DFA {d_id=didIn,d_dt=dtIn}) 49 , regex_init = startState 50 , regex_b_index = b_index 51 , regex_trie = trie 52 , regex_compOptions = CompOption { multiline = newline } } ) 53 offsetIn prevIn inputIn = L.runST runCaptureGroup where 54 55 !test = mkTest newline 56 57 runCaptureGroup = {-# SCC "runCaptureGroup" #-} do 58 obtainNext <- L.strictToLazyST constructNewEngine 59 let loop = do vals <- L.strictToLazyST obtainNext 60 if null vals -- force vals before defining valsRest 61 then return [] 62 else do valsRest <- loop 63 return (vals ++ valsRest) 64 loop 65 66 constructNewEngine :: S.ST s (S.ST s [MatchArray]) 67 constructNewEngine = {-# SCC "constructNewEngine" #-} do 68 storeNext <- newSTRef undefined 69 writeSTRef storeNext (goNext storeNext) 70 let obtainNext = join (readSTRef storeNext) 71 return obtainNext 72 73 goNext storeNext = {-# SCC "goNext" #-} do 74 (SScratch s1In s2In winQ) <- newScratch b_index 75 set s1In startState offsetIn 76 writeSTRef storeNext (err "obtainNext called while goNext is running!") 77 eliminatedStateFlag <- newSTRef False 78 let next s1 s2 did dt offset prev input = {-# SCC "goNext.next" #-} 79 case dt of 80 Testing' {dt_test=wt,dt_a=a,dt_b=b} -> 81 if test wt offset prev input 82 then next s1 s2 did a offset prev input 83 else next s1 s2 did b offset prev input 84 Simple' {dt_win=w,dt_trans=t, dt_other=o} 85 | IMap.null w -> 86 case uncons input of 87 Nothing -> finalizeWinners 88 Just (c,input') -> do 89 case CMap.findWithDefault o c t of 90 Transition {trans_many=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} -> 91 findTrans s1 s2 did' dt' dtrans offset c input' 92 | otherwise -> do 93 (did',dt') <- processWinner s1 did dt w offset 94 next' s1 s2 did' dt' offset prev input 95 96 next' s1 s2 did dt offset prev input = {-# SCC "goNext'.next" #-} 97 case dt of 98 Testing' {dt_test=wt,dt_a=a,dt_b=b} -> 99 if test wt offset prev input 100 then next' s1 s2 did a offset prev input 101 else next' s1 s2 did b offset prev input 102 Simple' {dt_trans=t, dt_other=o} -> 103 case uncons input of 104 Nothing -> finalizeWinners 105 Just (c,input') -> do 106 case CMap.findWithDefault o c t of 107 Transition {trans_many=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} -> 108 findTrans s1 s2 did' dt' dtrans offset c input' 109 110 findTrans s1 s2 did' dt' dtrans offset prev' input' = {-# SCC "goNext.findTrans" #-} do 111 -- 112 let findTransTo (destIndex,sources) = do 113 val <- if IMap.null sources then return (succ offset) 114 else return . minimum =<< mapM (s1 !!) (IMap.keys sources) 115 set s2 destIndex val 116 return val 117 earlyStart <- fmap minimum $ mapM findTransTo (IMap.toList dtrans) 118 -- 119 earlyWin <- readSTRef (mq_earliest winQ) 120 if earlyWin < earlyStart 121 then do 122 winnersR <- getMQ earlyStart winQ 123 writeSTRef storeNext (next s2 s1 did' dt' (succ offset) prev' input') 124 mapM wsToGroup (reverse winnersR) 125 else do 126 let offset' = succ offset in seq offset' $ next s2 s1 did' dt' offset' prev' input' 127 128 processWinner s1 did dt w offset = {-# SCC "goNext.newWinnerThenProceed" #-} do 129 let getStart (sourceIndex,_) = s1 !! sourceIndex 130 vals <- mapM getStart (IMap.toList w) 131 let low = minimum vals -- perhaps a non-empty winner 132 high = maximum vals -- perhaps an empty winner 133 if low < offset 134 then do 135 putMQ (WScratch low offset) winQ 136 when (high==offset || IMap.member startState w) $ 137 putMQ (WScratch offset offset) winQ 138 let keepState i1 = do 139 startsAt <- s1 !! i1 140 let keep = (startsAt <= low) || (offset <= startsAt) 141 if keep 142 then return True 143 else if i1 == startState 144 then {- check for additional empty winner -} 145 set s1 i1 (succ offset) >> return True 146 else writeSTRef eliminatedStateFlag True >> return False 147 states' <- filterM keepState (ISet.toAscList did) 148 flag <- readSTRef eliminatedStateFlag 149 if flag 150 then do 151 writeSTRef eliminatedStateFlag False 152 let DFA {d_id=did',d_dt=dt'} = Trie.lookupAsc trie states' 153 return (did',dt') 154 else do 155 return (did,dt) 156 else do 157 -- offset == low == minimum vals == maximum vals == high; vals == [offset] 158 putMQ (WScratch offset offset) winQ 159 return (did,dt) 160 161 finalizeWinners = do 162 winnersR <- readSTRef (mq_list winQ) 163 resetMQ winQ 164 writeSTRef storeNext (return []) 165 mapM wsToGroup (reverse winnersR) 166 167 -- goNext then ends with the next statement 168 next s1In s2In didIn dtIn offsetIn prevIn inputIn 169 170---- 171 172{-# INLINE mkTest #-} 173mkTest :: Uncons text => Bool -> WhichTest -> Index -> Char -> text -> Bool 174mkTest isMultiline = if isMultiline then test_multiline else test_singleline 175 176---- 177 178{- MUTABLE WINNER QUEUE -} 179 180data MQ s = MQ { mq_earliest :: !(STRef s Position) 181 , mq_list :: !(STRef s [WScratch]) 182 } 183 184newMQ :: S.ST s (MQ s) 185newMQ = do 186 earliest <- newSTRef maxBound 187 list <- newSTRef [] 188 return (MQ earliest list) 189 190resetMQ :: MQ s -> S.ST s () 191resetMQ (MQ {mq_earliest=earliest,mq_list=list}) = do 192 writeSTRef earliest maxBound 193 writeSTRef list [] 194 195putMQ :: WScratch -> MQ s -> S.ST s () 196putMQ ws@(WScratch {ws_start=start}) (MQ {mq_earliest=earliest,mq_list=list}) = do 197 startE <- readSTRef earliest 198 if start <= startE 199 then writeSTRef earliest start >> writeSTRef list [ws] 200 else do 201 old <- readSTRef list 202 let !rest = dropWhile (\ w -> start <= ws_start w) old 203 !new = ws : rest 204 writeSTRef list new 205 206getMQ :: Position -> MQ s -> S.ST s [WScratch] 207getMQ pos (MQ {mq_earliest=earliest,mq_list=list}) = do 208 old <- readSTRef list 209 case span (\ w -> pos <= ws_start w) old of 210 ([],ans) -> do 211 writeSTRef earliest maxBound 212 writeSTRef list [] 213 return ans 214 (new,ans) -> do 215 writeSTRef earliest (ws_start (last new)) 216 writeSTRef list new 217 return ans 218 219{- MUTABLE SCRATCH DATA STRUCTURES -} 220 221data SScratch s = SScratch { _s_1 :: !(MScratch s) 222 , _s_2 :: !(MScratch s) 223 , _s_mq :: !(MQ s) 224 } 225type MScratch s = STUArray s Index Position 226data WScratch = WScratch {ws_start,_ws_stop :: !Position} 227 deriving Show 228 229{- DEBUGGING HELPERS -} 230{- CREATING INITIAL MUTABLE SCRATCH DATA STRUCTURES -} 231 232{-# INLINE newA #-} 233newA :: (MArray (STUArray s) e (S.ST s)) => (Tag,Tag) -> e -> S.ST s (STUArray s Tag e) 234newA b_tags initial = newArray b_tags initial 235 236newScratch :: (Index,Index) -> S.ST s (SScratch s) 237newScratch b_index = do 238 s1 <- newMScratch b_index 239 s2 <- newMScratch b_index 240 winQ <- newMQ 241 return (SScratch s1 s2 winQ) 242 243newMScratch :: (Index,Index) -> S.ST s (MScratch s) 244newMScratch b_index = newA b_index (-1) 245 246{- CONVERT WINNERS TO MATCHARRAY -} 247 248wsToGroup :: WScratch -> S.ST s MatchArray 249wsToGroup (WScratch start stop) = do 250 ma <- newArray (0,0) (start,stop-start) :: S.ST s (STArray s Int (MatchOffset,MatchLength)) 251 unsafeFreeze ma 252 253