1-- | This is the non-capturing form of Text.Regex.TDFA.NewDFA.String
2module Text.Regex.TDFA.NewDFA.Engine_NC(execMatch) where
3
4import Control.Monad(when,join,filterM)
5import Data.Array.Base(unsafeRead,unsafeWrite)
6import Prelude hiding ((!!))
7
8import Data.Array.MArray(MArray(..))
9import Data.Array.Unsafe(unsafeFreeze)
10import Data.Array.IArray(Ix)
11import Data.Array.ST(STArray,STUArray)
12import qualified Data.IntMap.CharMap2 as CMap(findWithDefault)
13import qualified Data.IntMap as IMap(null,toList,keys,member)
14import qualified Data.IntSet as ISet(toAscList)
15import Data.STRef(STRef,newSTRef,readSTRef,writeSTRef)
16import qualified Control.Monad.ST.Lazy as L(runST,strictToLazyST)
17import qualified Control.Monad.ST.Strict as S(ST)
18import Data.Sequence(Seq)
19import qualified Data.ByteString.Char8 as SBS(ByteString)
20import qualified Data.ByteString.Lazy.Char8 as LBS(ByteString)
21
22import Text.Regex.Base(MatchArray,MatchOffset,MatchLength)
23import qualified Text.Regex.TDFA.IntArrTrieSet as Trie(lookupAsc)
24import Text.Regex.TDFA.Common hiding (indent)
25import Text.Regex.TDFA.NewDFA.Uncons(Uncons(uncons))
26import Text.Regex.TDFA.NewDFA.MakeTest(test_singleline,test_multiline)
27
28-- import Debug.Trace
29
30-- trace :: String -> a -> a
31-- trace _ a = a
32
33err :: String -> a
34err s = common_error "Text.Regex.TDFA.NewDFA.Engine_NC"  s
35
36{-# INLINE (!!) #-}
37(!!) :: (MArray a e (S.ST s),Ix i) => a i e -> Int -> S.ST s e
38(!!) = unsafeRead
39{-# INLINE set #-}
40set :: (MArray a e (S.ST s),Ix i) => a i e -> Int -> e -> S.ST s ()
41set = unsafeWrite
42
43{-# SPECIALIZE execMatch :: Regex -> Position -> Char -> ([] Char) -> [MatchArray] #-}
44{-# SPECIALIZE execMatch :: Regex -> Position -> Char -> (Seq Char) -> [MatchArray] #-}
45{-# SPECIALIZE execMatch :: Regex -> Position -> Char -> SBS.ByteString -> [MatchArray] #-}
46{-# SPECIALIZE execMatch :: Regex -> Position -> Char -> LBS.ByteString -> [MatchArray] #-}
47execMatch :: Uncons text => Regex -> Position -> Char -> text -> [MatchArray]
48execMatch (Regex { regex_dfa = (DFA {d_id=didIn,d_dt=dtIn})
49                 , regex_init = startState
50                 , regex_b_index = b_index
51                 , regex_trie = trie
52                 , regex_compOptions = CompOption { multiline = newline } } )
53          offsetIn prevIn inputIn = L.runST runCaptureGroup where
54
55  !test = mkTest newline
56
57  runCaptureGroup = {-# SCC "runCaptureGroup" #-} do
58    obtainNext <- L.strictToLazyST constructNewEngine
59    let loop = do vals <- L.strictToLazyST obtainNext
60                  if null vals -- force vals before defining valsRest
61                    then return []
62                    else do valsRest <- loop
63                            return (vals ++ valsRest)
64    loop
65
66  constructNewEngine :: S.ST s (S.ST s [MatchArray])
67  constructNewEngine =  {-# SCC "constructNewEngine" #-} do
68    storeNext <- newSTRef undefined
69    writeSTRef storeNext (goNext storeNext)
70    let obtainNext = join (readSTRef storeNext)
71    return obtainNext
72
73  goNext storeNext = {-# SCC "goNext" #-} do
74    (SScratch s1In s2In winQ) <- newScratch b_index
75    set s1In startState offsetIn
76    writeSTRef storeNext (err "obtainNext called while goNext is running!")
77    eliminatedStateFlag <- newSTRef False
78    let next s1 s2 did dt offset prev input = {-# SCC "goNext.next" #-}
79          case dt of
80            Testing' {dt_test=wt,dt_a=a,dt_b=b} ->
81              if test wt offset prev input
82                then next s1 s2 did a offset prev input
83                else next s1 s2 did b offset prev input
84            Simple' {dt_win=w,dt_trans=t, dt_other=o}
85              | IMap.null w ->
86                  case uncons input of
87                    Nothing -> finalizeWinners
88                    Just (c,input') -> do
89                      case CMap.findWithDefault o c t of
90                        Transition {trans_many=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} ->
91                          findTrans s1 s2 did' dt' dtrans offset c input'
92              | otherwise -> do
93                  (did',dt') <- processWinner s1 did dt w offset
94                  next' s1 s2 did' dt' offset prev input
95
96        next' s1 s2 did dt offset prev input = {-# SCC "goNext'.next" #-}
97          case dt of
98            Testing' {dt_test=wt,dt_a=a,dt_b=b} ->
99              if test wt offset prev input
100                then next' s1 s2 did a offset prev input
101                else next' s1 s2 did b offset prev input
102            Simple' {dt_trans=t, dt_other=o} ->
103              case uncons input of
104                Nothing -> finalizeWinners
105                Just (c,input') -> do
106                  case CMap.findWithDefault o c t of
107                    Transition {trans_many=DFA {d_id=did',d_dt=dt'},trans_how=dtrans} ->
108                      findTrans s1 s2 did' dt' dtrans offset c input'
109
110        findTrans s1 s2 did' dt' dtrans offset prev' input' =  {-# SCC "goNext.findTrans" #-} do
111          --
112          let findTransTo (destIndex,sources) = do
113                val <- if IMap.null sources then return (succ offset)
114                         else return . minimum =<< mapM (s1 !!) (IMap.keys sources)
115                set s2 destIndex val
116                return val
117          earlyStart <- fmap minimum $ mapM findTransTo (IMap.toList dtrans)
118          --
119          earlyWin <- readSTRef (mq_earliest winQ)
120          if earlyWin < earlyStart
121            then do
122              winnersR <- getMQ earlyStart winQ
123              writeSTRef storeNext (next s2 s1 did' dt' (succ offset) prev' input')
124              mapM wsToGroup (reverse winnersR)
125            else do
126              let offset' = succ offset in seq offset' $ next s2 s1 did' dt' offset' prev' input'
127
128        processWinner s1 did dt w offset = {-# SCC "goNext.newWinnerThenProceed" #-} do
129          let getStart (sourceIndex,_) = s1 !! sourceIndex
130          vals <- mapM getStart (IMap.toList w)
131          let low = minimum vals   -- perhaps a non-empty winner
132              high = maximum vals  -- perhaps an empty winner
133          if low < offset
134            then do
135              putMQ (WScratch low offset) winQ
136              when (high==offset || IMap.member startState w) $
137                putMQ (WScratch offset offset) winQ
138              let keepState i1 = do
139                    startsAt <- s1 !! i1
140                    let keep = (startsAt <= low) || (offset <= startsAt)
141                    if keep
142                      then return True
143                      else if i1 == startState
144                             then {- check for additional empty winner -}
145                                  set s1 i1 (succ offset) >> return True
146                             else writeSTRef eliminatedStateFlag True >> return False
147              states' <- filterM keepState (ISet.toAscList did)
148              flag <- readSTRef eliminatedStateFlag
149              if flag
150                then do
151                  writeSTRef eliminatedStateFlag False
152                  let DFA {d_id=did',d_dt=dt'} = Trie.lookupAsc trie states'
153                  return (did',dt')
154                else do
155                  return (did,dt)
156            else do
157               -- offset == low == minimum vals == maximum vals == high; vals == [offset]
158               putMQ (WScratch offset offset) winQ
159               return (did,dt)
160
161        finalizeWinners = do
162          winnersR <- readSTRef (mq_list winQ)
163          resetMQ winQ
164          writeSTRef storeNext (return [])
165          mapM wsToGroup (reverse winnersR)
166
167    -- goNext then ends with the next statement
168    next s1In s2In didIn dtIn offsetIn prevIn inputIn
169
170----
171
172{-# INLINE mkTest #-}
173mkTest :: Uncons text => Bool -> WhichTest -> Index -> Char -> text -> Bool
174mkTest isMultiline = if isMultiline then test_multiline else test_singleline
175
176----
177
178{- MUTABLE WINNER QUEUE -}
179
180data MQ s = MQ { mq_earliest :: !(STRef s Position)
181               , mq_list :: !(STRef s [WScratch])
182               }
183
184newMQ :: S.ST s (MQ s)
185newMQ = do
186  earliest <- newSTRef maxBound
187  list <- newSTRef []
188  return (MQ earliest list)
189
190resetMQ :: MQ s -> S.ST s ()
191resetMQ (MQ {mq_earliest=earliest,mq_list=list}) = do
192  writeSTRef earliest maxBound
193  writeSTRef list []
194
195putMQ :: WScratch -> MQ s -> S.ST s ()
196putMQ ws@(WScratch {ws_start=start}) (MQ {mq_earliest=earliest,mq_list=list}) = do
197  startE <- readSTRef earliest
198  if start <= startE
199    then writeSTRef earliest start >> writeSTRef list [ws]
200    else do
201      old <- readSTRef list
202      let !rest = dropWhile (\ w -> start <= ws_start w) old
203          !new = ws : rest
204      writeSTRef list new
205
206getMQ :: Position -> MQ s -> S.ST s [WScratch]
207getMQ pos (MQ {mq_earliest=earliest,mq_list=list}) = do
208  old <- readSTRef list
209  case span (\ w -> pos <= ws_start w) old of
210    ([],ans) -> do
211      writeSTRef earliest maxBound
212      writeSTRef list []
213      return ans
214    (new,ans) -> do
215      writeSTRef earliest (ws_start (last new))
216      writeSTRef list new
217      return ans
218
219{- MUTABLE SCRATCH DATA STRUCTURES -}
220
221data SScratch s = SScratch { _s_1 :: !(MScratch s)
222                           , _s_2 :: !(MScratch s)
223                           , _s_mq :: !(MQ s)
224                           }
225type MScratch s = STUArray s Index Position
226data WScratch = WScratch {ws_start,_ws_stop :: !Position}
227  deriving Show
228
229{- DEBUGGING HELPERS -}
230{- CREATING INITIAL MUTABLE SCRATCH DATA STRUCTURES -}
231
232{-# INLINE newA #-}
233newA :: (MArray (STUArray s) e (S.ST s)) => (Tag,Tag) -> e -> S.ST s (STUArray s Tag e)
234newA b_tags initial = newArray b_tags initial
235
236newScratch :: (Index,Index) -> S.ST s (SScratch s)
237newScratch b_index = do
238  s1 <- newMScratch b_index
239  s2 <- newMScratch b_index
240  winQ <- newMQ
241  return (SScratch s1 s2 winQ)
242
243newMScratch :: (Index,Index) -> S.ST s (MScratch s)
244newMScratch b_index = newA b_index (-1)
245
246{- CONVERT WINNERS TO MATCHARRAY -}
247
248wsToGroup :: WScratch -> S.ST s MatchArray
249wsToGroup (WScratch start stop) = do
250  ma <- newArray (0,0) (start,stop-start)  :: S.ST s (STArray s Int (MatchOffset,MatchLength))
251  unsafeFreeze ma
252
253