1-- | The CorePattern module deconstructs the Pattern tree created by
2-- ReadRegex.parseRegex and returns a simpler Q\/P tree with
3-- annotations at each Q node.  This will be converted by the TNFA
4-- module into a QNFA finite automata.
5--
6-- Of particular note, this Pattern to Q\/P conversion creates and
7-- assigns all the internal Tags that will be used during the matching
8-- process, and associates the captures groups with the tags that
9-- represent their starting and ending locations and with their
10-- immediate parent group.
11--
12-- Each Maximize and Minimize tag is held as either a preTag or a
13-- postTag by one and only one location in the Q\/P tree.  The Orbit
14-- tags are each held by one and only one Star node.  Tags that stop a
15-- Group are also held in perhaps numerous preReset lists.
16--
17-- The additional nullQ::nullView field of Q records the potentially
18-- complex information about what tests and tags must be used if the
19-- pattern unQ::P matches 0 zero characters.  There can be redundancy
20-- in nullView, which is eliminated by cleanNullView.
21--
22-- Uses recursive do notation.
23--
24-- 2009 XXX TODO: we can avoid needing tags in the part of the pattern
25-- after the last capturing group (when right-associative).  This is
26-- flipped for left-associative where the front of the pattern before
27-- the first capturing group needs no tags.  The edge of these regions
28-- is subtle: both case needs a Maximize tag.  One ought to be able to
29-- check the Pattern: if the root is PConcat then a scan from the end
30-- (start) looking for the first with an embedded PGroup can be found
31-- and the PGroup free elements can be wrapped in some new PNOTAG
32-- semantic indicator.
33module Text.Regex.TDFA.CorePattern(Q(..),P(..),WhichTest(..),Wanted(..)
34                                  ,TestInfo,OP(..),SetTestInfo(..),NullView
35                                  ,patternToQ,cleanNullView,cannotAccept,mustAccept) where
36
37import Control.Monad.RWS {- all -}
38import Data.Array.IArray(Array,(!),accumArray,listArray)
39import Data.List(sort)
40import Data.IntMap.EnumMap2(EnumMap)
41import qualified Data.IntMap.EnumMap2 as Map(singleton,null,assocs,keysSet)
42--import Data.Maybe(isNothing)
43import Data.IntSet.EnumSet2(EnumSet)
44import qualified Data.IntSet.EnumSet2 as Set(singleton,toList,isSubsetOf)
45import Data.Semigroup as Sem
46import Text.Regex.TDFA.Common {- all -}
47import Text.Regex.TDFA.Pattern(Pattern(..),starTrans)
48-- import Debug.Trace
49
50{- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}
51
52
53--err :: String -> a
54--err = common_error "Text.Regex.TDFA.CorePattern"
55
56--debug :: (Show a) => a -> b -> b
57--debug _ = id
58
59-- Core Pattern Language
60data P = Empty                       -- Could be replaced by (Test Nothing)??
61       | Or [Q]
62       | Seq Q Q
63       | Star { getOrbit :: Maybe Tag -- tag to prioritize the need to keep track of length of each pass though q
64              , resetOrbits :: [Tag]  -- child star's orbits to reset (ResetOrbitTask) at all depths
65              , firstNull :: Bool     -- Usually True to mean the first pass may match 0 characters
66              , unStar :: Q}
67       | Test TestInfo               -- Require the test to be true (merge with empty as (Test (Maybe TestInfo)) ??)
68       | OneChar Pattern             -- Bring the Pattern element that accepts a character
69       | NonEmpty Q                  -- Don't let the Q pattern match nothing
70         deriving (Show,Eq)
71
72-- The diagnostics about the pattern.  Note that when unQ is 'Seq' the
73-- the preTag and postTag are Nothing but the preReset might have tags
74-- from PGroup injecting them.
75data Q = Q {nullQ :: NullView                  -- Ordered list of nullable views
76           ,takes :: (Position,Maybe Position) -- Range of number of accepted characters
77           ,preReset :: [Tag]                  -- Tags to "reset" (ResetGroupStopTask) (Only immediate children for efficiency)
78           ,postSet :: [Tag]                   -- Tags to "set" (SetGroupStopTask)
79           ,preTag,postTag :: Maybe Tag        -- Tags assigned around this pattern (TagTask)
80           ,tagged :: Bool                     -- Whether this node should be tagged -- patternToQ use only
81           ,childGroups :: Bool                -- Whether unQ has any PGroups -- patternToQ use only
82           ,wants :: Wanted                    -- What kind of continuation is used by this pattern
83           ,unQ :: P} deriving (Eq)
84
85type TestInfo = (WhichTest,DoPa)
86
87-- This is newtype'd to allow control over class instances
88-- This is a set of WhichTest where each test has associated pattern location information
89newtype SetTestInfo = SetTestInfo {getTests :: EnumMap WhichTest (EnumSet DoPa)} deriving (Eq)
90
91instance Semigroup SetTestInfo where
92  SetTestInfo x <> SetTestInfo y = SetTestInfo (x Sem.<> y)
93
94instance Monoid SetTestInfo where
95  mempty = SetTestInfo mempty
96  mappend = (Sem.<>)
97
98instance Show SetTestInfo where
99  show (SetTestInfo sti) = "SetTestInfo "++show (mapSnd (Set.toList) $ Map.assocs sti)
100
101-- There may be several distinct ways for a subtree to conditionally
102-- (i.e. with a Test) or unconditionally accept 0 characters.  These
103-- are in the list in order of preference, with most preferred listed
104-- first.
105type NullView = [(SetTestInfo,TagList)]  -- Ordered list of null views, each is a set of tests and tags
106
107-- During the depth first traversal, children are told about tags by the parent.
108-- They may change Apply to Advice and they may generate new tags.
109data HandleTag = NoTag             -- No tag at this boundary
110               | Advice Tag        -- tag at this boundary, applied at higher level in tree
111               | Apply Tag         -- tag at this boundary, may be applied at this node or passed to one child
112                 deriving (Show)
113
114-- Nodes in the tree are labeled by the type kind of continuation they
115-- prefer to be passed when processing.  This makes it possible to
116-- create a smaller number of QNFA states and avoid creating wasteful
117-- QNFA states that won't be reachable in the final automata.
118--
119-- In practice WantsBoth is treated identically to WantsQNFA and
120-- WantsBoth could be removed.
121data Wanted = WantsQNFA | WantsQT | WantsBoth | WantsEither deriving (Eq,Show)
122
123instance Show Q where
124  show = showQ
125
126showQ :: Q -> String
127showQ q = "Q { nullQ = "++show (nullQ q)++
128        "\n  , takes = "++show (takes q)++
129        "\n  , preReset = "++show (preReset q)++
130        "\n  , postSet = "++show (postSet q)++
131        "\n  , preTag = "++show (preTag q)++
132        "\n  , postTag = "++show (postTag q)++
133        "\n  , tagged = "++show (tagged q)++
134        "\n  , wants = "++show (wants q)++
135        "\n  , unQ = "++ indent' (unQ q)++" }"
136   where indent' = unlines . (\s -> case s of
137                                      [] -> []
138                                      (h:t) -> h : (map (spaces ++) t)) . lines . show
139         spaces = replicate 10 ' '
140
141-- Smart constructors for NullView
142notNull :: NullView
143notNull = []
144
145-- Shorthand for combining a preTag and a postTag
146-- preTags :: Maybe Tag -> Maybe Tag -> TagList
147-- preTags a b = promote a `mappend` promote b
148--   where promote = maybe [] (\x -> [(x,PreUpdate TagTask)])
149
150promotePreTag :: HandleTag -> TagList
151promotePreTag = maybe [] (\x -> [(x,PreUpdate TagTask)]) . apply
152
153makeEmptyNullView :: HandleTag -> HandleTag -> NullView
154makeEmptyNullView a b = [(mempty, promotePreTag a ++ promotePreTag b)]
155
156makeTestNullView ::  TestInfo -> HandleTag -> HandleTag -> NullView
157makeTestNullView (w,d) a b = [(SetTestInfo (Map.singleton w (Set.singleton d)), promotePreTag a ++ promotePreTag b)]
158
159tagWrapNullView :: HandleTag -> HandleTag -> NullView -> NullView
160tagWrapNullView a b oldNV =
161  case (promotePreTag a, promotePreTag b) of
162    ([],[]) -> oldNV
163    (pre,post) -> do
164      (oldTests,oldTasks) <- oldNV
165      return (oldTests,pre++oldTasks++post)
166
167-- For PGroup, need to prepend reset tasks before others in nullView
168addGroupResetsToNullView :: [Tag] -> Tag -> NullView -> NullView
169addGroupResetsToNullView groupResets groupSet nv = [ (test, prepend (append tags) ) | (test,tags) <- nv ]
170  where prepend = foldr (\h t -> (h:).t) id . map (\tag->(tag,PreUpdate ResetGroupStopTask)) $ groupResets
171        append = (++[(groupSet,PreUpdate SetGroupStopTask)])
172
173-- For PStar, need to put in the orbit TagTasks
174orbitWrapNullView :: Maybe Tag -> [Tag] -> NullView -> NullView
175orbitWrapNullView mOrbit orbitResets oldNV =
176  case (mOrbit,orbitResets) of
177    (Nothing,[]) -> oldNV
178    (Nothing,_) -> do (oldTests,oldTasks) <- oldNV
179                      return (oldTests,prepend oldTasks)
180    (Just o,_) -> do (oldTests,oldTasks) <- oldNV
181                     return (oldTests,prepend $ [(o,PreUpdate EnterOrbitTask)] ++ oldTasks ++ [(o,PreUpdate LeaveOrbitTask)])
182  where prepend = foldr (\h t -> (h:).t) id . map (\tag->(tag,PreUpdate ResetOrbitTask)) $ orbitResets
183
184-- The NullViews are ordered, and later test sets that contain the
185-- tests from any earlier entry will never be chosen.  This function
186-- returns a list with these redundant elements removed.  Note that
187-- the first unconditional entry in the list will be the last entry of
188-- the returned list since the empty set is a subset of any other set.
189cleanNullView :: NullView -> NullView
190cleanNullView [] = []
191cleanNullView (first@(SetTestInfo sti,_):rest) | Map.null sti = first : []  -- optimization
192                                               | otherwise =
193  first : cleanNullView (filter (not . (setTI `Set.isSubsetOf`) . Map.keysSet . getTests . fst) rest)
194  where setTI = Map.keysSet sti
195
196-- Ordered Sequence of two NullViews: all ordered combinations of tests and tags.
197-- Order of <- s1 and <- s2 is deliberately chosen to maintain preference priority
198mergeNullViews :: NullView -> NullView -> NullView
199mergeNullViews s1 s2 = cleanNullView $ do
200  (test1,tag1) <- s1
201  (test2,tag2) <- s2
202  return (mappend test1 test2,mappend tag1 tag2)
203-- mergeNullViews = cleanNullView $ liftM2 (mappend *** mappend)
204
205-- Concatenated two ranges of number of accepted characters
206seqTake :: (Int, Maybe Int) -> (Int, Maybe Int) -> (Int, Maybe Int)
207seqTake (x1,y1) (x2,y2) = (x1+x2,liftM2 (+) y1 y2)
208
209-- Parallel combination of list of ranges of number of accepted characters
210orTakes :: [(Int, Maybe Int)] -> (Int,Maybe Int)
211orTakes [] = (0,Just 0)
212orTakes ts = let (xs,ys) = unzip ts
213             in (minimum xs, foldl1 (liftM2 max) ys)
214
215-- Invariant: apply (toAdvice _ ) == mempty
216apply :: HandleTag -> Maybe Tag
217apply (Apply tag) = Just tag
218apply _ = Nothing
219toAdvice :: HandleTag -> HandleTag
220toAdvice (Apply tag) = Advice tag
221toAdvice s = s
222noTag :: HandleTag -> Bool
223noTag NoTag = True
224noTag _ = False
225fromHandleTag :: HandleTag -> Tag
226fromHandleTag (Apply tag) = tag
227fromHandleTag (Advice tag) = tag
228fromHandleTag _ = error "fromHandleTag"
229
230-- Predicates on the range of number of accepted  characters
231varies :: Q -> Bool
232varies Q {takes = (_,Nothing)} = True
233varies Q {takes = (x,Just y)} = x/=y
234
235mustAccept :: Q -> Bool
236mustAccept q = (0/=) . fst . takes $ q
237
238canAccept :: Q -> Bool
239canAccept q = maybe True (0/=) $ snd . takes $ q
240
241cannotAccept :: Q -> Bool
242cannotAccept q = maybe False (0==) $ snd . takes $ q
243
244-- This converts then input Pattern to an analyzed Q structure with
245-- the tags assigned.
246--
247-- The analysis is filled in by a depth first search and the tags are
248-- created top down and passed to children.  Thus information flows up
249-- from the dfs of the children and simultaneously down in the form of
250-- pre and post HandleTag data.  This bidirectional flow is handled
251-- declaratively by using the MonadFix (i.e. mdo).
252--
253-- Invariant: A tag should exist in Q in exactly one place (and will
254-- be in a preTag,postTag, or getOrbit field).  This is partly because
255-- PGroup needs to know the tags are around precisely the expression
256-- that it wants to record.  If the same tag were in other branches
257-- then this would no longer be true.  The tag may or may not also
258-- show up in one or more preReset list or resetOrbits list.
259--
260-- This invariant is enforced by each node either taking
261-- responsibility (apply) for a passed in / created tag or sending it
262-- to exactly one child node.  Other child nodes need to receive it
263-- via toAdvice.  Leaf nodes are forced to apply any passed tags.
264--
265-- There is a final "qwin of Q {postTag=ISet.singleton 1}" and an
266-- implied initial index tag of 0.
267--
268-- favoring pushing Apply into the child postTag makes PGroup happier
269
270type PM = RWS (Maybe GroupIndex) [Either Tag GroupInfo] ([OP]->[OP],Tag)
271type HHQ = HandleTag  -- m1 : info about left boundaary / preTag
272        -> HandleTag  -- m2 : info about right boundary / postTag
273        -> PM Q
274
275-- There is no group 0 here, since it is always the whole match and has no parent of its own
276makeGroupArray :: GroupIndex -> [GroupInfo] -> Array GroupIndex [GroupInfo]
277makeGroupArray maxGroupIndex groups = accumArray (\earlier later -> later:earlier) [] (1,maxGroupIndex) filler
278    where filler = map (\gi -> (thisIndex gi,gi)) groups
279
280fromRight :: [Either Tag GroupInfo] -> [GroupInfo]
281fromRight [] = []
282fromRight ((Right x):xs) = x:fromRight xs
283fromRight ((Left _):xs) = fromRight xs
284
285partitionEither :: [Either Tag GroupInfo] -> ([Tag],[GroupInfo])
286partitionEither = helper id id where
287  helper :: ([Tag]->[Tag]) -> ([GroupInfo]->[GroupInfo]) -> [Either Tag GroupInfo] -> ([Tag],[GroupInfo])
288  helper ls rs [] = (ls [],rs [])
289  helper ls rs ((Right x):xs) = helper  ls      (rs.(x:)) xs
290  helper ls rs ((Left  x):xs) = helper (ls.(x:)) rs       xs
291
292-- Partial function: assumes starTrans has been run on the Pattern
293-- Note that the lazy dependency chain for this very zigzag:
294--   varies information is sent up the tree
295--   handle tags depend on that and sends m1 m2 down the tree
296--     makeGroup sends some tags to the writer (Right _)
297--     withParent listens to children send group info to writer
298--       and lazily looks resetGroupTags from aGroups, the result of all writer (Right _)
299--       preReset stores the resetGroupTags result of the lookup in the tree
300--     makeOrbit sends some tags to the writer (Left _)
301--     withOrbit listens to children send orbit info to writer for resetOrbitTags
302--   nullQ depends m1 m2 and resetOrbitTags and resetGroupTags and is sent up the tree
303patternToQ :: CompOption -> (Pattern,(GroupIndex,DoPa)) -> (Q,Array Tag OP,Array GroupIndex [GroupInfo])
304patternToQ compOpt (pOrig,(maxGroupIndex,_)) = (tnfa,aTags,aGroups) where
305  (tnfa,(tag_dlist,nextTag),groups) = runRWS monad startReader startState
306  aTags = listArray (0,pred nextTag) (tag_dlist [])
307  aGroups = makeGroupArray maxGroupIndex (fromRight groups)
308
309  -- implicitly inside a PGroup 0 converted into a GroupInfo 0 undefined 0 1
310  monad = go (starTrans pOrig) (Advice 0) (Advice 1)
311  -- startReader is accessed by getParentIndex and changed by nonCapture and withParent
312  startReader :: Maybe GroupIndex
313  startReader = Just 0                           -- start inside group 0, capturing enabled
314  -- The startState is only acted upon in the "uniq" command
315  -- Tag 0 is Minimized and Tag 1 is maximized, next tag has value of 2
316  -- This is regardless of right or left associativity
317  startState :: ([OP]->[OP],Tag)
318  startState = ( (Minimize:) . (Maximize:) , 2)
319
320  -- uniq uses MonadState and always returns an "Apply _" tag
321  {-# INLINE uniq #-}
322  uniq :: String -> PM HandleTag
323  uniq _msg = fmap Apply (uniq' Maximize)
324--  uniq _msg = do x <- fmap Apply (uniq' Maximize)
325--                trace ('\n':msg ++ " Maximize "++show x) $ return x
326--                return x
327
328  ignore :: String -> PM Tag
329  ignore _msg = uniq' Ignore
330--  ignore _msg = do x <- uniq' Ignore
331--                  trace ('\n':msg ++ " Ignore "++show x) $ return x
332--                  return x
333
334  {-# NOINLINE uniq' #-}
335  uniq' :: OP -> PM Tag
336  uniq' newOp = do
337    (op,s) <- get                -- generate the next tag with bias newOp
338    let op' = op . (newOp:)
339        s' = succ s
340    put $! (op',s')
341    return s
342
343  {-# INLINE makeOrbit #-}
344  -- Specialize the monad operations and give more meaningful names
345  -- makeOrbit uses MonadState(uniq) and MonadWriter(tell/Left)
346  makeOrbit :: PM (Maybe Tag)
347  makeOrbit = do x <- uniq' Orbit
348--                 trace ('\n':"PStar Orbit "++show x) $ do
349                 tell [Left x]
350                 return (Just x)
351
352  {-# INLINE withOrbit #-}
353  -- withOrbit uses MonadWriter(listens to makeOrbit/Left), collects
354  -- children at all depths
355  withOrbit :: PM a -> PM (a,[Tag])
356  withOrbit = listens childStars
357    where childStars x = let (ts,_) = partitionEither x in ts
358
359  {-# INLINE makeGroup #-}
360  -- makeGroup usesMonadWriter(tell/Right)
361  makeGroup :: GroupInfo -> PM ()
362  makeGroup = tell . (:[]) . Right
363
364  {-# INLINE getParentIndex #-}
365  -- getParentIndex uses MonadReader(ask)
366  getParentIndex :: PM (Maybe GroupIndex)
367  getParentIndex = ask
368
369  {-# INLINE nonCapture #-}
370  -- nonCapture uses MonadReader(local) to suppress getParentIndex to return Nothing
371  nonCapture :: PM  a -> PM a
372  nonCapture = local (const Nothing)
373
374  -- withParent uses MonadReader(local) to set getParentIndex to return (Just this)
375  -- withParent uses MonadWriter(listens to makeGroup/Right) to return contained group indices (stopTag)
376  -- withParent is only safe if getParentIndex has been checked to be not equal to Nothing (see PGroup below)
377  -- Note use of laziness: the immediate children's group index is used to look up all copies of the
378  -- group in aGroups, including copies that are not immediate children.
379  withParent :: GroupIndex -> PM a -> PM (a,[Tag])
380  withParent this = local (const (Just this)) . listens childGroupInfo
381    where childGroupInfo x =
382            let (_,gs) = partitionEither x
383                children :: [GroupIndex]
384                children = norep . sort . map thisIndex
385                           -- filter to get only immediate children (efficiency)
386                           . filter ((this==).parentIndex) $ gs
387            in concatMap (map flagTag . (aGroups!)) (this:children)
388
389  -- combineConcat is a partial function: Must not pass in an empty list
390  -- Policy choices:
391  --  * pass tags to apply to children and have no preTag or postTag here (so none addded to nullQ)
392  --  * middle 'mid' tag: give to left/front child as postTag so a Group there might claim it as a stopTag
393  --  * if parent is Group then preReset will become non-empty
394  combineConcat :: [Pattern] -> HHQ
395  combineConcat | rightAssoc compOpt = foldr1 combineSeq . map go
396                | otherwise          = foldl1 combineSeq . map go -- libtre default
397    where {-# INLINE front'end #-}
398          front'end | rightAssoc compOpt = liftM2 (,)
399                    | otherwise = flip (liftM2 (flip (,)))
400          combineSeq :: HHQ -> HHQ -> HHQ
401          combineSeq pFront pEnd = (\ m1 m2 -> mdo
402            let bothVary = varies qFront && varies qEnd
403            a <- if noTag m1 && bothVary then uniq "combineSeq start" else return m1
404            b <- if noTag m2 && bothVary then uniq "combineSeq stop" else return m2
405            mid <- case (noTag a,canAccept qFront,noTag b,canAccept qEnd) of
406                     (False,False,_,_) -> return (toAdvice a)
407                     (_,_,False,False) -> return (toAdvice b)
408                     _ -> if tagged qFront || tagged qEnd then uniq "combineSeq mid" else return NoTag
409      --      qFront <- pFront a mid
410      --      qEnd <- pEnd (toAdvice mid) b
411            (qFront,qEnd) <- front'end (pFront a mid) (pEnd (toAdvice mid) b)
412            -- XXX: Perhaps a "produces" should be created to compliment "wants",
413            -- then "produces qEnd" could be compared to "wants qFront"
414            let wanted = if WantsEither == wants qEnd then wants qFront else wants qEnd
415            return $ Q { nullQ = mergeNullViews (nullQ qFront) (nullQ qEnd)
416                             , takes = seqTake (takes qFront) (takes qEnd)
417                             , preReset = [], postSet = [], preTag = Nothing, postTag = Nothing
418                             , tagged = bothVary
419                             , childGroups = childGroups qFront || childGroups qEnd
420                             , wants = wanted
421                             , unQ = Seq qFront qEnd }
422                                   )
423  go :: Pattern -> HHQ
424  go pIn m1 m2 =
425    let die = error $ "patternToQ cannot handle "++show pIn
426        nil = return $ Q {nullQ=makeEmptyNullView m1 m2
427                         ,takes=(0,Just 0)
428                         ,preReset=[],postSet=[],preTag=apply m1,postTag=apply m2
429                         ,tagged=False,childGroups=False,wants=WantsEither
430                         ,unQ=Empty}
431        one = return $ Q {nullQ=notNull
432                         ,takes=(1,Just 1)
433                         ,preReset=[],postSet=[],preTag=apply m1,postTag=apply m2
434                         ,tagged=False,childGroups=False,wants=WantsQNFA
435                         ,unQ = OneChar pIn}
436        test myTest = return $ Q {nullQ=makeTestNullView myTest m1 m2
437                                 ,takes=(0,Just 0)
438                                 ,preReset=[],postSet=[],preTag=apply m1,postTag=apply m2
439                                 ,tagged=False,childGroups=False,wants=WantsQT
440                                 ,unQ=Test myTest }
441        xtra = newSyntax compOpt
442    in case pIn of
443         PEmpty -> nil
444         POr [] -> nil
445         POr [branch] -> go branch m1 m2
446         POr branches -> mdo
447           -- 2009 : The PNonEmpty p as POr [PEmpty,p] takes no branch tracking tag.
448           --        I claim this is because only accepting branches need tags,
449           --        and the last accepting branch does not need a tag.
450           --        Non-accepting possibilities can all commute to the front and
451           --        become part of the nullQ.  The accepting bits then need prioritizing.
452           --    Does the above require changes in POr handling in TNFA?  Yes.
453           --    Have to always use nullQ instead of "recapitulating" it.
454           --    Could also create a constant-writing tag instead of many index tags.
455           -- Exasperation: This POr recursive mdo is very easy to make loop and lockup the program
456           -- if needTags is False then there is no way to disambiguate branches so fewer tags are needed
457           let needUniqTags = childGroups ans
458           let needTags = varies ans || childGroups ans -- childGroups detects that "abc|a(b)c" needs tags
459           a <- if noTag m1 && needTags then uniq "POr start" else return m1 -- whole POr
460           b <- if noTag m2 && needTags then uniq "POr stop" else return m2 -- whole POr
461           let aAdvice = toAdvice a -- all branches share 'aAdvice'
462               bAdvice = toAdvice b -- last branch gets 'bAdvice', others may get own tag
463               -- Due to the recursive-do, it seems that I have to put the if needTags into the op'
464               newUniq = if needUniqTags then uniq "POr branch" else return bAdvice
465--           trace ("\nPOr sub "++show aAdvice++" "++show bAdvice++"needsTags is "++show needTags) $ return ()
466           -- The "bs" values are allocated in left-to-right order before the children in "qs"
467           -- optimiztion: low priority for last branch is implicit, do not create separate tag here.
468           bs <- fmap (++[bAdvice]) $ replicateM (pred $ length branches) newUniq -- 2 <= length ps
469           -- create all the child branches in left-to-right order after the "bs"
470           qs <- forM (zip branches bs) (\(branch,bTag) ->  (go branch aAdvice bTag))
471           let wqs = map wants qs
472               wanted = if any (WantsBoth==) wqs then WantsBoth
473                          else case (any (WantsQNFA==) wqs,any (WantsQT==) wqs) of
474                                 (True,True) -> WantsBoth
475                                 (True,False) -> WantsQNFA
476                                 (False,True) -> WantsQT
477                                 (False,False) -> WantsEither
478               nullView = cleanNullView . tagWrapNullView a b . concatMap nullQ $ qs
479               -- The nullView computed above takes the nullQ of the branches and combines them.  This
480               -- assumes that the pre/post tags of the children are also part of the nullQ values.  So
481               -- for consistency, POr must then add its own pre/post tags to its nullQ value.  Note that
482               -- concatMap sets the left-to-right preference when choosing the null views.
483           let ans = Q { nullQ = nullView
484                       , takes = orTakes . map takes $ qs
485                       , preReset = [], postSet = []
486                       , preTag = apply a, postTag = apply b
487                       , tagged = needTags
488                       , childGroups = any childGroups qs
489                       , wants = wanted
490                       , unQ = Or qs }
491           return ans
492         PConcat [] -> nil -- fatal to pass [] to combineConcat
493         PConcat ps -> combineConcat ps m1 m2
494         PStar mayFirstBeNull p -> mdo
495           let accepts    = canAccept q
496               -- if needsOrbit is False then there is no need to disambiguate captures on each orbit
497               -- Both checks are useful because (varies q) of True does not imply (childGroups q) of True when under PNonCapture
498               needsOrbit = varies q && childGroups q
499               -- if needsOrbit then must check start/stop before the Orbit tag
500               -- if accepts then must check start/stop of whole pattern
501               needsTags  = needsOrbit || accepts       -- important that needsOrbit implies needsTags
502           a <- if noTag m1 && needsTags then uniq "PStar start" else return m1
503           b <- if noTag m2 && needsTags then uniq "PStar stop" else return m2
504           mOrbit <- if needsOrbit then makeOrbit else return Nothing -- any Orbit tag is created after the pre and post tags
505--           test1 <- if tagged q then uniq "not-TEST1" Minimize else return NoTag
506-- XXX XXX 1.1.5 testing second NoTag replaced with (toAdvice b)
507           (q,resetOrbitTags) <- withOrbit (go p NoTag (toAdvice b)) -- all contained orbit tags get listened to (not including this one).
508           let nullView | mayFirstBeNull = cleanNullView $ childViews ++ skipView
509                        | otherwise = skipView
510                 where childViews = tagWrapNullView a b . orbitWrapNullView mOrbit resetOrbitTags $ nullQ q
511                       skipView = makeEmptyNullView a b
512           return $ Q { nullQ = nullView
513                      , takes = (0,if accepts then Nothing else (Just 0))
514                      , preReset = [], postSet = []
515                      , preTag = apply a, postTag = apply b
516                      , tagged = needsTags
517                      , childGroups = childGroups q
518                      , wants = WantsQT
519                      , unQ =Star { getOrbit = mOrbit
520                                  , resetOrbits = resetOrbitTags
521                                  , firstNull = mayFirstBeNull
522                                  , unStar = q } }
523         PCarat dopa -> test (Test_BOL,dopa)
524         PDollar dopa -> test (Test_EOL,dopa)
525         PChar {} -> one
526         PDot {} -> one
527         PAny {} -> one
528         PAnyNot {} -> one
529         -- CompOption's newSyntax enables these escaped anchors
530         PEscape dopa '`'  | xtra -> test (Test_BOB,dopa)
531         PEscape dopa '\'' | xtra -> test (Test_EOB,dopa)
532         PEscape dopa '<'  | xtra -> test (Test_BOW,dopa)
533         PEscape dopa '>'  | xtra -> test (Test_EOW,dopa)
534         PEscape dopa 'b'  | xtra -> test (Test_EdgeWord,dopa)
535         PEscape dopa 'B'  | xtra -> test (Test_NotEdgeWord,dopa)
536         -- otherwise escape codes are just the escaped character
537         PEscape {} -> one
538
539         -- A PGroup node in the Pattern tree does not become a node
540         -- in the Q/P tree. A PGroup can share and pass along a
541         -- preTag (with Advice) with other branches, but will pass
542         -- down an Apply postTag.
543         --
544         -- If the parent index is Nothing then this is part of a
545         -- non-capturing subtree and ignored.  This is a lazy and
546         -- efficient alternative to rebuidling the tree with PGroup
547         -- Nothing replacing PGroup (Just _).
548         --
549         -- Guarded by the getParentIndex /= Nothing check is the
550         -- withParent command.
551         --
552         PGroup Nothing p -> go p m1 m2
553         PGroup (Just this) p -> do
554           mParent <- getParentIndex
555           case mParent of
556             Nothing -> go p m1 m2 -- just like PGroup Nothing p
557             Just parent -> do
558               -- 'a' may be Advice or Apply from parent or Apply created here
559               a <- if noTag m1 then uniq "PGroup start" else return m1
560               b <- if noTag m2 then uniq "PGroup stop" else return m2
561               flag <- ignore "PGroup ignore"
562{-
563               -- 'b' may be Apply from parent or Apply created here
564               b <- if isNothing (apply m2) then uniq "PGroup" else return m2
565-}
566               (q,resetGroupTags) <- withParent this (go p a b)  -- all immediate child groups stop tags get listened to.
567               -- 2009: makeGroup performs a tell, why after withParent? I am no longer sure.
568               makeGroup (GroupInfo this parent (fromHandleTag a) (fromHandleTag b) flag)
569               return $ q { nullQ = addGroupResetsToNullView resetGroupTags flag (nullQ q)
570                          , tagged = True
571                          , childGroups = True
572                          , preReset = resetGroupTags `mappend` (preReset q)
573                          , postSet = (postSet q) `mappend` [flag]
574                          }
575
576         -- A PNonCapture node in the Pattern tree does not become a
577         -- node in the Q/P tree.  It sets the parent to Nothing while
578         -- processing the sub-tree.
579         PNonCapture p -> nonCapture (go p m1 m2)
580
581         -- these are here for completeness of the case branches, currently starTrans replaces them all
582         PPlus {} -> die
583         PQuest {} -> die
584         PBound {} -> die
585         -- PNonEmpty is deprecated, and not produced in Pattern by starTrans anymore
586         PNonEmpty {} -> die
587
588{-
589Similar to change in WinTags for QT/QNFA:
590Change the NullView to use a tasktags instead of wintags since they are all PreUpdate
591
592         -- PNonEmpty means the child pattern p can be skipped by
593         -- bypassing the pattern.  This is only used in the case p
594         -- can accept 0 and can accept more than zero characters
595         -- (thus the assertions, enforcted by CorePattern.starTrans).
596         -- The important thing about this case is intercept the
597         -- "accept 0" possibility and replace with "skip".
598         PNonEmpty p -> mdo
599           let needsTags = canAccept q
600           a <- if noTag m1 && needsTags then uniq Minimize else return m1
601           b <- if noTag m2 && needsTags then uniq Maximize else return m2
602           q <- go p (toAdvice a) (toAdvice b)
603           when (not needsTags) (err $ "PNonEmpty could not accept characters: "++show (p,pOrig))
604           when (mustAccept q) (err $ "patternToQ : PNonEmpty provided with a *mustAccept* pattern: "++show (p,pOrig))
605           return $ Q { nullQ = emptyNull (preTags (apply a) (apply b)) -- The meaning of NonEmpty
606                      , takes = (0,snd (takes q))                       -- like Or, drop lower bound to 0
607                      , preReset = []
608                      , preTag = apply a, postTag = apply b             -- own the closing tag so it will not end a PGroup
609                      , tagged = needsTags
610                      , childGroups = childGroups q
611                      , wants = wants q  -- the test case is "x" =~ "(.|$){1,3}"
612                      , unQ = NonEmpty q }
613
614-}
615{-
616emptyNull :: TagList -> NullView
617emptyNull tags = (mempty, tags) : []
618
619testNull :: TestInfo -> TagList -> NullView
620testNull (w,d) tags = (SetTestInfo (Map.singleton w (Set.singleton d)), tags) : []
621
622-- Prepend tags to nullView
623addTagsToNullView :: TagList -> NullView -> NullView
624addTagsToNullView [] oldNV = oldNV
625addTagsToNullView tags oldNV= do
626  (oldTest,oldTags) <- oldNV
627  return (oldTest,tags `mappend` oldTags)
628
629-}
630
631
632-- xxx todo
633--
634-- see of PNonEmpty -> NonEmpty -> TNFA is really smarter than POr about tags
635