1-- | The CorePattern module deconstructs the Pattern tree created by 2-- ReadRegex.parseRegex and returns a simpler Q\/P tree with 3-- annotations at each Q node. This will be converted by the TNFA 4-- module into a QNFA finite automata. 5-- 6-- Of particular note, this Pattern to Q\/P conversion creates and 7-- assigns all the internal Tags that will be used during the matching 8-- process, and associates the captures groups with the tags that 9-- represent their starting and ending locations and with their 10-- immediate parent group. 11-- 12-- Each Maximize and Minimize tag is held as either a preTag or a 13-- postTag by one and only one location in the Q\/P tree. The Orbit 14-- tags are each held by one and only one Star node. Tags that stop a 15-- Group are also held in perhaps numerous preReset lists. 16-- 17-- The additional nullQ::nullView field of Q records the potentially 18-- complex information about what tests and tags must be used if the 19-- pattern unQ::P matches 0 zero characters. There can be redundancy 20-- in nullView, which is eliminated by cleanNullView. 21-- 22-- Uses recursive do notation. 23-- 24-- 2009 XXX TODO: we can avoid needing tags in the part of the pattern 25-- after the last capturing group (when right-associative). This is 26-- flipped for left-associative where the front of the pattern before 27-- the first capturing group needs no tags. The edge of these regions 28-- is subtle: both case needs a Maximize tag. One ought to be able to 29-- check the Pattern: if the root is PConcat then a scan from the end 30-- (start) looking for the first with an embedded PGroup can be found 31-- and the PGroup free elements can be wrapped in some new PNOTAG 32-- semantic indicator. 33module Text.Regex.TDFA.CorePattern(Q(..),P(..),WhichTest(..),Wanted(..) 34 ,TestInfo,OP(..),SetTestInfo(..),NullView 35 ,patternToQ,cleanNullView,cannotAccept,mustAccept) where 36 37import Control.Monad.RWS {- all -} 38import Data.Array.IArray(Array,(!),accumArray,listArray) 39import Data.List(sort) 40import Data.IntMap.EnumMap2(EnumMap) 41import qualified Data.IntMap.EnumMap2 as Map(singleton,null,assocs,keysSet) 42--import Data.Maybe(isNothing) 43import Data.IntSet.EnumSet2(EnumSet) 44import qualified Data.IntSet.EnumSet2 as Set(singleton,toList,isSubsetOf) 45import Data.Semigroup as Sem 46import Text.Regex.TDFA.Common {- all -} 47import Text.Regex.TDFA.Pattern(Pattern(..),starTrans) 48-- import Debug.Trace 49 50{- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -} 51 52 53--err :: String -> a 54--err = common_error "Text.Regex.TDFA.CorePattern" 55 56--debug :: (Show a) => a -> b -> b 57--debug _ = id 58 59-- Core Pattern Language 60data P = Empty -- Could be replaced by (Test Nothing)?? 61 | Or [Q] 62 | Seq Q Q 63 | Star { getOrbit :: Maybe Tag -- tag to prioritize the need to keep track of length of each pass though q 64 , resetOrbits :: [Tag] -- child star's orbits to reset (ResetOrbitTask) at all depths 65 , firstNull :: Bool -- Usually True to mean the first pass may match 0 characters 66 , unStar :: Q} 67 | Test TestInfo -- Require the test to be true (merge with empty as (Test (Maybe TestInfo)) ??) 68 | OneChar Pattern -- Bring the Pattern element that accepts a character 69 | NonEmpty Q -- Don't let the Q pattern match nothing 70 deriving (Show,Eq) 71 72-- The diagnostics about the pattern. Note that when unQ is 'Seq' the 73-- the preTag and postTag are Nothing but the preReset might have tags 74-- from PGroup injecting them. 75data Q = Q {nullQ :: NullView -- Ordered list of nullable views 76 ,takes :: (Position,Maybe Position) -- Range of number of accepted characters 77 ,preReset :: [Tag] -- Tags to "reset" (ResetGroupStopTask) (Only immediate children for efficiency) 78 ,postSet :: [Tag] -- Tags to "set" (SetGroupStopTask) 79 ,preTag,postTag :: Maybe Tag -- Tags assigned around this pattern (TagTask) 80 ,tagged :: Bool -- Whether this node should be tagged -- patternToQ use only 81 ,childGroups :: Bool -- Whether unQ has any PGroups -- patternToQ use only 82 ,wants :: Wanted -- What kind of continuation is used by this pattern 83 ,unQ :: P} deriving (Eq) 84 85type TestInfo = (WhichTest,DoPa) 86 87-- This is newtype'd to allow control over class instances 88-- This is a set of WhichTest where each test has associated pattern location information 89newtype SetTestInfo = SetTestInfo {getTests :: EnumMap WhichTest (EnumSet DoPa)} deriving (Eq) 90 91instance Semigroup SetTestInfo where 92 SetTestInfo x <> SetTestInfo y = SetTestInfo (x Sem.<> y) 93 94instance Monoid SetTestInfo where 95 mempty = SetTestInfo mempty 96 mappend = (Sem.<>) 97 98instance Show SetTestInfo where 99 show (SetTestInfo sti) = "SetTestInfo "++show (mapSnd (Set.toList) $ Map.assocs sti) 100 101-- There may be several distinct ways for a subtree to conditionally 102-- (i.e. with a Test) or unconditionally accept 0 characters. These 103-- are in the list in order of preference, with most preferred listed 104-- first. 105type NullView = [(SetTestInfo,TagList)] -- Ordered list of null views, each is a set of tests and tags 106 107-- During the depth first traversal, children are told about tags by the parent. 108-- They may change Apply to Advice and they may generate new tags. 109data HandleTag = NoTag -- No tag at this boundary 110 | Advice Tag -- tag at this boundary, applied at higher level in tree 111 | Apply Tag -- tag at this boundary, may be applied at this node or passed to one child 112 deriving (Show) 113 114-- Nodes in the tree are labeled by the type kind of continuation they 115-- prefer to be passed when processing. This makes it possible to 116-- create a smaller number of QNFA states and avoid creating wasteful 117-- QNFA states that won't be reachable in the final automata. 118-- 119-- In practice WantsBoth is treated identically to WantsQNFA and 120-- WantsBoth could be removed. 121data Wanted = WantsQNFA | WantsQT | WantsBoth | WantsEither deriving (Eq,Show) 122 123instance Show Q where 124 show = showQ 125 126showQ :: Q -> String 127showQ q = "Q { nullQ = "++show (nullQ q)++ 128 "\n , takes = "++show (takes q)++ 129 "\n , preReset = "++show (preReset q)++ 130 "\n , postSet = "++show (postSet q)++ 131 "\n , preTag = "++show (preTag q)++ 132 "\n , postTag = "++show (postTag q)++ 133 "\n , tagged = "++show (tagged q)++ 134 "\n , wants = "++show (wants q)++ 135 "\n , unQ = "++ indent' (unQ q)++" }" 136 where indent' = unlines . (\s -> case s of 137 [] -> [] 138 (h:t) -> h : (map (spaces ++) t)) . lines . show 139 spaces = replicate 10 ' ' 140 141-- Smart constructors for NullView 142notNull :: NullView 143notNull = [] 144 145-- Shorthand for combining a preTag and a postTag 146-- preTags :: Maybe Tag -> Maybe Tag -> TagList 147-- preTags a b = promote a `mappend` promote b 148-- where promote = maybe [] (\x -> [(x,PreUpdate TagTask)]) 149 150promotePreTag :: HandleTag -> TagList 151promotePreTag = maybe [] (\x -> [(x,PreUpdate TagTask)]) . apply 152 153makeEmptyNullView :: HandleTag -> HandleTag -> NullView 154makeEmptyNullView a b = [(mempty, promotePreTag a ++ promotePreTag b)] 155 156makeTestNullView :: TestInfo -> HandleTag -> HandleTag -> NullView 157makeTestNullView (w,d) a b = [(SetTestInfo (Map.singleton w (Set.singleton d)), promotePreTag a ++ promotePreTag b)] 158 159tagWrapNullView :: HandleTag -> HandleTag -> NullView -> NullView 160tagWrapNullView a b oldNV = 161 case (promotePreTag a, promotePreTag b) of 162 ([],[]) -> oldNV 163 (pre,post) -> do 164 (oldTests,oldTasks) <- oldNV 165 return (oldTests,pre++oldTasks++post) 166 167-- For PGroup, need to prepend reset tasks before others in nullView 168addGroupResetsToNullView :: [Tag] -> Tag -> NullView -> NullView 169addGroupResetsToNullView groupResets groupSet nv = [ (test, prepend (append tags) ) | (test,tags) <- nv ] 170 where prepend = foldr (\h t -> (h:).t) id . map (\tag->(tag,PreUpdate ResetGroupStopTask)) $ groupResets 171 append = (++[(groupSet,PreUpdate SetGroupStopTask)]) 172 173-- For PStar, need to put in the orbit TagTasks 174orbitWrapNullView :: Maybe Tag -> [Tag] -> NullView -> NullView 175orbitWrapNullView mOrbit orbitResets oldNV = 176 case (mOrbit,orbitResets) of 177 (Nothing,[]) -> oldNV 178 (Nothing,_) -> do (oldTests,oldTasks) <- oldNV 179 return (oldTests,prepend oldTasks) 180 (Just o,_) -> do (oldTests,oldTasks) <- oldNV 181 return (oldTests,prepend $ [(o,PreUpdate EnterOrbitTask)] ++ oldTasks ++ [(o,PreUpdate LeaveOrbitTask)]) 182 where prepend = foldr (\h t -> (h:).t) id . map (\tag->(tag,PreUpdate ResetOrbitTask)) $ orbitResets 183 184-- The NullViews are ordered, and later test sets that contain the 185-- tests from any earlier entry will never be chosen. This function 186-- returns a list with these redundant elements removed. Note that 187-- the first unconditional entry in the list will be the last entry of 188-- the returned list since the empty set is a subset of any other set. 189cleanNullView :: NullView -> NullView 190cleanNullView [] = [] 191cleanNullView (first@(SetTestInfo sti,_):rest) | Map.null sti = first : [] -- optimization 192 | otherwise = 193 first : cleanNullView (filter (not . (setTI `Set.isSubsetOf`) . Map.keysSet . getTests . fst) rest) 194 where setTI = Map.keysSet sti 195 196-- Ordered Sequence of two NullViews: all ordered combinations of tests and tags. 197-- Order of <- s1 and <- s2 is deliberately chosen to maintain preference priority 198mergeNullViews :: NullView -> NullView -> NullView 199mergeNullViews s1 s2 = cleanNullView $ do 200 (test1,tag1) <- s1 201 (test2,tag2) <- s2 202 return (mappend test1 test2,mappend tag1 tag2) 203-- mergeNullViews = cleanNullView $ liftM2 (mappend *** mappend) 204 205-- Concatenated two ranges of number of accepted characters 206seqTake :: (Int, Maybe Int) -> (Int, Maybe Int) -> (Int, Maybe Int) 207seqTake (x1,y1) (x2,y2) = (x1+x2,liftM2 (+) y1 y2) 208 209-- Parallel combination of list of ranges of number of accepted characters 210orTakes :: [(Int, Maybe Int)] -> (Int,Maybe Int) 211orTakes [] = (0,Just 0) 212orTakes ts = let (xs,ys) = unzip ts 213 in (minimum xs, foldl1 (liftM2 max) ys) 214 215-- Invariant: apply (toAdvice _ ) == mempty 216apply :: HandleTag -> Maybe Tag 217apply (Apply tag) = Just tag 218apply _ = Nothing 219toAdvice :: HandleTag -> HandleTag 220toAdvice (Apply tag) = Advice tag 221toAdvice s = s 222noTag :: HandleTag -> Bool 223noTag NoTag = True 224noTag _ = False 225fromHandleTag :: HandleTag -> Tag 226fromHandleTag (Apply tag) = tag 227fromHandleTag (Advice tag) = tag 228fromHandleTag _ = error "fromHandleTag" 229 230-- Predicates on the range of number of accepted characters 231varies :: Q -> Bool 232varies Q {takes = (_,Nothing)} = True 233varies Q {takes = (x,Just y)} = x/=y 234 235mustAccept :: Q -> Bool 236mustAccept q = (0/=) . fst . takes $ q 237 238canAccept :: Q -> Bool 239canAccept q = maybe True (0/=) $ snd . takes $ q 240 241cannotAccept :: Q -> Bool 242cannotAccept q = maybe False (0==) $ snd . takes $ q 243 244-- This converts then input Pattern to an analyzed Q structure with 245-- the tags assigned. 246-- 247-- The analysis is filled in by a depth first search and the tags are 248-- created top down and passed to children. Thus information flows up 249-- from the dfs of the children and simultaneously down in the form of 250-- pre and post HandleTag data. This bidirectional flow is handled 251-- declaratively by using the MonadFix (i.e. mdo). 252-- 253-- Invariant: A tag should exist in Q in exactly one place (and will 254-- be in a preTag,postTag, or getOrbit field). This is partly because 255-- PGroup needs to know the tags are around precisely the expression 256-- that it wants to record. If the same tag were in other branches 257-- then this would no longer be true. The tag may or may not also 258-- show up in one or more preReset list or resetOrbits list. 259-- 260-- This invariant is enforced by each node either taking 261-- responsibility (apply) for a passed in / created tag or sending it 262-- to exactly one child node. Other child nodes need to receive it 263-- via toAdvice. Leaf nodes are forced to apply any passed tags. 264-- 265-- There is a final "qwin of Q {postTag=ISet.singleton 1}" and an 266-- implied initial index tag of 0. 267-- 268-- favoring pushing Apply into the child postTag makes PGroup happier 269 270type PM = RWS (Maybe GroupIndex) [Either Tag GroupInfo] ([OP]->[OP],Tag) 271type HHQ = HandleTag -- m1 : info about left boundaary / preTag 272 -> HandleTag -- m2 : info about right boundary / postTag 273 -> PM Q 274 275-- There is no group 0 here, since it is always the whole match and has no parent of its own 276makeGroupArray :: GroupIndex -> [GroupInfo] -> Array GroupIndex [GroupInfo] 277makeGroupArray maxGroupIndex groups = accumArray (\earlier later -> later:earlier) [] (1,maxGroupIndex) filler 278 where filler = map (\gi -> (thisIndex gi,gi)) groups 279 280fromRight :: [Either Tag GroupInfo] -> [GroupInfo] 281fromRight [] = [] 282fromRight ((Right x):xs) = x:fromRight xs 283fromRight ((Left _):xs) = fromRight xs 284 285partitionEither :: [Either Tag GroupInfo] -> ([Tag],[GroupInfo]) 286partitionEither = helper id id where 287 helper :: ([Tag]->[Tag]) -> ([GroupInfo]->[GroupInfo]) -> [Either Tag GroupInfo] -> ([Tag],[GroupInfo]) 288 helper ls rs [] = (ls [],rs []) 289 helper ls rs ((Right x):xs) = helper ls (rs.(x:)) xs 290 helper ls rs ((Left x):xs) = helper (ls.(x:)) rs xs 291 292-- Partial function: assumes starTrans has been run on the Pattern 293-- Note that the lazy dependency chain for this very zigzag: 294-- varies information is sent up the tree 295-- handle tags depend on that and sends m1 m2 down the tree 296-- makeGroup sends some tags to the writer (Right _) 297-- withParent listens to children send group info to writer 298-- and lazily looks resetGroupTags from aGroups, the result of all writer (Right _) 299-- preReset stores the resetGroupTags result of the lookup in the tree 300-- makeOrbit sends some tags to the writer (Left _) 301-- withOrbit listens to children send orbit info to writer for resetOrbitTags 302-- nullQ depends m1 m2 and resetOrbitTags and resetGroupTags and is sent up the tree 303patternToQ :: CompOption -> (Pattern,(GroupIndex,DoPa)) -> (Q,Array Tag OP,Array GroupIndex [GroupInfo]) 304patternToQ compOpt (pOrig,(maxGroupIndex,_)) = (tnfa,aTags,aGroups) where 305 (tnfa,(tag_dlist,nextTag),groups) = runRWS monad startReader startState 306 aTags = listArray (0,pred nextTag) (tag_dlist []) 307 aGroups = makeGroupArray maxGroupIndex (fromRight groups) 308 309 -- implicitly inside a PGroup 0 converted into a GroupInfo 0 undefined 0 1 310 monad = go (starTrans pOrig) (Advice 0) (Advice 1) 311 -- startReader is accessed by getParentIndex and changed by nonCapture and withParent 312 startReader :: Maybe GroupIndex 313 startReader = Just 0 -- start inside group 0, capturing enabled 314 -- The startState is only acted upon in the "uniq" command 315 -- Tag 0 is Minimized and Tag 1 is maximized, next tag has value of 2 316 -- This is regardless of right or left associativity 317 startState :: ([OP]->[OP],Tag) 318 startState = ( (Minimize:) . (Maximize:) , 2) 319 320 -- uniq uses MonadState and always returns an "Apply _" tag 321 {-# INLINE uniq #-} 322 uniq :: String -> PM HandleTag 323 uniq _msg = fmap Apply (uniq' Maximize) 324-- uniq _msg = do x <- fmap Apply (uniq' Maximize) 325-- trace ('\n':msg ++ " Maximize "++show x) $ return x 326-- return x 327 328 ignore :: String -> PM Tag 329 ignore _msg = uniq' Ignore 330-- ignore _msg = do x <- uniq' Ignore 331-- trace ('\n':msg ++ " Ignore "++show x) $ return x 332-- return x 333 334 {-# NOINLINE uniq' #-} 335 uniq' :: OP -> PM Tag 336 uniq' newOp = do 337 (op,s) <- get -- generate the next tag with bias newOp 338 let op' = op . (newOp:) 339 s' = succ s 340 put $! (op',s') 341 return s 342 343 {-# INLINE makeOrbit #-} 344 -- Specialize the monad operations and give more meaningful names 345 -- makeOrbit uses MonadState(uniq) and MonadWriter(tell/Left) 346 makeOrbit :: PM (Maybe Tag) 347 makeOrbit = do x <- uniq' Orbit 348-- trace ('\n':"PStar Orbit "++show x) $ do 349 tell [Left x] 350 return (Just x) 351 352 {-# INLINE withOrbit #-} 353 -- withOrbit uses MonadWriter(listens to makeOrbit/Left), collects 354 -- children at all depths 355 withOrbit :: PM a -> PM (a,[Tag]) 356 withOrbit = listens childStars 357 where childStars x = let (ts,_) = partitionEither x in ts 358 359 {-# INLINE makeGroup #-} 360 -- makeGroup usesMonadWriter(tell/Right) 361 makeGroup :: GroupInfo -> PM () 362 makeGroup = tell . (:[]) . Right 363 364 {-# INLINE getParentIndex #-} 365 -- getParentIndex uses MonadReader(ask) 366 getParentIndex :: PM (Maybe GroupIndex) 367 getParentIndex = ask 368 369 {-# INLINE nonCapture #-} 370 -- nonCapture uses MonadReader(local) to suppress getParentIndex to return Nothing 371 nonCapture :: PM a -> PM a 372 nonCapture = local (const Nothing) 373 374 -- withParent uses MonadReader(local) to set getParentIndex to return (Just this) 375 -- withParent uses MonadWriter(listens to makeGroup/Right) to return contained group indices (stopTag) 376 -- withParent is only safe if getParentIndex has been checked to be not equal to Nothing (see PGroup below) 377 -- Note use of laziness: the immediate children's group index is used to look up all copies of the 378 -- group in aGroups, including copies that are not immediate children. 379 withParent :: GroupIndex -> PM a -> PM (a,[Tag]) 380 withParent this = local (const (Just this)) . listens childGroupInfo 381 where childGroupInfo x = 382 let (_,gs) = partitionEither x 383 children :: [GroupIndex] 384 children = norep . sort . map thisIndex 385 -- filter to get only immediate children (efficiency) 386 . filter ((this==).parentIndex) $ gs 387 in concatMap (map flagTag . (aGroups!)) (this:children) 388 389 -- combineConcat is a partial function: Must not pass in an empty list 390 -- Policy choices: 391 -- * pass tags to apply to children and have no preTag or postTag here (so none addded to nullQ) 392 -- * middle 'mid' tag: give to left/front child as postTag so a Group there might claim it as a stopTag 393 -- * if parent is Group then preReset will become non-empty 394 combineConcat :: [Pattern] -> HHQ 395 combineConcat | rightAssoc compOpt = foldr1 combineSeq . map go 396 | otherwise = foldl1 combineSeq . map go -- libtre default 397 where {-# INLINE front'end #-} 398 front'end | rightAssoc compOpt = liftM2 (,) 399 | otherwise = flip (liftM2 (flip (,))) 400 combineSeq :: HHQ -> HHQ -> HHQ 401 combineSeq pFront pEnd = (\ m1 m2 -> mdo 402 let bothVary = varies qFront && varies qEnd 403 a <- if noTag m1 && bothVary then uniq "combineSeq start" else return m1 404 b <- if noTag m2 && bothVary then uniq "combineSeq stop" else return m2 405 mid <- case (noTag a,canAccept qFront,noTag b,canAccept qEnd) of 406 (False,False,_,_) -> return (toAdvice a) 407 (_,_,False,False) -> return (toAdvice b) 408 _ -> if tagged qFront || tagged qEnd then uniq "combineSeq mid" else return NoTag 409 -- qFront <- pFront a mid 410 -- qEnd <- pEnd (toAdvice mid) b 411 (qFront,qEnd) <- front'end (pFront a mid) (pEnd (toAdvice mid) b) 412 -- XXX: Perhaps a "produces" should be created to compliment "wants", 413 -- then "produces qEnd" could be compared to "wants qFront" 414 let wanted = if WantsEither == wants qEnd then wants qFront else wants qEnd 415 return $ Q { nullQ = mergeNullViews (nullQ qFront) (nullQ qEnd) 416 , takes = seqTake (takes qFront) (takes qEnd) 417 , preReset = [], postSet = [], preTag = Nothing, postTag = Nothing 418 , tagged = bothVary 419 , childGroups = childGroups qFront || childGroups qEnd 420 , wants = wanted 421 , unQ = Seq qFront qEnd } 422 ) 423 go :: Pattern -> HHQ 424 go pIn m1 m2 = 425 let die = error $ "patternToQ cannot handle "++show pIn 426 nil = return $ Q {nullQ=makeEmptyNullView m1 m2 427 ,takes=(0,Just 0) 428 ,preReset=[],postSet=[],preTag=apply m1,postTag=apply m2 429 ,tagged=False,childGroups=False,wants=WantsEither 430 ,unQ=Empty} 431 one = return $ Q {nullQ=notNull 432 ,takes=(1,Just 1) 433 ,preReset=[],postSet=[],preTag=apply m1,postTag=apply m2 434 ,tagged=False,childGroups=False,wants=WantsQNFA 435 ,unQ = OneChar pIn} 436 test myTest = return $ Q {nullQ=makeTestNullView myTest m1 m2 437 ,takes=(0,Just 0) 438 ,preReset=[],postSet=[],preTag=apply m1,postTag=apply m2 439 ,tagged=False,childGroups=False,wants=WantsQT 440 ,unQ=Test myTest } 441 xtra = newSyntax compOpt 442 in case pIn of 443 PEmpty -> nil 444 POr [] -> nil 445 POr [branch] -> go branch m1 m2 446 POr branches -> mdo 447 -- 2009 : The PNonEmpty p as POr [PEmpty,p] takes no branch tracking tag. 448 -- I claim this is because only accepting branches need tags, 449 -- and the last accepting branch does not need a tag. 450 -- Non-accepting possibilities can all commute to the front and 451 -- become part of the nullQ. The accepting bits then need prioritizing. 452 -- Does the above require changes in POr handling in TNFA? Yes. 453 -- Have to always use nullQ instead of "recapitulating" it. 454 -- Could also create a constant-writing tag instead of many index tags. 455 -- Exasperation: This POr recursive mdo is very easy to make loop and lockup the program 456 -- if needTags is False then there is no way to disambiguate branches so fewer tags are needed 457 let needUniqTags = childGroups ans 458 let needTags = varies ans || childGroups ans -- childGroups detects that "abc|a(b)c" needs tags 459 a <- if noTag m1 && needTags then uniq "POr start" else return m1 -- whole POr 460 b <- if noTag m2 && needTags then uniq "POr stop" else return m2 -- whole POr 461 let aAdvice = toAdvice a -- all branches share 'aAdvice' 462 bAdvice = toAdvice b -- last branch gets 'bAdvice', others may get own tag 463 -- Due to the recursive-do, it seems that I have to put the if needTags into the op' 464 newUniq = if needUniqTags then uniq "POr branch" else return bAdvice 465-- trace ("\nPOr sub "++show aAdvice++" "++show bAdvice++"needsTags is "++show needTags) $ return () 466 -- The "bs" values are allocated in left-to-right order before the children in "qs" 467 -- optimiztion: low priority for last branch is implicit, do not create separate tag here. 468 bs <- fmap (++[bAdvice]) $ replicateM (pred $ length branches) newUniq -- 2 <= length ps 469 -- create all the child branches in left-to-right order after the "bs" 470 qs <- forM (zip branches bs) (\(branch,bTag) -> (go branch aAdvice bTag)) 471 let wqs = map wants qs 472 wanted = if any (WantsBoth==) wqs then WantsBoth 473 else case (any (WantsQNFA==) wqs,any (WantsQT==) wqs) of 474 (True,True) -> WantsBoth 475 (True,False) -> WantsQNFA 476 (False,True) -> WantsQT 477 (False,False) -> WantsEither 478 nullView = cleanNullView . tagWrapNullView a b . concatMap nullQ $ qs 479 -- The nullView computed above takes the nullQ of the branches and combines them. This 480 -- assumes that the pre/post tags of the children are also part of the nullQ values. So 481 -- for consistency, POr must then add its own pre/post tags to its nullQ value. Note that 482 -- concatMap sets the left-to-right preference when choosing the null views. 483 let ans = Q { nullQ = nullView 484 , takes = orTakes . map takes $ qs 485 , preReset = [], postSet = [] 486 , preTag = apply a, postTag = apply b 487 , tagged = needTags 488 , childGroups = any childGroups qs 489 , wants = wanted 490 , unQ = Or qs } 491 return ans 492 PConcat [] -> nil -- fatal to pass [] to combineConcat 493 PConcat ps -> combineConcat ps m1 m2 494 PStar mayFirstBeNull p -> mdo 495 let accepts = canAccept q 496 -- if needsOrbit is False then there is no need to disambiguate captures on each orbit 497 -- Both checks are useful because (varies q) of True does not imply (childGroups q) of True when under PNonCapture 498 needsOrbit = varies q && childGroups q 499 -- if needsOrbit then must check start/stop before the Orbit tag 500 -- if accepts then must check start/stop of whole pattern 501 needsTags = needsOrbit || accepts -- important that needsOrbit implies needsTags 502 a <- if noTag m1 && needsTags then uniq "PStar start" else return m1 503 b <- if noTag m2 && needsTags then uniq "PStar stop" else return m2 504 mOrbit <- if needsOrbit then makeOrbit else return Nothing -- any Orbit tag is created after the pre and post tags 505-- test1 <- if tagged q then uniq "not-TEST1" Minimize else return NoTag 506-- XXX XXX 1.1.5 testing second NoTag replaced with (toAdvice b) 507 (q,resetOrbitTags) <- withOrbit (go p NoTag (toAdvice b)) -- all contained orbit tags get listened to (not including this one). 508 let nullView | mayFirstBeNull = cleanNullView $ childViews ++ skipView 509 | otherwise = skipView 510 where childViews = tagWrapNullView a b . orbitWrapNullView mOrbit resetOrbitTags $ nullQ q 511 skipView = makeEmptyNullView a b 512 return $ Q { nullQ = nullView 513 , takes = (0,if accepts then Nothing else (Just 0)) 514 , preReset = [], postSet = [] 515 , preTag = apply a, postTag = apply b 516 , tagged = needsTags 517 , childGroups = childGroups q 518 , wants = WantsQT 519 , unQ =Star { getOrbit = mOrbit 520 , resetOrbits = resetOrbitTags 521 , firstNull = mayFirstBeNull 522 , unStar = q } } 523 PCarat dopa -> test (Test_BOL,dopa) 524 PDollar dopa -> test (Test_EOL,dopa) 525 PChar {} -> one 526 PDot {} -> one 527 PAny {} -> one 528 PAnyNot {} -> one 529 -- CompOption's newSyntax enables these escaped anchors 530 PEscape dopa '`' | xtra -> test (Test_BOB,dopa) 531 PEscape dopa '\'' | xtra -> test (Test_EOB,dopa) 532 PEscape dopa '<' | xtra -> test (Test_BOW,dopa) 533 PEscape dopa '>' | xtra -> test (Test_EOW,dopa) 534 PEscape dopa 'b' | xtra -> test (Test_EdgeWord,dopa) 535 PEscape dopa 'B' | xtra -> test (Test_NotEdgeWord,dopa) 536 -- otherwise escape codes are just the escaped character 537 PEscape {} -> one 538 539 -- A PGroup node in the Pattern tree does not become a node 540 -- in the Q/P tree. A PGroup can share and pass along a 541 -- preTag (with Advice) with other branches, but will pass 542 -- down an Apply postTag. 543 -- 544 -- If the parent index is Nothing then this is part of a 545 -- non-capturing subtree and ignored. This is a lazy and 546 -- efficient alternative to rebuidling the tree with PGroup 547 -- Nothing replacing PGroup (Just _). 548 -- 549 -- Guarded by the getParentIndex /= Nothing check is the 550 -- withParent command. 551 -- 552 PGroup Nothing p -> go p m1 m2 553 PGroup (Just this) p -> do 554 mParent <- getParentIndex 555 case mParent of 556 Nothing -> go p m1 m2 -- just like PGroup Nothing p 557 Just parent -> do 558 -- 'a' may be Advice or Apply from parent or Apply created here 559 a <- if noTag m1 then uniq "PGroup start" else return m1 560 b <- if noTag m2 then uniq "PGroup stop" else return m2 561 flag <- ignore "PGroup ignore" 562{- 563 -- 'b' may be Apply from parent or Apply created here 564 b <- if isNothing (apply m2) then uniq "PGroup" else return m2 565-} 566 (q,resetGroupTags) <- withParent this (go p a b) -- all immediate child groups stop tags get listened to. 567 -- 2009: makeGroup performs a tell, why after withParent? I am no longer sure. 568 makeGroup (GroupInfo this parent (fromHandleTag a) (fromHandleTag b) flag) 569 return $ q { nullQ = addGroupResetsToNullView resetGroupTags flag (nullQ q) 570 , tagged = True 571 , childGroups = True 572 , preReset = resetGroupTags `mappend` (preReset q) 573 , postSet = (postSet q) `mappend` [flag] 574 } 575 576 -- A PNonCapture node in the Pattern tree does not become a 577 -- node in the Q/P tree. It sets the parent to Nothing while 578 -- processing the sub-tree. 579 PNonCapture p -> nonCapture (go p m1 m2) 580 581 -- these are here for completeness of the case branches, currently starTrans replaces them all 582 PPlus {} -> die 583 PQuest {} -> die 584 PBound {} -> die 585 -- PNonEmpty is deprecated, and not produced in Pattern by starTrans anymore 586 PNonEmpty {} -> die 587 588{- 589Similar to change in WinTags for QT/QNFA: 590Change the NullView to use a tasktags instead of wintags since they are all PreUpdate 591 592 -- PNonEmpty means the child pattern p can be skipped by 593 -- bypassing the pattern. This is only used in the case p 594 -- can accept 0 and can accept more than zero characters 595 -- (thus the assertions, enforcted by CorePattern.starTrans). 596 -- The important thing about this case is intercept the 597 -- "accept 0" possibility and replace with "skip". 598 PNonEmpty p -> mdo 599 let needsTags = canAccept q 600 a <- if noTag m1 && needsTags then uniq Minimize else return m1 601 b <- if noTag m2 && needsTags then uniq Maximize else return m2 602 q <- go p (toAdvice a) (toAdvice b) 603 when (not needsTags) (err $ "PNonEmpty could not accept characters: "++show (p,pOrig)) 604 when (mustAccept q) (err $ "patternToQ : PNonEmpty provided with a *mustAccept* pattern: "++show (p,pOrig)) 605 return $ Q { nullQ = emptyNull (preTags (apply a) (apply b)) -- The meaning of NonEmpty 606 , takes = (0,snd (takes q)) -- like Or, drop lower bound to 0 607 , preReset = [] 608 , preTag = apply a, postTag = apply b -- own the closing tag so it will not end a PGroup 609 , tagged = needsTags 610 , childGroups = childGroups q 611 , wants = wants q -- the test case is "x" =~ "(.|$){1,3}" 612 , unQ = NonEmpty q } 613 614-} 615{- 616emptyNull :: TagList -> NullView 617emptyNull tags = (mempty, tags) : [] 618 619testNull :: TestInfo -> TagList -> NullView 620testNull (w,d) tags = (SetTestInfo (Map.singleton w (Set.singleton d)), tags) : [] 621 622-- Prepend tags to nullView 623addTagsToNullView :: TagList -> NullView -> NullView 624addTagsToNullView [] oldNV = oldNV 625addTagsToNullView tags oldNV= do 626 (oldTest,oldTags) <- oldNV 627 return (oldTest,tags `mappend` oldTags) 628 629-} 630 631 632-- xxx todo 633-- 634-- see of PNonEmpty -> NonEmpty -> TNFA is really smarter than POr about tags 635