1{-# OPTIONS_GHC -fno-warn-orphans #-} 2{-| 3This exports instances of the high level API and the medium level 4API of 'compile','execute', and 'regexec'. 5-} 6{- Copyright : (c) Chris Kuklewicz 2007 -} 7module Text.Regex.PCRE.Sequence( 8 -- ** Types 9 Regex, 10 MatchOffset, 11 MatchLength, 12 CompOption(CompOption), 13 ExecOption(ExecOption), 14 ReturnCode, 15 WrapError, 16 -- ** Miscellaneous 17 unusedOffset, 18 getVersion, 19 -- ** Medium level API functions 20 compile, 21 execute, 22 regexec, 23 -- ** Constants for CompOption 24 compBlank, 25 compAnchored, 26 compAutoCallout, 27 compCaseless, 28 compDollarEndOnly, 29 compDotAll, 30 compExtended, 31 compExtra, 32 compFirstLine, 33 compMultiline, 34 compNoAutoCapture, 35 compUngreedy, 36 compUTF8, 37 compNoUTF8Check, 38 -- ** Constants for ExecOption 39 execBlank, 40 execAnchored, 41 execNotBOL, 42 execNotEOL, 43 execNotEmpty, 44 execNoUTF8Check, 45 execPartial 46 ) where 47 48import Prelude hiding (fail) 49import Control.Monad.Fail (MonadFail(fail)) 50 51import Text.Regex.PCRE.Wrap -- all 52--import Foreign.C.String(withCStringLen,withCString) 53import Data.Array(Array,listArray) 54import System.IO.Unsafe(unsafePerformIO) 55import Text.Regex.Base.RegexLike(RegexMaker(..),RegexLike(..),RegexContext(..),MatchLength,MatchOffset,Extract(..)) 56import Text.Regex.Base.Impl(polymatch,polymatchM) 57import Data.Sequence as S hiding (length) 58import qualified Data.Sequence as S (length) 59import Foreign.C.String 60import Foreign.Marshal.Array 61import Foreign.Marshal.Alloc 62import Foreign.Storable 63 64instance RegexContext Regex (Seq Char) (Seq Char) where 65 match = polymatch 66 matchM = polymatchM 67 68unwrap :: (Show e) => Either e v -> IO v 69unwrap x = case x of Left err -> fail ("Text.Regex.PCRE.Sequence died: "++ show err) 70 Right v -> return v 71 72instance RegexMaker Regex CompOption ExecOption (Seq Char) where 73 makeRegexOpts c e pattern = unsafePerformIO $ 74 compile c e pattern >>= unwrap 75 makeRegexOptsM c e pattern = either (fail.show) return $ unsafePerformIO $ 76 compile c e pattern 77 78instance RegexLike Regex (Seq Char) where 79 matchTest regex str = unsafePerformIO $ 80 withSeq str (wrapTest 0 regex) >>= unwrap 81 matchOnce regex str = unsafePerformIO $ 82 execute regex str >>= unwrap 83 matchAll regex str = unsafePerformIO $ 84 withSeq str (wrapMatchAll regex) >>= unwrap 85 matchCount regex str = unsafePerformIO $ 86 withSeq str (wrapCount regex) >>= unwrap 87 88-- | Compiles a regular expression 89compile :: CompOption -- ^ Flags (summed together) 90 -> ExecOption -- ^ Flags (summed together) 91 -> (Seq Char) -- ^ The regular expression to compile 92 -> IO (Either (MatchOffset,String) Regex) -- ^ Returns: an error string and offset or the compiled regular expression 93compile c e pattern = withSeq0 pattern (wrapCompile c e) 94 95-- | Matches a regular expression against a string 96execute :: Regex -- ^ Compiled regular expression 97 -> (Seq Char) -- ^ (Seq Char) to match against 98 -> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength)))) 99 -- ^ Returns: 'Nothing' if the regex did not match the 100 -- string, or: 101 -- 'Just' an array of (offset,length) pairs where index 0 is whole match, and the rest are the captured subexpressions. 102execute regex str = do 103 maybeStartEnd <- withSeq str (wrapMatch 0 regex) 104 case maybeStartEnd of 105 Right Nothing -> return (Right Nothing) 106-- Right (Just []) -> fail "got [] back!" -- should never happen 107 Right (Just parts) -> 108 return . Right . Just . listArray (0,pred (length parts)) 109 . map (\(s,e)->(fromIntegral s, fromIntegral (e-s))) $ parts 110 Left err -> return (Left err) 111 112-- | execute match and extract substrings rather than just offsets 113regexec :: Regex -- ^ compiled regular expression 114 -> (Seq Char) -- ^ string to match 115 -> IO (Either WrapError (Maybe ((Seq Char), (Seq Char),(Seq Char), [(Seq Char)]))) 116 -- ^ Returns: Nothing if no match, else 117 -- (text before match, text after match, array of matches with 0 being the whole match) 118regexec regex str = do 119 let getSub (start,stop) | start == unusedOffset = S.empty 120 | otherwise = extract (start,stop-start) str 121 matchedParts [] = (S.empty,S.empty,str,[]) -- no information 122 matchedParts (matchedStartStop@(start,stop):subStartStop) = 123 (before start str 124 ,getSub matchedStartStop 125 ,after stop str 126 ,map getSub subStartStop) 127 maybeStartEnd <- withSeq str (wrapMatch 0 regex) 128 case maybeStartEnd of 129 Right Nothing -> return (Right Nothing) 130-- Right (Just []) -> fail "got [] back!" -- should never happen 131 Right (Just parts) -> return . Right . Just . matchedParts $ parts 132 Left err -> return (Left err) 133 134withSeq :: Seq Char -> (CStringLen -> IO a) -> IO a 135withSeq s f = 136 let -- Ensure null at end of s 137 len = S.length s 138 pokes p a | seq p (seq a False) = undefined 139 | otherwise = 140 case viewl a of 141 EmptyL -> return () 142 c :< a' -> poke p (castCharToCChar c) >> pokes (advancePtr p 1) a' 143 in allocaBytes (S.length s) (\ptr -> pokes ptr s >> f (ptr,len)) 144 145withSeq0 :: Seq Char -> (CString -> IO a) -> IO a 146withSeq0 s f = 147 let -- Ensure null at end of s 148 s' = case viewr s of -- bang !s' 149 EmptyR -> singleton '\0' 150 _ :> '\0' -> s 151 _ -> s |> '\0' 152 pokes p a | seq p (seq a False) = undefined 153 | otherwise = 154 case viewl a of -- bang pokes !p !a 155 EmptyL -> return () 156 c :< a' -> poke p (castCharToCChar c) >> pokes (advancePtr p 1) a' 157 in allocaBytes (S.length s') (\ptr -> pokes ptr s' >> f ptr) 158