1{-# OPTIONS_GHC -fno-warn-orphans #-}
2{-|
3This exports instances of the high level API and the medium level
4API of 'compile','execute', and 'regexec'.
5-}
6{- Copyright   :  (c) Chris Kuklewicz 2007 -}
7module Text.Regex.PCRE.Sequence(
8  -- ** Types
9  Regex,
10  MatchOffset,
11  MatchLength,
12  CompOption(CompOption),
13  ExecOption(ExecOption),
14  ReturnCode,
15  WrapError,
16  -- ** Miscellaneous
17  unusedOffset,
18  getVersion,
19  -- ** Medium level API functions
20  compile,
21  execute,
22  regexec,
23  -- ** Constants for CompOption
24  compBlank,
25  compAnchored,
26  compAutoCallout,
27  compCaseless,
28  compDollarEndOnly,
29  compDotAll,
30  compExtended,
31  compExtra,
32  compFirstLine,
33  compMultiline,
34  compNoAutoCapture,
35  compUngreedy,
36  compUTF8,
37  compNoUTF8Check,
38  -- ** Constants for ExecOption
39  execBlank,
40  execAnchored,
41  execNotBOL,
42  execNotEOL,
43  execNotEmpty,
44  execNoUTF8Check,
45  execPartial
46  ) where
47
48import Prelude hiding (fail)
49import Control.Monad.Fail (MonadFail(fail))
50
51import Text.Regex.PCRE.Wrap -- all
52--import Foreign.C.String(withCStringLen,withCString)
53import Data.Array(Array,listArray)
54import System.IO.Unsafe(unsafePerformIO)
55import Text.Regex.Base.RegexLike(RegexMaker(..),RegexLike(..),RegexContext(..),MatchLength,MatchOffset,Extract(..))
56import Text.Regex.Base.Impl(polymatch,polymatchM)
57import Data.Sequence as S hiding (length)
58import qualified Data.Sequence as S (length)
59import Foreign.C.String
60import Foreign.Marshal.Array
61import Foreign.Marshal.Alloc
62import Foreign.Storable
63
64instance RegexContext Regex (Seq Char) (Seq Char) where
65  match = polymatch
66  matchM = polymatchM
67
68unwrap :: (Show e) => Either e v -> IO v
69unwrap x = case x of Left err -> fail ("Text.Regex.PCRE.Sequence died: "++ show err)
70                     Right v -> return v
71
72instance RegexMaker Regex CompOption ExecOption (Seq Char) where
73  makeRegexOpts c e pattern = unsafePerformIO $
74    compile c e pattern >>= unwrap
75  makeRegexOptsM c e pattern = either (fail.show) return $ unsafePerformIO $
76    compile c e pattern
77
78instance RegexLike Regex (Seq Char) where
79  matchTest regex str = unsafePerformIO $
80    withSeq str (wrapTest 0 regex) >>= unwrap
81  matchOnce regex str = unsafePerformIO $
82    execute regex str >>= unwrap
83  matchAll regex str = unsafePerformIO $
84    withSeq str (wrapMatchAll regex) >>= unwrap
85  matchCount regex str = unsafePerformIO $
86    withSeq str (wrapCount regex) >>= unwrap
87
88-- | Compiles a regular expression
89compile :: CompOption -- ^ Flags (summed together)
90        -> ExecOption -- ^ Flags (summed together)
91        -> (Seq Char)     -- ^ The regular expression to compile
92        -> IO (Either (MatchOffset,String) Regex) -- ^ Returns: an error string and offset or the compiled regular expression
93compile c e pattern = withSeq0 pattern (wrapCompile c e)
94
95-- | Matches a regular expression against a string
96execute :: Regex      -- ^ Compiled regular expression
97        -> (Seq Char)     -- ^ (Seq Char) to match against
98        -> IO (Either WrapError (Maybe (Array Int (MatchOffset,MatchLength))))
99                -- ^ Returns: 'Nothing' if the regex did not match the
100                -- string, or:
101                --   'Just' an array of (offset,length) pairs where index 0 is whole match, and the rest are the captured subexpressions.
102execute regex str = do
103  maybeStartEnd <- withSeq str (wrapMatch 0 regex)
104  case maybeStartEnd of
105    Right Nothing -> return (Right Nothing)
106--  Right (Just []) -> fail "got [] back!" -- should never happen
107    Right (Just parts) ->
108      return . Right . Just . listArray (0,pred (length parts))
109      . map (\(s,e)->(fromIntegral s, fromIntegral (e-s))) $ parts
110    Left err -> return (Left err)
111
112-- | execute match and extract substrings rather than just offsets
113regexec  :: Regex      -- ^ compiled regular expression
114         -> (Seq Char)     -- ^ string to match
115         -> IO (Either WrapError (Maybe ((Seq Char), (Seq Char),(Seq Char), [(Seq Char)])))
116                      -- ^ Returns: Nothing if no match, else
117                      --   (text before match, text after match, array of matches with 0 being the whole match)
118regexec regex str = do
119  let getSub (start,stop) | start == unusedOffset = S.empty
120                          | otherwise = extract (start,stop-start) str
121      matchedParts [] = (S.empty,S.empty,str,[]) -- no information
122      matchedParts (matchedStartStop@(start,stop):subStartStop) =
123        (before start str
124        ,getSub matchedStartStop
125        ,after stop str
126        ,map getSub subStartStop)
127  maybeStartEnd <- withSeq str (wrapMatch 0 regex)
128  case maybeStartEnd of
129    Right Nothing -> return (Right Nothing)
130--  Right (Just []) -> fail "got [] back!" -- should never happen
131    Right (Just parts) -> return . Right . Just . matchedParts $ parts
132    Left err -> return (Left err)
133
134withSeq :: Seq Char -> (CStringLen -> IO a) -> IO a
135withSeq s f =
136  let -- Ensure null at end of s
137      len = S.length s
138      pokes p a | seq p (seq a False) = undefined
139                | otherwise =
140        case viewl a of
141          EmptyL -> return ()
142          c :< a' -> poke p (castCharToCChar c) >> pokes (advancePtr p 1) a'
143  in allocaBytes (S.length s) (\ptr -> pokes ptr s >> f (ptr,len))
144
145withSeq0 :: Seq Char -> (CString -> IO a) -> IO a
146withSeq0 s f =
147  let -- Ensure null at end of s
148      s' = case viewr s of                -- bang !s'
149             EmptyR -> singleton '\0'
150             _ :> '\0' -> s
151             _ -> s |> '\0'
152      pokes p a | seq p (seq a False) = undefined
153                | otherwise =
154        case viewl a of         -- bang pokes !p !a
155          EmptyL -> return ()
156          c :< a' -> poke p (castCharToCChar c) >> pokes (advancePtr p 1) a'
157  in allocaBytes (S.length s') (\ptr -> pokes ptr s' >> f ptr)
158