1{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances, FlexibleContexts, TypeSynonymInstances #-}
2{-# OPTIONS_GHC -fno-warn-orphans #-}
3{-|
4
5Module      :  Text.Regex.Base.Context
6Copyright   :  (c) Chris Kuklewicz 2006
7SPDX-License-Identifier: BSD-3-Clause
8
9Maintainer  :  hvr@gnu.org
10Stability   :  experimental
11Portability :  non-portable (MPTC+FD)
12
13This is a module of instances of 'RegexContext' (defined in
14"Text.Regex.Base.RegexLike").  Nothing else is exported.  This is
15usually imported via the "Text.Regex.Base" convenience package which
16itself is re-exported from newer @Text.Regex.XXX@ modules provided by
17the different @regex-xxx@ backends.
18
19These instances work for all the supported types and backends
20interchangeably.  These instances provide the different results that
21can be gotten from a 'match' or 'matchM' operation (often via the @=~@ and
22@=~~@ operators with combine 'makeRegex' with 'match' and 'matchM'
23respectively).  This module name is @Context@ because they operators are
24context dependent: use them in a context that expects an 'Int' and you
25get a count of matches, use them in a 'Bool' context and get 'True' if
26there is a match, etc.
27
28@'RegexContext' a b c@ takes a regular expression suppied in a type @a@
29generated by 'RegexMaker' and a target text supplied in type @b@ to a
30result type @c@ using the 'match' class function.  The 'matchM' class
31function works like 'match' unless there is no match found, in which
32case it calls 'fail' in the (arbitrary) monad context.
33
34There are a few type synonyms from "Text.Regex.Base.RegexLike" that are used here:
35
36@
37-- | 0 based index from start of source, or (-1) for unused
38type MatchOffset = Int
39-- | non-negative length of a match
40type MatchLength = Int
41type MatchArray = Array Int (MatchOffset, MatchLength)
42type MatchText source = Array Int (source, (MatchOffset, MatchLength))
43@
44
45There are also a few newtypes that used to prevent any possible
46overlap of types, which were not needed for GHC's late overlap
47detection but are needed for use in Hugs.
48
49@
50newtype AllSubmatches     f b = AllSubmatches     { getAllSubmatches     :: f b }
51newtype AllTextSubmatches f b = AllTextSubmatches { getAllTextSubmatches :: f b }
52newtype AllMatches        f b = AllMatches        { getAllMatches        :: f b }
53newtype AllTextMatches    f b = AllTextMatches    { getAllTextMatches    :: f b }
54@
55
56The newtypes' @f@ parameters are the containers, usually @[]@ or
57@Array Int@, (where the arrays all have lower bound 0).
58
59The two @Submatches@ newtypes return only information on the first
60match.  The other two newtypes return information on all the
61non-overlapping matches.  The two @Text@ newtypes are used to mark
62result types that contain the same type as the target text.
63
64Where provided, noncaptured submatches will have a 'MatchOffset' of
65(-1) and non-negative otherwise.  The semantics of submatches depend
66on the backend and its compile and execution options.  Where provided,
67'MatchLength' will always be non-negative.  Arrays with no elements
68are returned with bounds of (1,0).  Arrays with elements will have a
69lower bound of 0.
70
71XXX THIS HADDOCK DOCUMENTATION IS OUT OF DATE XXX
72
73These are for finding the first match in the target text:
74
75
76@ 'RegexContext' a b Bool @:
77  Whether there is any match or not.
78
79
80@ 'RegexContext' a b () @:
81  Useful as a guard with @matchM@ or @=~~@ in a monad, since failure to match calls 'fail'.
82
83
84@ 'RegexContext' a b b @:
85  This returns the text of the whole match.
86  It will return 'empty' from the 'Extract' type class if there is no match.
87  These are defined in each backend module, but documented here for convenience.
88
89
90@ 'RegexContext' a b ('MatchOffset', 'MatchLength') @:
91  This returns the initial index and length of the whole match.
92  MatchLength will always be non-negative, and 0 for a failed match.
93
94
95@ 'RegexContext' a b ('MatchResult' b) @: The
96  'MatchResult' structure with details for the match.  This is the
97  structure copied from the old @JRegex@ pacakge.
98
99
100@ 'RegexContext' a b (b, b, b) @:
101  The text before the match, the text of the match, the text after the match
102
103
104@ 'RegexContext' a b (b, 'MatchText' b, b) @:
105  The text before the match, the details of the match, and the text after the match
106
107
108@ 'RegexContext' a b (b, b, b, [b]) @:
109  The text before the match, the text of the match, the text after the
110  match, and a list of the text of the 1st and higher sub-parts of the
111  match.  This is the same return value as used in the old
112  @Text.Regex@ API.
113
114Two containers of the submatch offset information:
115
116
117@ 'RegexContext' a b 'MatchArray' @:
118  Array of @('MatchOffset', 'MatchLength')@ for all the sub matches.
119  The whole match is at the intial 0th index.
120  Noncaptured submatches will have a @'MatchOffset'@ of (-1)
121  The array will have no elements and bounds (1,0) if there is no match.
122
123
124@ 'RegexContext' a b ('AllSubmatches' [] ('MatchOffset', 'MatchLength') @:
125  List of @('MatchOffset', 'MatchLength')@
126  The whole match is the first element, the rest are the submatches (if any) in order.
127  The list is empty if there is no match.
128
129Two containers of the submatch text and offset information:
130
131@ 'RegexContext' a b ('AllTextSubmatches' (Array Int) (b, ('MatchOffset', 'MatchLength'))) @
132
133@ 'RegexContext' a b ('AllTextSubmatches' [] (b, ('MatchOffset', 'MatchLength')))  @
134
135Two containers of the submatch text information:
136
137@ 'RegexContext' a b ('AllTextSubmatches' [] b) @
138
139@ 'RegexContext' a b ('AllTextSubmatches' (Array Int) b) @
140
141These instances are for all the matches (non-overlapping).  Note that
142backends are supposed to supply 'RegexLike' instances for which the
143default 'matchAll' and 'matchAllText' stop searching after returning
144any successful but empty match.
145
146
147@ 'RegexContext' a b Int @:
148  The number of matches, non-negative.
149
150Two containers for locations of all matches:
151
152@ 'RegexContext' a b ('AllMatches' [] ('MatchOffset', 'MatchLength')) @
153
154@ 'RegexContext' a b ('AllMatches' (Array Int) ('MatchOffset', 'MatchLength')) @
155
156Two containers for the locations of all matches and their submatches:
157
158@ 'RegexContext' a b ['MatchArray'] @
159
160@ 'RegexContext' a b ('AllMatches' (Array Int) 'MatchArray') @
161
162Two containers for the text and locations of all matches and their submatches:
163
164@ 'RegexContext' a b ['MatchText' b] @
165
166@ 'RegexContext' a b ('AllTextMatches' (Array Int) ('MatchText' b)) @
167
168Two containers for text of all matches:
169@ 'RegexContext' a b ('AllTextMatches' [] b) @
170
171@ 'RegexContext' a b ('AllTextMatches' (Array Int) b) @
172
173Four containers for text of all matches and their submatches:
174
175@ 'RegexContext' a b [[b]] @
176
177@ 'RegexContext' a b ('AllTextMatches' (Array Int) [b]) @
178
179@ 'RegexContext' a b ('AllTextMatches' [] (Array Int b)) @
180
181@ 'RegexContext' a b ('AllTextMatches' (Array Int) (Array Int b)) @
182
183Unused matches are 'empty' (defined via 'Extract')
184
185-}
186
187module Text.Regex.Base.Context() where
188
189import Prelude hiding (fail)
190import Control.Monad.Fail (MonadFail(fail)) -- see 'regexFailed'
191
192import Control.Monad(liftM)
193import Data.Array(Array,(!),elems,listArray)
194--  import Data.Maybe(maybe)
195import Text.Regex.Base.RegexLike(RegexLike(..),RegexContext(..)
196  ,AllSubmatches(..),AllTextSubmatches(..),AllMatches(..),AllTextMatches(..)
197  ,MatchResult(..),Extract(empty),MatchOffset,MatchLength,MatchArray,MatchText)
198
199
200{-
201-- Get the ByteString type for mood/doom
202import Data.ByteString(ByteString)
203-- Get the Regex types for the mood/doom workaround
204import qualified Text.Regex.Lib.WrapPosix as R1(Regex)
205import qualified Text.Regex.Lib.WrapPCRE as R2(Regex)
206import qualified Text.Regex.Lib.WrapLazy as R3(Regex)
207import qualified Text.Regex.Lib.WrapDFAEngine as R4(Regex)
208-- Get the RegexLike instances
209import Text.Regex.Lib.StringPosix()
210import Text.Regex.Lib.StringPCRE()
211import Text.Regex.Lib.StringLazy()
212import Text.Regex.Lib.StringDFAEngine()
213import Text.Regex.Lib.ByteStringPosix()
214import Text.Regex.Lib.ByteStringPCRE()
215import Text.Regex.Lib.ByteStringLazy()
216import Text.Regex.Lib.ByteStringDFAEngine()
217-}
218{-
219
220mood :: (RegexLike a b) => a -> b -> b
221{-# INLINE mood #-}
222mood r s = case matchOnceText r s of
223    Nothing -> empty
224    Just (_, ma, _) -> fst (ma ! 0)
225
226doom :: (RegexLike a b,Monad m) => a -> b -> m b
227{-# INLINE doom #-}
228doom =  actOn (\ (_, ma, _) -> fst (ma ! 0))
229
230{- These run afoul of various restrictions if I say
231   "instance RegexContext a b b where"
232   so I am listing these cases explicitly
233-}
234
235instance RegexContext R1.Regex String String where match = mood; matchM = doom
236instance RegexContext R2.Regex String String where match = mood; matchM = doom
237instance RegexContext R3.Regex String String where match = mood; matchM = doom
238instance RegexContext R4.Regex String String where match = mood; matchM = doom
239instance RegexContext R1.Regex ByteString ByteString where match = mood; matchM = doom
240instance RegexContext R2.Regex ByteString ByteString where match = mood; matchM = doom
241instance RegexContext R3.Regex ByteString ByteString where match = mood; matchM = doom
242instance RegexContext R4.Regex ByteString ByteString where match = mood; matchM = doom
243-}
244
245
246nullArray :: Array Int a
247{-# INLINE nullArray #-}
248nullArray = listArray (1,0) []
249
250nullFail :: (RegexContext regex source (AllMatches [] target),MonadFail m) => regex -> source -> m (AllMatches [] target)
251{-# INLINE nullFail #-}
252nullFail r s = case match r s of
253                 (AllMatches []) -> regexFailed
254                 xs -> return xs
255
256nullFailText :: (RegexContext regex source (AllTextMatches [] target),MonadFail m) => regex -> source -> m (AllTextMatches [] target)
257{-# INLINE nullFailText #-}
258nullFailText r s = case match r s of
259                     (AllTextMatches []) -> regexFailed
260                     xs -> return xs
261
262nullFail' :: (RegexContext regex source ([] target),MonadFail m) => regex -> source -> m ([] target)
263{-# INLINE nullFail' #-}
264nullFail' r s = case match r s of
265                 ([]) -> regexFailed
266                 xs -> return xs
267
268regexFailed :: (MonadFail m) => m b
269{-# INLINE regexFailed #-}
270regexFailed =  fail $ "regex failed to match"
271
272actOn :: (RegexLike r s,MonadFail m) => ((s,MatchText s,s)->t) -> r -> s -> m t
273{-# INLINE actOn #-}
274actOn f r s = case matchOnceText r s of
275    Nothing -> regexFailed
276    Just preMApost -> return (f preMApost)
277
278-- ** Instances based on matchTest ()
279
280instance (RegexLike a b) => RegexContext a b Bool where
281  match = matchTest
282  matchM r s = case match r s of
283                 False -> regexFailed
284                 True -> return True
285
286instance (RegexLike a b) => RegexContext a b () where
287  match _ _ = ()
288  matchM r s = case matchTest r s of
289                 False -> regexFailed
290                 True -> return ()
291
292-- ** Instance based on matchCount
293
294instance (RegexLike a b) => RegexContext a b Int where
295  match = matchCount
296  matchM r s = case match r s of
297                 0 -> regexFailed
298                 x -> return x
299
300-- ** Instances based on matchOnce,matchOnceText
301
302instance (RegexLike a b) => RegexContext a b (MatchOffset,MatchLength) where
303  match r s = maybe (-1,0) (! 0) (matchOnce r s)
304  matchM r s = maybe regexFailed (return . (! 0)) (matchOnce r s)
305
306instance (RegexLike a b) => RegexContext a b (MatchResult b) where
307  match r s = maybe (MR {mrBefore = s,mrMatch = empty,mrAfter = empty
308                        ,mrSubs = nullArray,mrSubList = []}) id (matchM r s)
309  matchM = actOn (\(pre,ma,post) ->
310     let ((whole,_):subs) = elems ma
311     in MR { mrBefore = pre
312           , mrMatch = whole
313           , mrAfter = post
314           , mrSubs = fmap fst ma
315           , mrSubList = map fst subs })
316
317instance (RegexLike a b) => RegexContext a b (b,MatchText b,b) where
318  match r s = maybe (s,nullArray,empty) id (matchOnceText r s)
319  matchM r s = maybe regexFailed return (matchOnceText r s)
320
321instance (RegexLike a b) => RegexContext a b (b,b,b) where
322  match r s = maybe (s,empty,empty) id (matchM r s)
323  matchM = actOn (\(pre,ma,post) -> let ((whole,_):_) = elems ma
324                                    in (pre,whole,post))
325
326instance (RegexLike a b) => RegexContext a b (b,b,b,[b]) where
327  match r s = maybe (s,empty,empty,[]) id (matchM r s)
328  matchM = actOn (\(pre,ma,post) -> let ((whole,_):subs) = elems ma
329                                    in (pre,whole,post,map fst subs))
330
331-- now AllSubmatches wrapper
332instance (RegexLike a b) => RegexContext a b MatchArray where
333  match r s = maybe nullArray id (matchOnce r s)
334  matchM r s = maybe regexFailed return (matchOnce r s)
335instance (RegexLike a b) => RegexContext a b (AllSubmatches [] (MatchOffset,MatchLength)) where
336  match r s = maybe (AllSubmatches []) id (matchM r s)
337  matchM r s = case matchOnce r s of
338                 Nothing -> regexFailed
339                 Just ma -> return (AllSubmatches (elems ma))
340
341-- essentially AllSubmatches applied to (MatchText b)
342instance (RegexLike a b) => RegexContext a b (AllTextSubmatches (Array Int) (b, (MatchOffset, MatchLength))) where
343  match r s = maybe (AllTextSubmatches nullArray) id (matchM r s)
344  matchM r s = actOn (\(_,ma,_) -> AllTextSubmatches ma) r s
345instance (RegexLike a b) => RegexContext a b (AllTextSubmatches [] (b, (MatchOffset, MatchLength))) where
346  match r s = maybe (AllTextSubmatches []) id (matchM r s)
347  matchM r s = actOn (\(_,ma,_) -> AllTextSubmatches (elems ma)) r s
348
349instance (RegexLike a b) => RegexContext a b (AllTextSubmatches [] b) where
350  match r s = maybe (AllTextSubmatches []) id (matchM r s)
351  matchM r s = liftM AllTextSubmatches $ actOn (\(_,ma,_) -> map fst . elems $ ma) r s
352instance (RegexLike a b) => RegexContext a b (AllTextSubmatches (Array Int) b) where
353  match r s = maybe (AllTextSubmatches nullArray) id (matchM r s)
354  matchM r s = liftM AllTextSubmatches $ actOn (\(_,ma,_) -> fmap fst ma) r s
355
356-- ** Instances based on matchAll,matchAllText
357
358instance (RegexLike a b) => RegexContext a b (AllMatches [] (MatchOffset,MatchLength)) where
359  match r s = AllMatches [ ma ! 0 | ma <- matchAll r s ]
360  matchM r s = nullFail r s
361instance (RegexLike a b) => RegexContext a b (AllMatches (Array Int) (MatchOffset,MatchLength)) where
362  match r s = maybe (AllMatches nullArray) id (matchM r s)
363  matchM r s = case match r s of
364                 (AllMatches []) -> regexFailed
365                 (AllMatches pairs) -> return . AllMatches . listArray (0,pred $ length pairs) $ pairs
366
367-- No AllMatches wrapper
368instance (RegexLike a b) => RegexContext a b [MatchArray] where
369  match = matchAll
370  matchM = nullFail'
371instance (RegexLike a b) => RegexContext a b (AllMatches (Array Int) MatchArray) where
372  match r s = maybe (AllMatches nullArray) id (matchM r s)
373  matchM r s = case match r s of
374                 [] -> regexFailed
375                 mas -> return . AllMatches . listArray (0,pred $ length mas) $ mas
376
377-- No AllTextMatches wrapper
378instance (RegexLike a b) => RegexContext a b [MatchText b] where
379  match = matchAllText
380  matchM = nullFail'
381instance (RegexLike a b) => RegexContext a b (AllTextMatches (Array Int) (MatchText b)) where
382  match r s = maybe (AllTextMatches nullArray) id (matchM r s)
383  matchM r s = case match r s of
384                 ([]) -> regexFailed
385                 (mts) -> return . AllTextMatches . listArray (0,pred $ length mts) $ mts
386
387instance (RegexLike a b) => RegexContext a b (AllTextMatches [] b) where
388  match r s = AllTextMatches [ fst (ma ! 0) | ma <- matchAllText r s ]
389  matchM r s = nullFailText r s
390instance (RegexLike a b) => RegexContext a b (AllTextMatches (Array Int) b) where
391  match r s = maybe (AllTextMatches nullArray) id (matchM r s)
392  matchM r s = case match r s of
393                 (AllTextMatches []) -> regexFailed
394                 (AllTextMatches bs) -> return . AllTextMatches . listArray (0,pred $ length bs) $ bs
395
396-- No AllTextMatches wrapper
397instance (RegexLike a b) => RegexContext a b [[b]] where
398  match r s = [ map fst (elems ma) | ma <- matchAllText r s ]
399  matchM r s = nullFail' r s
400instance (RegexLike a b) => RegexContext a b (AllTextMatches (Array Int) [b]) where
401  match r s = maybe (AllTextMatches nullArray) id (matchM r s)
402  matchM r s = case match r s of
403                 ([]) -> regexFailed
404                 (ls) -> return . AllTextMatches . listArray (0,pred $ length ls) $ ls
405instance (RegexLike a b) => RegexContext a b (AllTextMatches [] (Array Int b)) where
406  match r s = AllTextMatches [ fmap fst ma | ma <- matchAllText r s ]
407  matchM r s = nullFailText r s
408instance (RegexLike a b) => RegexContext a b (AllTextMatches (Array Int) (Array Int b)) where
409  match r s = maybe (AllTextMatches nullArray) id (matchM r s)
410  matchM r s = case match r s of
411                 (AllTextMatches []) -> regexFailed
412                 (AllTextMatches as) -> return . AllTextMatches . listArray (0,pred $ length as) $ as
413