1-- |
2-- Module      :  Text.Megaparsec.Error
3-- Copyright   :  © 2015–present Megaparsec contributors
4-- License     :  FreeBSD
5--
6-- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
7-- Stability   :  experimental
8-- Portability :  portable
9--
10-- Parse errors. The current version of Megaparsec supports well-typed
11-- errors instead of 'String'-based ones. This gives a lot of flexibility in
12-- describing what exactly went wrong as well as a way to return arbitrary
13-- data in case of failure.
14--
15-- You probably do not want to import this module directly because
16-- "Text.Megaparsec" re-exports it anyway.
17
18{-# LANGUAGE BangPatterns         #-}
19{-# LANGUAGE DeriveDataTypeable   #-}
20{-# LANGUAGE DeriveFunctor        #-}
21{-# LANGUAGE DeriveGeneric        #-}
22{-# LANGUAGE FlexibleContexts     #-}
23{-# LANGUAGE FlexibleInstances    #-}
24{-# LANGUAGE LambdaCase           #-}
25{-# LANGUAGE RecordWildCards      #-}
26{-# LANGUAGE ScopedTypeVariables  #-}
27{-# LANGUAGE StandaloneDeriving   #-}
28{-# LANGUAGE UndecidableInstances #-}
29
30module Text.Megaparsec.Error
31  ( -- * Parse error type
32    ErrorItem (..)
33  , ErrorFancy (..)
34  , ParseError (..)
35  , mapParseError
36  , errorOffset
37  , setErrorOffset
38  , ParseErrorBundle (..)
39  , attachSourcePos
40    -- * Pretty-printing
41  , ShowErrorComponent (..)
42  , errorBundlePretty
43  , parseErrorPretty
44  , parseErrorTextPretty )
45where
46
47import Control.DeepSeq
48import Control.Exception
49import Control.Monad.State.Strict
50import Data.Data (Data)
51import Data.List (intercalate)
52import Data.List.NonEmpty (NonEmpty (..))
53import Data.Maybe (isNothing)
54import Data.Proxy
55import Data.Set (Set)
56import Data.Typeable (Typeable)
57import Data.Void
58import GHC.Generics
59import Text.Megaparsec.Pos
60import Text.Megaparsec.State
61import Text.Megaparsec.Stream
62import qualified Data.List.NonEmpty as NE
63import qualified Data.Set           as E
64
65----------------------------------------------------------------------------
66-- Parse error type
67
68-- | Data type that is used to represent “unexpected\/expected” items in
69-- 'ParseError'. The data type is parametrized over the token type @t@.
70--
71-- @since 5.0.0
72
73data ErrorItem t
74  = Tokens (NonEmpty t)      -- ^ Non-empty stream of tokens
75  | Label (NonEmpty Char)    -- ^ Label (cannot be empty)
76  | EndOfInput               -- ^ End of input
77  deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor)
78
79instance NFData t => NFData (ErrorItem t)
80
81-- | Additional error data, extendable by user. When no custom data is
82-- necessary, the type is typically indexed by 'Void' to “cancel” the
83-- 'ErrorCustom' constructor.
84--
85-- @since 6.0.0
86
87data ErrorFancy e
88  = ErrorFail String
89    -- ^ 'fail' has been used in parser monad
90  | ErrorIndentation Ordering Pos Pos
91    -- ^ Incorrect indentation error: desired ordering between reference
92    -- level and actual level, reference indentation level, actual
93    -- indentation level
94  | ErrorCustom e
95    -- ^ Custom error data
96  deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor)
97
98instance NFData a => NFData (ErrorFancy a) where
99  rnf (ErrorFail str) = rnf str
100  rnf (ErrorIndentation ord ref act) = ord `seq` rnf ref `seq` rnf act
101  rnf (ErrorCustom a) = rnf a
102
103-- | @'ParseError' s e@ represents a parse error parametrized over the
104-- stream type @s@ and the custom data @e@.
105--
106-- 'Semigroup' and 'Monoid' instances of the data type allow to merge parse
107-- errors from different branches of parsing. When merging two
108-- 'ParseError's, the longest match is preferred; if positions are the same,
109-- custom data sets and collections of message items are combined. Note that
110-- fancy errors take precedence over trivial errors in merging.
111--
112-- @since 7.0.0
113
114data ParseError s e
115  = TrivialError Int (Maybe (ErrorItem (Token s))) (Set (ErrorItem (Token s)))
116    -- ^ Trivial errors, generated by Megaparsec's machinery. The data
117    -- constructor includes the offset of error, unexpected token (if any),
118    -- and expected tokens.
119    --
120    -- Type of the first argument was changed in the version /7.0.0/.
121  | FancyError Int (Set (ErrorFancy e))
122    -- ^ Fancy, custom errors.
123    --
124    -- Type of the first argument was changed in the version /7.0.0/.
125  deriving (Typeable, Generic)
126
127deriving instance ( Show (Token s)
128                  , Show e
129                  ) => Show (ParseError s e)
130
131deriving instance ( Eq (Token s)
132                  , Eq e
133                  ) => Eq (ParseError s e)
134
135deriving instance ( Data s
136                  , Data (Token s)
137                  , Ord (Token s)
138                  , Data e
139                  , Ord e
140                  ) => Data (ParseError s e)
141
142instance ( NFData (Token s)
143         , NFData e
144         ) => NFData (ParseError s e)
145
146instance (Stream s, Ord e) => Semigroup (ParseError s e) where
147  (<>) = mergeError
148  {-# INLINE (<>) #-}
149
150instance (Stream s, Ord e) => Monoid (ParseError s e) where
151  mempty  = TrivialError 0 Nothing E.empty
152  mappend = (<>)
153  {-# INLINE mappend #-}
154
155instance ( Show s
156         , Show (Token s)
157         , Show e
158         , ShowErrorComponent e
159         , Stream s
160         , Typeable s
161         , Typeable e )
162  => Exception (ParseError s e) where
163  displayException = parseErrorPretty
164
165-- | Modify the custom data component in a parse error. This could be done
166-- via 'fmap' if not for the 'Ord' constraint.
167--
168-- @since 7.0.0
169
170mapParseError :: Ord e'
171  => (e -> e')
172  -> ParseError s e
173  -> ParseError s e'
174mapParseError _ (TrivialError o u p) = TrivialError o u p
175mapParseError f (FancyError o x) = FancyError o (E.map (fmap f) x)
176
177-- | Get offset of 'ParseError'.
178--
179-- @since 7.0.0
180
181errorOffset :: ParseError s e -> Int
182errorOffset (TrivialError o _ _) = o
183errorOffset (FancyError   o _)   = o
184
185-- | Set offset of 'ParseError'.
186--
187-- @since 8.0.0
188
189setErrorOffset :: Int -> ParseError s e -> ParseError s e
190setErrorOffset o (TrivialError _ u p) = TrivialError o u p
191setErrorOffset o (FancyError _ x) = FancyError o x
192
193-- | Merge two error data structures into one joining their collections of
194-- message items and preferring the longest match. In other words, earlier
195-- error message is discarded. This may seem counter-intuitive, but
196-- 'mergeError' is only used to merge error messages of alternative branches
197-- of parsing and in this case longest match should be preferred.
198
199mergeError :: (Stream s, Ord e)
200  => ParseError s e
201  -> ParseError s e
202  -> ParseError s e
203mergeError e1 e2 =
204  case errorOffset e1 `compare` errorOffset e2 of
205    LT -> e2
206    EQ ->
207      case (e1, e2) of
208        (TrivialError s1 u1 p1, TrivialError _ u2 p2) ->
209          TrivialError s1 (n u1 u2) (E.union p1 p2)
210        (FancyError {}, TrivialError {}) -> e1
211        (TrivialError {}, FancyError {}) -> e2
212        (FancyError s1 x1, FancyError _ x2) ->
213          FancyError s1 (E.union x1 x2)
214    GT -> e1
215  where
216    -- NOTE The logic behind this merging is that since we only combine
217    -- parse errors that happen at exactly the same position, all the
218    -- unexpected items will be prefixes of input stream at that position or
219    -- labels referring to the same thing. Our aim here is to choose the
220    -- longest prefix (merging with labels and end of input is somewhat
221    -- arbitrary, but is necessary because otherwise we can't make
222    -- ParseError lawful Monoid and have nice parse errors at the same
223    -- time).
224    n Nothing  Nothing = Nothing
225    n (Just x) Nothing = Just x
226    n Nothing (Just y) = Just y
227    n (Just x) (Just y) = Just (max x y)
228{-# INLINE mergeError #-}
229
230-- | A non-empty collection of 'ParseError's equipped with 'PosState' that
231-- allows to pretty-print the errors efficiently and correctly.
232--
233-- @since 7.0.0
234
235data ParseErrorBundle s e = ParseErrorBundle
236  { bundleErrors :: NonEmpty (ParseError s e)
237    -- ^ A collection of 'ParseError's that is sorted by parse error offsets
238  , bundlePosState :: PosState s
239    -- ^ State that is used for line\/column calculation
240  } deriving (Generic)
241
242deriving instance ( Show s
243                  , Show (Token s)
244                  , Show e
245                  ) => Show (ParseErrorBundle s e)
246
247deriving instance ( Eq s
248                  , Eq (Token s)
249                  , Eq e
250                  ) => Eq (ParseErrorBundle s e)
251
252deriving instance ( Typeable s
253                  , Typeable (Token s)
254                  , Typeable e
255                  ) => Typeable (ParseErrorBundle s e)
256
257deriving instance ( Data s
258                  , Data (Token s)
259                  , Ord (Token s)
260                  , Data e
261                  , Ord e
262                  ) => Data (ParseErrorBundle s e)
263
264instance ( NFData s
265         , NFData (Token s)
266         , NFData e
267         ) => NFData (ParseErrorBundle s e)
268
269instance ( Show s
270         , Show (Token s)
271         , Show e
272         , ShowErrorComponent e
273         , Stream s
274         , Typeable s
275         , Typeable e
276         ) => Exception (ParseErrorBundle s e) where
277  displayException = errorBundlePretty
278
279-- | Attach 'SourcePos'es to items in a 'Traversable' container given that
280-- there is a projection allowing to get an offset per item.
281--
282-- Items must be in ascending order with respect to their offsets.
283--
284-- @since 7.0.0
285
286attachSourcePos
287  :: (Traversable t, Stream s)
288  => (a -> Int) -- ^ How to project offset from an item (e.g. 'errorOffset')
289  -> t a               -- ^ The collection of items
290  -> PosState s        -- ^ Initial 'PosState'
291  -> (t (a, SourcePos), PosState s) -- ^ The collection with 'SourcePos'es
292                                    -- added and the final 'PosState'
293attachSourcePos projectOffset xs = runState (traverse f xs)
294  where
295    f a = do
296      pst <- get
297      let pst' = reachOffsetNoLine (projectOffset a) pst
298      put pst'
299      return (a, pstateSourcePos pst')
300{-# INLINEABLE attachSourcePos #-}
301
302----------------------------------------------------------------------------
303-- Pretty-printing
304
305-- | The type class defines how to print a custom component of 'ParseError'.
306--
307-- @since 5.0.0
308
309class Ord a => ShowErrorComponent a where
310
311  -- | Pretty-print a component of 'ParseError'.
312
313  showErrorComponent :: a -> String
314
315  -- | Length of the error component in characters, used for highlighting of
316  -- parse errors in input string.
317  --
318  -- @since 7.0.0
319
320  errorComponentLen :: a -> Int
321  errorComponentLen _ = 1
322
323instance ShowErrorComponent Void where
324  showErrorComponent = absurd
325
326-- | Pretty-print a 'ParseErrorBundle'. All 'ParseError's in the bundle will
327-- be pretty-printed in order together with the corresponding offending
328-- lines by doing a single efficient pass over the input stream. The
329-- rendered 'String' always ends with a newline.
330--
331-- @since 7.0.0
332
333errorBundlePretty
334  :: forall s e. ( Stream s
335                 , ShowErrorComponent e
336                 )
337  => ParseErrorBundle s e -- ^ Parse error bundle to display
338  -> String               -- ^ Textual rendition of the bundle
339errorBundlePretty ParseErrorBundle {..} =
340  let (r, _) = foldl f (id, bundlePosState) bundleErrors
341  in drop 1 (r "")
342  where
343    f :: (ShowS, PosState s)
344      -> ParseError s e
345      -> (ShowS, PosState s)
346    f (o, !pst) e = (o . (outChunk ++), pst')
347      where
348        (sline, pst') = reachOffset (errorOffset e) pst
349        epos = pstateSourcePos pst'
350        outChunk =
351          "\n" <> sourcePosPretty epos <> ":\n" <>
352          padding <> "|\n" <>
353          lineNumber <> " | " <> sline <> "\n" <>
354          padding <> "| " <> rpadding <> pointer <> "\n" <>
355          parseErrorTextPretty e
356        lineNumber = (show . unPos . sourceLine) epos
357        padding = replicate (length lineNumber + 1) ' '
358        rpadding =
359          if pointerLen > 0
360            then replicate rpshift ' '
361            else ""
362        rpshift = unPos (sourceColumn epos) - 1
363        pointer = replicate pointerLen '^'
364        pointerLen =
365          if rpshift + elen > slineLen
366            then slineLen - rpshift + 1
367            else elen
368        slineLen = length sline
369        pxy = Proxy :: Proxy s
370        elen =
371          case e of
372            TrivialError _ Nothing _ -> 1
373            TrivialError _ (Just x) _ -> errorItemLength pxy x
374            FancyError _ xs ->
375              E.foldl' (\a b -> max a (errorFancyLength b)) 1 xs
376
377-- | Pretty-print a 'ParseError'. The rendered 'String' always ends with a
378-- newline.
379--
380-- @since 5.0.0
381
382parseErrorPretty
383  :: (Stream s, ShowErrorComponent e)
384  => ParseError s e    -- ^ Parse error to render
385  -> String            -- ^ Result of rendering
386parseErrorPretty e =
387  "offset=" <> show (errorOffset e) <> ":\n" <> parseErrorTextPretty e
388
389-- | Pretty-print a textual part of a 'ParseError', that is, everything
390-- except for its position. The rendered 'String' always ends with a
391-- newline.
392--
393-- @since 5.1.0
394
395parseErrorTextPretty
396  :: forall s e. (Stream s, ShowErrorComponent e)
397  => ParseError s e    -- ^ Parse error to render
398  -> String            -- ^ Result of rendering
399parseErrorTextPretty (TrivialError _ us ps) =
400  if isNothing us && E.null ps
401    then "unknown parse error\n"
402    else messageItemsPretty "unexpected " (showErrorItem pxy `E.map` maybe E.empty E.singleton us) <>
403         messageItemsPretty "expecting "  (showErrorItem pxy `E.map` ps)
404  where
405    pxy = Proxy :: Proxy s
406parseErrorTextPretty (FancyError _ xs) =
407  if E.null xs
408    then "unknown fancy parse error\n"
409    else unlines (showErrorFancy <$> E.toAscList xs)
410
411----------------------------------------------------------------------------
412-- Helpers
413
414-- | Pretty-print an 'ErrorItem'.
415
416showErrorItem :: Stream s => Proxy s -> ErrorItem (Token s) -> String
417showErrorItem pxy = \case
418    Tokens   ts -> showTokens pxy ts
419    Label label -> NE.toList label
420    EndOfInput  -> "end of input"
421
422-- | Get length of the “pointer” to display under a given 'ErrorItem'.
423
424errorItemLength :: Stream s => Proxy s -> ErrorItem (Token s) -> Int
425errorItemLength pxy = \case
426  Tokens ts -> tokensLength pxy ts
427  _         -> 1
428
429-- | Pretty-print an 'ErrorFancy'.
430
431showErrorFancy :: ShowErrorComponent e => ErrorFancy e -> String
432showErrorFancy = \case
433  ErrorFail msg -> msg
434  ErrorIndentation ord ref actual ->
435    "incorrect indentation (got " <> show (unPos actual) <>
436    ", should be " <> p <> show (unPos ref) <> ")"
437    where
438      p = case ord of
439            LT -> "less than "
440            EQ -> "equal to "
441            GT -> "greater than "
442  ErrorCustom a -> showErrorComponent a
443
444-- | Get length of the “pointer” to display under a given 'ErrorFancy'.
445
446errorFancyLength :: ShowErrorComponent e => ErrorFancy e -> Int
447errorFancyLength = \case
448  ErrorCustom a -> errorComponentLen a
449  _             -> 1
450
451-- | Transforms a list of error messages into their textual representation.
452
453messageItemsPretty
454  :: String            -- ^ Prefix to prepend
455  -> Set String        -- ^ Collection of messages
456  -> String            -- ^ Result of rendering
457messageItemsPretty prefix ts
458  | E.null ts = ""
459  | otherwise =
460    prefix <> (orList . NE.fromList . E.toAscList) ts <> "\n"
461
462-- | Print a pretty list where items are separated with commas and the word
463-- “or” according to the rules of English punctuation.
464
465orList :: NonEmpty String -> String
466orList (x:|[])  = x
467orList (x:|[y]) = x <> " or " <> y
468orList xs       = intercalate ", " (NE.init xs) <> ", or " <> NE.last xs
469