1-- |
2-- Module      :  Text.Megaparsec.Error
3-- Copyright   :  © 2015–2019 Megaparsec contributors
4-- License     :  FreeBSD
5--
6-- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
7-- Stability   :  experimental
8-- Portability :  portable
9--
10-- Parse errors. The current version of Megaparsec supports well-typed
11-- errors instead of 'String'-based ones. This gives a lot of flexibility in
12-- describing what exactly went wrong as well as a way to return arbitrary
13-- data in case of failure.
14--
15-- You probably do not want to import this module directly because
16-- "Text.Megaparsec" re-exports it anyway.
17
18{-# LANGUAGE BangPatterns        #-}
19{-# LANGUAGE CPP                 #-}
20{-# LANGUAGE DeriveDataTypeable  #-}
21{-# LANGUAGE DeriveFunctor       #-}
22{-# LANGUAGE DeriveGeneric       #-}
23{-# LANGUAGE FlexibleContexts    #-}
24{-# LANGUAGE FlexibleInstances   #-}
25{-# LANGUAGE LambdaCase          #-}
26{-# LANGUAGE RecordWildCards     #-}
27{-# LANGUAGE ScopedTypeVariables #-}
28{-# LANGUAGE StandaloneDeriving  #-}
29{-# LANGUAGE UndecidableInstances#-}
30
31module Text.Megaparsec.Error
32  ( -- * Parse error type
33    ErrorItem (..)
34  , ErrorFancy (..)
35  , ParseError (..)
36  , mapParseError
37  , errorOffset
38  , ParseErrorBundle (..)
39  , attachSourcePos
40    -- * Pretty-printing
41  , ShowErrorComponent (..)
42  , errorBundlePretty
43  , parseErrorPretty
44  , parseErrorTextPretty )
45where
46
47import Control.DeepSeq
48import Control.Exception
49import Control.Monad.State.Strict
50import Data.Data (Data)
51import Data.List (intercalate)
52import Data.List.NonEmpty (NonEmpty (..))
53import Data.Maybe (isNothing)
54import Data.Proxy
55import Data.Set (Set)
56import Data.Typeable (Typeable)
57import Data.Void
58import GHC.Generics
59import Text.Megaparsec.Pos
60import Text.Megaparsec.State
61import Text.Megaparsec.Stream
62import qualified Data.List.NonEmpty as NE
63import qualified Data.Set           as E
64
65#if !MIN_VERSION_base(4,11,0)
66import Data.Semigroup
67#endif
68
69----------------------------------------------------------------------------
70-- Parse error type
71
72-- | Data type that is used to represent “unexpected\/expected” items in
73-- 'ParseError'. The data type is parametrized over the token type @t@.
74--
75-- @since 5.0.0
76
77data ErrorItem t
78  = Tokens (NonEmpty t)      -- ^ Non-empty stream of tokens
79  | Label (NonEmpty Char)    -- ^ Label (cannot be empty)
80  | EndOfInput               -- ^ End of input
81  deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor)
82
83instance NFData t => NFData (ErrorItem t)
84
85-- | Additional error data, extendable by user. When no custom data is
86-- necessary, the type is typically indexed by 'Void' to “cancel” the
87-- 'ErrorCustom' constructor.
88--
89-- @since 6.0.0
90
91data ErrorFancy e
92  = ErrorFail String
93    -- ^ 'fail' has been used in parser monad
94  | ErrorIndentation Ordering Pos Pos
95    -- ^ Incorrect indentation error: desired ordering between reference
96    -- level and actual level, reference indentation level, actual
97    -- indentation level
98  | ErrorCustom e
99    -- ^ Custom error data
100  deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor)
101
102instance NFData a => NFData (ErrorFancy a) where
103  rnf (ErrorFail str) = rnf str
104  rnf (ErrorIndentation ord ref act) = ord `seq` rnf ref `seq` rnf act
105  rnf (ErrorCustom a) = rnf a
106
107-- | @'ParseError' s e@ represents a parse error parametrized over the
108-- stream type @s@ and the custom data @e@.
109--
110-- 'Semigroup' and 'Monoid' instances of the data type allow to merge parse
111-- errors from different branches of parsing. When merging two
112-- 'ParseError's, the longest match is preferred; if positions are the same,
113-- custom data sets and collections of message items are combined. Note that
114-- fancy errors take precedence over trivial errors in merging.
115--
116-- @since 7.0.0
117
118data ParseError s e
119  = TrivialError Int (Maybe (ErrorItem (Token s))) (Set (ErrorItem (Token s)))
120    -- ^ Trivial errors, generated by Megaparsec's machinery. The data
121    -- constructor includes the offset of error, unexpected token (if any),
122    -- and expected tokens.
123    --
124    -- Type of the first argument was changed in the version /7.0.0/.
125  | FancyError Int (Set (ErrorFancy e))
126    -- ^ Fancy, custom errors.
127    --
128    -- Type of the first argument was changed in the version /7.0.0/.
129  deriving (Typeable, Generic)
130
131deriving instance ( Show (Token s)
132                  , Show e
133                  ) => Show (ParseError s e)
134
135deriving instance ( Eq (Token s)
136                  , Eq e
137                  ) => Eq (ParseError s e)
138
139deriving instance ( Data s
140                  , Data (Token s)
141                  , Ord (Token s)
142                  , Data e
143                  , Ord e
144                  ) => Data (ParseError s e)
145
146instance ( NFData (Token s)
147         , NFData e
148         ) => NFData (ParseError s e)
149
150instance (Stream s, Ord e) => Semigroup (ParseError s e) where
151  (<>) = mergeError
152  {-# INLINE (<>) #-}
153
154instance (Stream s, Ord e) => Monoid (ParseError s e) where
155  mempty  = TrivialError 0 Nothing E.empty
156  mappend = (<>)
157  {-# INLINE mappend #-}
158
159instance ( Show s
160         , Show (Token s)
161         , Show e
162         , ShowErrorComponent e
163         , Stream s
164         , Typeable s
165         , Typeable e )
166  => Exception (ParseError s e) where
167  displayException = parseErrorPretty
168
169-- | Modify the custom data component in a parse error. This could be done
170-- via 'fmap' if not for the 'Ord' constraint.
171--
172-- @since 7.0.0
173
174mapParseError :: Ord e'
175  => (e -> e')
176  -> ParseError s e
177  -> ParseError s e'
178mapParseError _ (TrivialError o u p) = TrivialError o u p
179mapParseError f (FancyError o x) = FancyError o (E.map (fmap f) x)
180
181-- | Get offset of given 'ParseError'.
182--
183-- @since 7.0.0
184
185errorOffset :: ParseError s e -> Int
186errorOffset (TrivialError o _ _) = o
187errorOffset (FancyError   o _)   = o
188
189-- | Merge two error data structures into one joining their collections of
190-- message items and preferring the longest match. In other words, earlier
191-- error message is discarded. This may seem counter-intuitive, but
192-- 'mergeError' is only used to merge error messages of alternative branches
193-- of parsing and in this case longest match should be preferred.
194
195mergeError :: (Stream s, Ord e)
196  => ParseError s e
197  -> ParseError s e
198  -> ParseError s e
199mergeError e1 e2 =
200  case errorOffset e1 `compare` errorOffset e2 of
201    LT -> e2
202    EQ ->
203      case (e1, e2) of
204        (TrivialError s1 u1 p1, TrivialError _ u2 p2) ->
205          TrivialError s1 (n u1 u2) (E.union p1 p2)
206        (FancyError {}, TrivialError {}) -> e1
207        (TrivialError {}, FancyError {}) -> e2
208        (FancyError s1 x1, FancyError _ x2) ->
209          FancyError s1 (E.union x1 x2)
210    GT -> e1
211  where
212    -- NOTE The logic behind this merging is that since we only combine
213    -- parse errors that happen at exactly the same position, all the
214    -- unexpected items will be prefixes of input stream at that position or
215    -- labels referring to the same thing. Our aim here is to choose the
216    -- longest prefix (merging with labels and end of input is somewhat
217    -- arbitrary, but is necessary because otherwise we can't make
218    -- ParseError lawful Monoid and have nice parse errors at the same
219    -- time).
220    n Nothing  Nothing = Nothing
221    n (Just x) Nothing = Just x
222    n Nothing (Just y) = Just y
223    n (Just x) (Just y) = Just (max x y)
224{-# INLINE mergeError #-}
225
226-- | A non-empty collection of 'ParseError's equipped with 'PosState' that
227-- allows to pretty-print the errors efficiently and correctly.
228--
229-- @since 7.0.0
230
231data ParseErrorBundle s e = ParseErrorBundle
232  { bundleErrors :: NonEmpty (ParseError s e)
233    -- ^ A collection of 'ParseError's that is sorted by parse error offsets
234  , bundlePosState :: PosState s
235    -- ^ State that is used for line\/column calculation
236  } deriving (Generic)
237
238deriving instance ( Show s
239                  , Show (Token s)
240                  , Show e
241                  ) => Show (ParseErrorBundle s e)
242
243deriving instance ( Eq s
244                  , Eq (Token s)
245                  , Eq e
246                  ) => Eq (ParseErrorBundle s e)
247
248deriving instance ( Typeable s
249                  , Typeable (Token s)
250                  , Typeable e
251                  ) => Typeable (ParseErrorBundle s e)
252
253deriving instance ( Data s
254                  , Data (Token s)
255                  , Ord (Token s)
256                  , Data e
257                  , Ord e
258                  ) => Data (ParseErrorBundle s e)
259
260instance ( NFData s
261         , NFData (Token s)
262         , NFData e
263         ) => NFData (ParseErrorBundle s e)
264
265instance ( Show s
266         , Show (Token s)
267         , Show e
268         , ShowErrorComponent e
269         , Stream s
270         , Typeable s
271         , Typeable e
272         ) => Exception (ParseErrorBundle s e) where
273  displayException = errorBundlePretty
274
275-- | Attach 'SourcePos'es to items in a 'Traversable' container given that
276-- there is a projection allowing to get an offset per item.
277--
278-- Items must be in ascending order with respect to their offsets.
279--
280-- @since 7.0.0
281
282attachSourcePos
283  :: (Traversable t, Stream s)
284  => (a -> Int) -- ^ How to project offset from an item (e.g. 'errorOffset')
285  -> t a               -- ^ The collection of items
286  -> PosState s        -- ^ Initial 'PosState'
287  -> (t (a, SourcePos), PosState s) -- ^ The collection with 'SourcePos'es
288                                    -- added and the final 'PosState'
289attachSourcePos projectOffset xs = runState (traverse f xs)
290  where
291    f a = do
292      pst <- get
293      let (spos, pst') = reachOffsetNoLine (projectOffset a) pst
294      put pst'
295      return (a, spos)
296{-# INLINEABLE attachSourcePos #-}
297
298----------------------------------------------------------------------------
299-- Pretty-printing
300
301-- | The type class defines how to print a custom component of 'ParseError'.
302--
303-- @since 5.0.0
304
305class Ord a => ShowErrorComponent a where
306
307  -- | Pretty-print a component of 'ParseError'.
308
309  showErrorComponent :: a -> String
310
311  -- | Length of the error component in characters, used for highlighting of
312  -- parse errors in input string.
313  --
314  -- @since 7.0.0
315
316  errorComponentLen :: a -> Int
317  errorComponentLen _ = 1
318
319instance ShowErrorComponent Void where
320  showErrorComponent = absurd
321
322-- | Pretty-print a 'ParseErrorBundle'. All 'ParseError's in the bundle will
323-- be pretty-printed in order together with the corresponding offending
324-- lines by doing a single efficient pass over the input stream. The
325-- rendered 'String' always ends with a newline.
326--
327-- @since 7.0.0
328
329errorBundlePretty
330  :: forall s e. ( Stream s
331                 , ShowErrorComponent e
332                 )
333  => ParseErrorBundle s e -- ^ Parse error bundle to display
334  -> String               -- ^ Textual rendition of the bundle
335errorBundlePretty ParseErrorBundle {..} =
336  let (r, _) = foldl f (id, bundlePosState) bundleErrors
337  in drop 1 (r "")
338  where
339    f :: (ShowS, PosState s)
340      -> ParseError s e
341      -> (ShowS, PosState s)
342    f (o, !pst) e = (o . (outChunk ++), pst')
343      where
344        (epos, sline, pst') = reachOffset (errorOffset e) pst
345        outChunk =
346          "\n" <> sourcePosPretty epos <> ":\n" <>
347          padding <> "|\n" <>
348          lineNumber <> " | " <> sline <> "\n" <>
349          padding <> "| " <> rpadding <> pointer <> "\n" <>
350          parseErrorTextPretty e
351        lineNumber = (show . unPos . sourceLine) epos
352        padding = replicate (length lineNumber + 1) ' '
353        rpadding =
354          if pointerLen > 0
355            then replicate rpshift ' '
356            else ""
357        rpshift = unPos (sourceColumn epos) - 1
358        pointer = replicate pointerLen '^'
359        pointerLen =
360          if rpshift + elen > slineLen
361            then slineLen - rpshift + 1
362            else elen
363        slineLen = length sline
364        elen =
365          case e of
366            TrivialError _ Nothing _ -> 1
367            TrivialError _ (Just x) _ -> errorItemLength x
368            FancyError _ xs ->
369              E.foldl' (\a b -> max a (errorFancyLength b)) 1 xs
370
371-- | Pretty-print a 'ParseError'. The rendered 'String' always ends with a
372-- newline.
373--
374-- @since 5.0.0
375
376parseErrorPretty
377  :: (Stream s, ShowErrorComponent e)
378  => ParseError s e    -- ^ Parse error to render
379  -> String            -- ^ Result of rendering
380parseErrorPretty e =
381  "offset=" <> show (errorOffset e) <> ":\n" <> parseErrorTextPretty e
382
383-- | Pretty-print a textual part of a 'ParseError', that is, everything
384-- except for its position. The rendered 'String' always ends with a
385-- newline.
386--
387-- @since 5.1.0
388
389parseErrorTextPretty
390  :: forall s e. (Stream s, ShowErrorComponent e)
391  => ParseError s e    -- ^ Parse error to render
392  -> String            -- ^ Result of rendering
393parseErrorTextPretty (TrivialError _ us ps) =
394  if isNothing us && E.null ps
395    then "unknown parse error\n"
396    else messageItemsPretty "unexpected " (showErrorItem pxy `E.map` maybe E.empty E.singleton us) <>
397         messageItemsPretty "expecting "  (showErrorItem pxy `E.map` ps)
398  where
399    pxy = Proxy :: Proxy s
400parseErrorTextPretty (FancyError _ xs) =
401  if E.null xs
402    then "unknown fancy parse error\n"
403    else unlines (showErrorFancy <$> E.toAscList xs)
404
405----------------------------------------------------------------------------
406-- Helpers
407
408-- | Pretty-print an 'ErrorItem'.
409
410showErrorItem :: Stream s => Proxy s -> ErrorItem (Token s) -> String
411showErrorItem pxy = \case
412    Tokens   ts -> showTokens pxy ts
413    Label label -> NE.toList label
414    EndOfInput  -> "end of input"
415
416-- | Get length of the “pointer” to display under a given 'ErrorItem'.
417
418errorItemLength :: ErrorItem t -> Int
419errorItemLength = \case
420  Tokens ts -> NE.length ts
421  _         -> 1
422
423-- | Pretty-print an 'ErrorFancy'.
424
425showErrorFancy :: ShowErrorComponent e => ErrorFancy e -> String
426showErrorFancy = \case
427  ErrorFail msg -> msg
428  ErrorIndentation ord ref actual ->
429    "incorrect indentation (got " <> show (unPos actual) <>
430    ", should be " <> p <> show (unPos ref) <> ")"
431    where
432      p = case ord of
433            LT -> "less than "
434            EQ -> "equal to "
435            GT -> "greater than "
436  ErrorCustom a -> showErrorComponent a
437
438-- | Get length of the “pointer” to display under a given 'ErrorFancy'.
439
440errorFancyLength :: ShowErrorComponent e => ErrorFancy e -> Int
441errorFancyLength = \case
442  ErrorCustom a -> errorComponentLen a
443  _             -> 1
444
445-- | Transforms a list of error messages into their textual representation.
446
447messageItemsPretty
448  :: String            -- ^ Prefix to prepend
449  -> Set String        -- ^ Collection of messages
450  -> String            -- ^ Result of rendering
451messageItemsPretty prefix ts
452  | E.null ts = ""
453  | otherwise =
454    prefix <> (orList . NE.fromList . E.toAscList) ts <> "\n"
455
456-- | Print a pretty list where items are separated with commas and the word
457-- “or” according to the rules of English punctuation.
458
459orList :: NonEmpty String -> String
460orList (x:|[])  = x
461orList (x:|[y]) = x <> " or " <> y
462orList xs       = intercalate ", " (NE.init xs) <> ", or " <> NE.last xs
463