1{-# LANGUAGE BangPatterns #-}
2{-# LANGUAGE DeriveDataTypeable #-}
3{-# LANGUAGE DeriveFunctor #-}
4{-# LANGUAGE DeriveGeneric #-}
5{-# LANGUAGE FlexibleContexts #-}
6{-# LANGUAGE FlexibleInstances #-}
7{-# LANGUAGE LambdaCase #-}
8{-# LANGUAGE RecordWildCards #-}
9{-# LANGUAGE Safe #-}
10{-# LANGUAGE ScopedTypeVariables #-}
11{-# LANGUAGE StandaloneDeriving #-}
12{-# LANGUAGE UndecidableInstances #-}
13
14-- |
15-- Module      :  Text.Megaparsec.Error
16-- Copyright   :  © 2015–present Megaparsec contributors
17-- License     :  FreeBSD
18--
19-- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
20-- Stability   :  experimental
21-- Portability :  portable
22--
23-- Parse errors. The current version of Megaparsec supports well-typed
24-- errors instead of 'String'-based ones. This gives a lot of flexibility in
25-- describing what exactly went wrong as well as a way to return arbitrary
26-- data in case of failure.
27--
28-- You probably do not want to import this module directly because
29-- "Text.Megaparsec" re-exports it anyway.
30module Text.Megaparsec.Error
31  ( -- * Parse error type
32    ErrorItem (..),
33    ErrorFancy (..),
34    ParseError (..),
35    mapParseError,
36    errorOffset,
37    setErrorOffset,
38    ParseErrorBundle (..),
39    attachSourcePos,
40
41    -- * Pretty-printing
42    ShowErrorComponent (..),
43    errorBundlePretty,
44    parseErrorPretty,
45    parseErrorTextPretty,
46  )
47where
48
49import Control.DeepSeq
50import Control.Exception
51import Control.Monad.State.Strict
52import Data.Data (Data)
53import Data.List (intercalate)
54import Data.List.NonEmpty (NonEmpty (..))
55import qualified Data.List.NonEmpty as NE
56import Data.Maybe (isNothing)
57import Data.Proxy
58import Data.Set (Set)
59import qualified Data.Set as E
60import Data.Typeable (Typeable)
61import Data.Void
62import GHC.Generics
63import Text.Megaparsec.Pos
64import Text.Megaparsec.State
65import Text.Megaparsec.Stream
66
67----------------------------------------------------------------------------
68-- Parse error type
69
70-- | Data type that is used to represent “unexpected\/expected” items in
71-- 'ParseError'. The data type is parametrized over the token type @t@.
72--
73-- @since 5.0.0
74data ErrorItem t
75  = -- | Non-empty stream of tokens
76    Tokens (NonEmpty t)
77  | -- | Label (cannot be empty)
78    Label (NonEmpty Char)
79  | -- | End of input
80    EndOfInput
81  deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor)
82
83instance NFData t => NFData (ErrorItem t)
84
85-- | Additional error data, extendable by user. When no custom data is
86-- necessary, the type is typically indexed by 'Void' to “cancel” the
87-- 'ErrorCustom' constructor.
88--
89-- @since 6.0.0
90data ErrorFancy e
91  = -- | 'fail' has been used in parser monad
92    ErrorFail String
93  | -- | Incorrect indentation error: desired ordering between reference
94    -- level and actual level, reference indentation level, actual
95    -- indentation level
96    ErrorIndentation Ordering Pos Pos
97  | -- | Custom error data
98    ErrorCustom e
99  deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor)
100
101instance NFData a => NFData (ErrorFancy a) where
102  rnf (ErrorFail str) = rnf str
103  rnf (ErrorIndentation ord ref act) = ord `seq` rnf ref `seq` rnf act
104  rnf (ErrorCustom a) = rnf a
105
106-- | @'ParseError' s e@ represents a parse error parametrized over the
107-- stream type @s@ and the custom data @e@.
108--
109-- 'Semigroup' and 'Monoid' instances of the data type allow to merge parse
110-- errors from different branches of parsing. When merging two
111-- 'ParseError's, the longest match is preferred; if positions are the same,
112-- custom data sets and collections of message items are combined. Note that
113-- fancy errors take precedence over trivial errors in merging.
114--
115-- @since 7.0.0
116data ParseError s e
117  = -- | Trivial errors, generated by Megaparsec's machinery. The data
118    -- constructor includes the offset of error, unexpected token (if any),
119    -- and expected tokens.
120    --
121    -- Type of the first argument was changed in the version /7.0.0/.
122    TrivialError Int (Maybe (ErrorItem (Token s))) (Set (ErrorItem (Token s)))
123  | -- | Fancy, custom errors.
124    --
125    -- Type of the first argument was changed in the version /7.0.0/.
126    FancyError Int (Set (ErrorFancy e))
127  deriving (Typeable, Generic)
128
129deriving instance
130  ( Show (Token s),
131    Show e
132  ) =>
133  Show (ParseError s e)
134
135deriving instance
136  ( Eq (Token s),
137    Eq e
138  ) =>
139  Eq (ParseError s e)
140
141deriving instance
142  ( Data s,
143    Data (Token s),
144    Ord (Token s),
145    Data e,
146    Ord e
147  ) =>
148  Data (ParseError s e)
149
150instance
151  ( NFData (Token s),
152    NFData e
153  ) =>
154  NFData (ParseError s e)
155
156instance (Stream s, Ord e) => Semigroup (ParseError s e) where
157  (<>) = mergeError
158  {-# INLINE (<>) #-}
159
160instance (Stream s, Ord e) => Monoid (ParseError s e) where
161  mempty = TrivialError 0 Nothing E.empty
162  mappend = (<>)
163  {-# INLINE mappend #-}
164
165instance
166  ( Show s,
167    Show (Token s),
168    Show e,
169    ShowErrorComponent e,
170    VisualStream s,
171    Typeable s,
172    Typeable e
173  ) =>
174  Exception (ParseError s e)
175  where
176  displayException = parseErrorPretty
177
178-- | Modify the custom data component in a parse error. This could be done
179-- via 'fmap' if not for the 'Ord' constraint.
180--
181-- @since 7.0.0
182mapParseError ::
183  Ord e' =>
184  (e -> e') ->
185  ParseError s e ->
186  ParseError s e'
187mapParseError _ (TrivialError o u p) = TrivialError o u p
188mapParseError f (FancyError o x) = FancyError o (E.map (fmap f) x)
189
190-- | Get offset of 'ParseError'.
191--
192-- @since 7.0.0
193errorOffset :: ParseError s e -> Int
194errorOffset (TrivialError o _ _) = o
195errorOffset (FancyError o _) = o
196
197-- | Set offset of 'ParseError'.
198--
199-- @since 8.0.0
200setErrorOffset :: Int -> ParseError s e -> ParseError s e
201setErrorOffset o (TrivialError _ u p) = TrivialError o u p
202setErrorOffset o (FancyError _ x) = FancyError o x
203
204-- | Merge two error data structures into one joining their collections of
205-- message items and preferring the longest match. In other words, earlier
206-- error message is discarded. This may seem counter-intuitive, but
207-- 'mergeError' is only used to merge error messages of alternative branches
208-- of parsing and in this case longest match should be preferred.
209mergeError ::
210  (Stream s, Ord e) =>
211  ParseError s e ->
212  ParseError s e ->
213  ParseError s e
214mergeError e1 e2 =
215  case errorOffset e1 `compare` errorOffset e2 of
216    LT -> e2
217    EQ ->
218      case (e1, e2) of
219        (TrivialError s1 u1 p1, TrivialError _ u2 p2) ->
220          TrivialError s1 (n u1 u2) (E.union p1 p2)
221        (FancyError {}, TrivialError {}) -> e1
222        (TrivialError {}, FancyError {}) -> e2
223        (FancyError s1 x1, FancyError _ x2) ->
224          FancyError s1 (E.union x1 x2)
225    GT -> e1
226  where
227    -- NOTE The logic behind this merging is that since we only combine
228    -- parse errors that happen at exactly the same position, all the
229    -- unexpected items will be prefixes of input stream at that position or
230    -- labels referring to the same thing. Our aim here is to choose the
231    -- longest prefix (merging with labels and end of input is somewhat
232    -- arbitrary, but is necessary because otherwise we can't make
233    -- ParseError lawful Monoid and have nice parse errors at the same
234    -- time).
235    n Nothing Nothing = Nothing
236    n (Just x) Nothing = Just x
237    n Nothing (Just y) = Just y
238    n (Just x) (Just y) = Just (max x y)
239{-# INLINE mergeError #-}
240
241-- | A non-empty collection of 'ParseError's equipped with 'PosState' that
242-- allows to pretty-print the errors efficiently and correctly.
243--
244-- @since 7.0.0
245data ParseErrorBundle s e = ParseErrorBundle
246  { -- | A collection of 'ParseError's that is sorted by parse error offsets
247    bundleErrors :: NonEmpty (ParseError s e),
248    -- | State that is used for line\/column calculation
249    bundlePosState :: PosState s
250  }
251  deriving (Generic)
252
253deriving instance
254  ( Show s,
255    Show (Token s),
256    Show e
257  ) =>
258  Show (ParseErrorBundle s e)
259
260deriving instance
261  ( Eq s,
262    Eq (Token s),
263    Eq e
264  ) =>
265  Eq (ParseErrorBundle s e)
266
267deriving instance
268  ( Typeable s,
269    Typeable (Token s),
270    Typeable e
271  ) =>
272  Typeable (ParseErrorBundle s e)
273
274deriving instance
275  ( Data s,
276    Data (Token s),
277    Ord (Token s),
278    Data e,
279    Ord e
280  ) =>
281  Data (ParseErrorBundle s e)
282
283instance
284  ( NFData s,
285    NFData (Token s),
286    NFData e
287  ) =>
288  NFData (ParseErrorBundle s e)
289
290instance
291  ( Show s,
292    Show (Token s),
293    Show e,
294    ShowErrorComponent e,
295    VisualStream s,
296    TraversableStream s,
297    Typeable s,
298    Typeable e
299  ) =>
300  Exception (ParseErrorBundle s e)
301  where
302  displayException = errorBundlePretty
303
304-- | Attach 'SourcePos'es to items in a 'Traversable' container given that
305-- there is a projection allowing to get an offset per item.
306--
307-- Items must be in ascending order with respect to their offsets.
308--
309-- @since 7.0.0
310attachSourcePos ::
311  (Traversable t, TraversableStream s) =>
312  -- | How to project offset from an item (e.g. 'errorOffset')
313  (a -> Int) ->
314  -- | The collection of items
315  t a ->
316  -- | Initial 'PosState'
317  PosState s ->
318  -- | The collection with 'SourcePos'es
319  -- added and the final 'PosState'
320  (t (a, SourcePos), PosState s)
321attachSourcePos projectOffset xs = runState (traverse f xs)
322  where
323    f a = do
324      pst <- get
325      let pst' = reachOffsetNoLine (projectOffset a) pst
326      put pst'
327      return (a, pstateSourcePos pst')
328{-# INLINEABLE attachSourcePos #-}
329
330----------------------------------------------------------------------------
331-- Pretty-printing
332
333-- | The type class defines how to print a custom component of 'ParseError'.
334--
335-- @since 5.0.0
336class Ord a => ShowErrorComponent a where
337  -- | Pretty-print a component of 'ParseError'.
338  showErrorComponent :: a -> String
339
340  -- | Length of the error component in characters, used for highlighting of
341  -- parse errors in input string.
342  --
343  -- @since 7.0.0
344  errorComponentLen :: a -> Int
345  errorComponentLen _ = 1
346
347instance ShowErrorComponent Void where
348  showErrorComponent = absurd
349
350-- | Pretty-print a 'ParseErrorBundle'. All 'ParseError's in the bundle will
351-- be pretty-printed in order together with the corresponding offending
352-- lines by doing a single efficient pass over the input stream. The
353-- rendered 'String' always ends with a newline.
354--
355-- @since 7.0.0
356errorBundlePretty ::
357  forall s e.
358  ( VisualStream s,
359    TraversableStream s,
360    ShowErrorComponent e
361  ) =>
362  -- | Parse error bundle to display
363  ParseErrorBundle s e ->
364  -- | Textual rendition of the bundle
365  String
366errorBundlePretty ParseErrorBundle {..} =
367  let (r, _) = foldl f (id, bundlePosState) bundleErrors
368   in drop 1 (r "")
369  where
370    f ::
371      (ShowS, PosState s) ->
372      ParseError s e ->
373      (ShowS, PosState s)
374    f (o, !pst) e = (o . (outChunk ++), pst')
375      where
376        (msline, pst') = reachOffset (errorOffset e) pst
377        epos = pstateSourcePos pst'
378        outChunk =
379          "\n" <> sourcePosPretty epos <> ":\n"
380            <> offendingLine
381            <> parseErrorTextPretty e
382        offendingLine =
383          case msline of
384            Nothing -> ""
385            Just sline ->
386              let rpadding =
387                    if pointerLen > 0
388                      then replicate rpshift ' '
389                      else ""
390                  pointerLen =
391                    if rpshift + elen > slineLen
392                      then slineLen - rpshift + 1
393                      else elen
394                  pointer = replicate pointerLen '^'
395                  lineNumber = (show . unPos . sourceLine) epos
396                  padding = replicate (length lineNumber + 1) ' '
397                  rpshift = unPos (sourceColumn epos) - 1
398                  slineLen = length sline
399               in padding <> "|\n" <> lineNumber <> " | " <> sline
400                    <> "\n"
401                    <> padding
402                    <> "| "
403                    <> rpadding
404                    <> pointer
405                    <> "\n"
406        pxy = Proxy :: Proxy s
407        elen =
408          case e of
409            TrivialError _ Nothing _ -> 1
410            TrivialError _ (Just x) _ -> errorItemLength pxy x
411            FancyError _ xs ->
412              E.foldl' (\a b -> max a (errorFancyLength b)) 1 xs
413
414-- | Pretty-print a 'ParseError'. The rendered 'String' always ends with a
415-- newline.
416--
417-- @since 5.0.0
418parseErrorPretty ::
419  (VisualStream s, ShowErrorComponent e) =>
420  -- | Parse error to render
421  ParseError s e ->
422  -- | Result of rendering
423  String
424parseErrorPretty e =
425  "offset=" <> show (errorOffset e) <> ":\n" <> parseErrorTextPretty e
426
427-- | Pretty-print a textual part of a 'ParseError', that is, everything
428-- except for its position. The rendered 'String' always ends with a
429-- newline.
430--
431-- @since 5.1.0
432parseErrorTextPretty ::
433  forall s e.
434  (VisualStream s, ShowErrorComponent e) =>
435  -- | Parse error to render
436  ParseError s e ->
437  -- | Result of rendering
438  String
439parseErrorTextPretty (TrivialError _ us ps) =
440  if isNothing us && E.null ps
441    then "unknown parse error\n"
442    else
443      messageItemsPretty "unexpected " (showErrorItem pxy `E.map` maybe E.empty E.singleton us)
444        <> messageItemsPretty "expecting " (showErrorItem pxy `E.map` ps)
445  where
446    pxy = Proxy :: Proxy s
447parseErrorTextPretty (FancyError _ xs) =
448  if E.null xs
449    then "unknown fancy parse error\n"
450    else unlines (showErrorFancy <$> E.toAscList xs)
451
452----------------------------------------------------------------------------
453-- Helpers
454
455-- | Pretty-print an 'ErrorItem'.
456showErrorItem :: VisualStream s => Proxy s -> ErrorItem (Token s) -> String
457showErrorItem pxy = \case
458  Tokens ts -> showTokens pxy ts
459  Label label -> NE.toList label
460  EndOfInput -> "end of input"
461
462-- | Get length of the “pointer” to display under a given 'ErrorItem'.
463errorItemLength :: VisualStream s => Proxy s -> ErrorItem (Token s) -> Int
464errorItemLength pxy = \case
465  Tokens ts -> tokensLength pxy ts
466  _ -> 1
467
468-- | Pretty-print an 'ErrorFancy'.
469showErrorFancy :: ShowErrorComponent e => ErrorFancy e -> String
470showErrorFancy = \case
471  ErrorFail msg -> msg
472  ErrorIndentation ord ref actual ->
473    "incorrect indentation (got " <> show (unPos actual)
474      <> ", should be "
475      <> p
476      <> show (unPos ref)
477      <> ")"
478    where
479      p = case ord of
480        LT -> "less than "
481        EQ -> "equal to "
482        GT -> "greater than "
483  ErrorCustom a -> showErrorComponent a
484
485-- | Get length of the “pointer” to display under a given 'ErrorFancy'.
486errorFancyLength :: ShowErrorComponent e => ErrorFancy e -> Int
487errorFancyLength = \case
488  ErrorCustom a -> errorComponentLen a
489  _ -> 1
490
491-- | Transforms a list of error messages into their textual representation.
492messageItemsPretty ::
493  -- | Prefix to prepend
494  String ->
495  -- | Collection of messages
496  Set String ->
497  -- | Result of rendering
498  String
499messageItemsPretty prefix ts
500  | E.null ts = ""
501  | otherwise =
502    prefix <> (orList . NE.fromList . E.toAscList) ts <> "\n"
503
504-- | Print a pretty list where items are separated with commas and the word
505-- “or” according to the rules of English punctuation.
506orList :: NonEmpty String -> String
507orList (x :| []) = x
508orList (x :| [y]) = x <> " or " <> y
509orList xs = intercalate ", " (NE.init xs) <> ", or " <> NE.last xs
510