1-- | 2-- Module : Text.Megaparsec.Error 3-- Copyright : © 2015–present Megaparsec contributors 4-- License : FreeBSD 5-- 6-- Maintainer : Mark Karpov <markkarpov92@gmail.com> 7-- Stability : experimental 8-- Portability : portable 9-- 10-- Parse errors. The current version of Megaparsec supports well-typed 11-- errors instead of 'String'-based ones. This gives a lot of flexibility in 12-- describing what exactly went wrong as well as a way to return arbitrary 13-- data in case of failure. 14-- 15-- You probably do not want to import this module directly because 16-- "Text.Megaparsec" re-exports it anyway. 17 18{-# LANGUAGE BangPatterns #-} 19{-# LANGUAGE DeriveDataTypeable #-} 20{-# LANGUAGE DeriveFunctor #-} 21{-# LANGUAGE DeriveGeneric #-} 22{-# LANGUAGE FlexibleContexts #-} 23{-# LANGUAGE FlexibleInstances #-} 24{-# LANGUAGE LambdaCase #-} 25{-# LANGUAGE RecordWildCards #-} 26{-# LANGUAGE ScopedTypeVariables #-} 27{-# LANGUAGE StandaloneDeriving #-} 28{-# LANGUAGE UndecidableInstances #-} 29 30module Text.Megaparsec.Error 31 ( -- * Parse error type 32 ErrorItem (..) 33 , ErrorFancy (..) 34 , ParseError (..) 35 , mapParseError 36 , errorOffset 37 , setErrorOffset 38 , ParseErrorBundle (..) 39 , attachSourcePos 40 -- * Pretty-printing 41 , ShowErrorComponent (..) 42 , errorBundlePretty 43 , parseErrorPretty 44 , parseErrorTextPretty ) 45where 46 47import Control.DeepSeq 48import Control.Exception 49import Control.Monad.State.Strict 50import Data.Data (Data) 51import Data.List (intercalate) 52import Data.List.NonEmpty (NonEmpty (..)) 53import Data.Maybe (isNothing) 54import Data.Proxy 55import Data.Set (Set) 56import Data.Typeable (Typeable) 57import Data.Void 58import GHC.Generics 59import Text.Megaparsec.Pos 60import Text.Megaparsec.State 61import Text.Megaparsec.Stream 62import qualified Data.List.NonEmpty as NE 63import qualified Data.Set as E 64 65---------------------------------------------------------------------------- 66-- Parse error type 67 68-- | Data type that is used to represent “unexpected\/expected” items in 69-- 'ParseError'. The data type is parametrized over the token type @t@. 70-- 71-- @since 5.0.0 72 73data ErrorItem t 74 = Tokens (NonEmpty t) -- ^ Non-empty stream of tokens 75 | Label (NonEmpty Char) -- ^ Label (cannot be empty) 76 | EndOfInput -- ^ End of input 77 deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor) 78 79instance NFData t => NFData (ErrorItem t) 80 81-- | Additional error data, extendable by user. When no custom data is 82-- necessary, the type is typically indexed by 'Void' to “cancel” the 83-- 'ErrorCustom' constructor. 84-- 85-- @since 6.0.0 86 87data ErrorFancy e 88 = ErrorFail String 89 -- ^ 'fail' has been used in parser monad 90 | ErrorIndentation Ordering Pos Pos 91 -- ^ Incorrect indentation error: desired ordering between reference 92 -- level and actual level, reference indentation level, actual 93 -- indentation level 94 | ErrorCustom e 95 -- ^ Custom error data 96 deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor) 97 98instance NFData a => NFData (ErrorFancy a) where 99 rnf (ErrorFail str) = rnf str 100 rnf (ErrorIndentation ord ref act) = ord `seq` rnf ref `seq` rnf act 101 rnf (ErrorCustom a) = rnf a 102 103-- | @'ParseError' s e@ represents a parse error parametrized over the 104-- stream type @s@ and the custom data @e@. 105-- 106-- 'Semigroup' and 'Monoid' instances of the data type allow to merge parse 107-- errors from different branches of parsing. When merging two 108-- 'ParseError's, the longest match is preferred; if positions are the same, 109-- custom data sets and collections of message items are combined. Note that 110-- fancy errors take precedence over trivial errors in merging. 111-- 112-- @since 7.0.0 113 114data ParseError s e 115 = TrivialError Int (Maybe (ErrorItem (Token s))) (Set (ErrorItem (Token s))) 116 -- ^ Trivial errors, generated by Megaparsec's machinery. The data 117 -- constructor includes the offset of error, unexpected token (if any), 118 -- and expected tokens. 119 -- 120 -- Type of the first argument was changed in the version /7.0.0/. 121 | FancyError Int (Set (ErrorFancy e)) 122 -- ^ Fancy, custom errors. 123 -- 124 -- Type of the first argument was changed in the version /7.0.0/. 125 deriving (Typeable, Generic) 126 127deriving instance ( Show (Token s) 128 , Show e 129 ) => Show (ParseError s e) 130 131deriving instance ( Eq (Token s) 132 , Eq e 133 ) => Eq (ParseError s e) 134 135deriving instance ( Data s 136 , Data (Token s) 137 , Ord (Token s) 138 , Data e 139 , Ord e 140 ) => Data (ParseError s e) 141 142instance ( NFData (Token s) 143 , NFData e 144 ) => NFData (ParseError s e) 145 146instance (Stream s, Ord e) => Semigroup (ParseError s e) where 147 (<>) = mergeError 148 {-# INLINE (<>) #-} 149 150instance (Stream s, Ord e) => Monoid (ParseError s e) where 151 mempty = TrivialError 0 Nothing E.empty 152 mappend = (<>) 153 {-# INLINE mappend #-} 154 155instance ( Show s 156 , Show (Token s) 157 , Show e 158 , ShowErrorComponent e 159 , Stream s 160 , Typeable s 161 , Typeable e ) 162 => Exception (ParseError s e) where 163 displayException = parseErrorPretty 164 165-- | Modify the custom data component in a parse error. This could be done 166-- via 'fmap' if not for the 'Ord' constraint. 167-- 168-- @since 7.0.0 169 170mapParseError :: Ord e' 171 => (e -> e') 172 -> ParseError s e 173 -> ParseError s e' 174mapParseError _ (TrivialError o u p) = TrivialError o u p 175mapParseError f (FancyError o x) = FancyError o (E.map (fmap f) x) 176 177-- | Get offset of 'ParseError'. 178-- 179-- @since 7.0.0 180 181errorOffset :: ParseError s e -> Int 182errorOffset (TrivialError o _ _) = o 183errorOffset (FancyError o _) = o 184 185-- | Set offset of 'ParseError'. 186-- 187-- @since 8.0.0 188 189setErrorOffset :: Int -> ParseError s e -> ParseError s e 190setErrorOffset o (TrivialError _ u p) = TrivialError o u p 191setErrorOffset o (FancyError _ x) = FancyError o x 192 193-- | Merge two error data structures into one joining their collections of 194-- message items and preferring the longest match. In other words, earlier 195-- error message is discarded. This may seem counter-intuitive, but 196-- 'mergeError' is only used to merge error messages of alternative branches 197-- of parsing and in this case longest match should be preferred. 198 199mergeError :: (Stream s, Ord e) 200 => ParseError s e 201 -> ParseError s e 202 -> ParseError s e 203mergeError e1 e2 = 204 case errorOffset e1 `compare` errorOffset e2 of 205 LT -> e2 206 EQ -> 207 case (e1, e2) of 208 (TrivialError s1 u1 p1, TrivialError _ u2 p2) -> 209 TrivialError s1 (n u1 u2) (E.union p1 p2) 210 (FancyError {}, TrivialError {}) -> e1 211 (TrivialError {}, FancyError {}) -> e2 212 (FancyError s1 x1, FancyError _ x2) -> 213 FancyError s1 (E.union x1 x2) 214 GT -> e1 215 where 216 -- NOTE The logic behind this merging is that since we only combine 217 -- parse errors that happen at exactly the same position, all the 218 -- unexpected items will be prefixes of input stream at that position or 219 -- labels referring to the same thing. Our aim here is to choose the 220 -- longest prefix (merging with labels and end of input is somewhat 221 -- arbitrary, but is necessary because otherwise we can't make 222 -- ParseError lawful Monoid and have nice parse errors at the same 223 -- time). 224 n Nothing Nothing = Nothing 225 n (Just x) Nothing = Just x 226 n Nothing (Just y) = Just y 227 n (Just x) (Just y) = Just (max x y) 228{-# INLINE mergeError #-} 229 230-- | A non-empty collection of 'ParseError's equipped with 'PosState' that 231-- allows to pretty-print the errors efficiently and correctly. 232-- 233-- @since 7.0.0 234 235data ParseErrorBundle s e = ParseErrorBundle 236 { bundleErrors :: NonEmpty (ParseError s e) 237 -- ^ A collection of 'ParseError's that is sorted by parse error offsets 238 , bundlePosState :: PosState s 239 -- ^ State that is used for line\/column calculation 240 } deriving (Generic) 241 242deriving instance ( Show s 243 , Show (Token s) 244 , Show e 245 ) => Show (ParseErrorBundle s e) 246 247deriving instance ( Eq s 248 , Eq (Token s) 249 , Eq e 250 ) => Eq (ParseErrorBundle s e) 251 252deriving instance ( Typeable s 253 , Typeable (Token s) 254 , Typeable e 255 ) => Typeable (ParseErrorBundle s e) 256 257deriving instance ( Data s 258 , Data (Token s) 259 , Ord (Token s) 260 , Data e 261 , Ord e 262 ) => Data (ParseErrorBundle s e) 263 264instance ( NFData s 265 , NFData (Token s) 266 , NFData e 267 ) => NFData (ParseErrorBundle s e) 268 269instance ( Show s 270 , Show (Token s) 271 , Show e 272 , ShowErrorComponent e 273 , Stream s 274 , Typeable s 275 , Typeable e 276 ) => Exception (ParseErrorBundle s e) where 277 displayException = errorBundlePretty 278 279-- | Attach 'SourcePos'es to items in a 'Traversable' container given that 280-- there is a projection allowing to get an offset per item. 281-- 282-- Items must be in ascending order with respect to their offsets. 283-- 284-- @since 7.0.0 285 286attachSourcePos 287 :: (Traversable t, Stream s) 288 => (a -> Int) -- ^ How to project offset from an item (e.g. 'errorOffset') 289 -> t a -- ^ The collection of items 290 -> PosState s -- ^ Initial 'PosState' 291 -> (t (a, SourcePos), PosState s) -- ^ The collection with 'SourcePos'es 292 -- added and the final 'PosState' 293attachSourcePos projectOffset xs = runState (traverse f xs) 294 where 295 f a = do 296 pst <- get 297 let pst' = reachOffsetNoLine (projectOffset a) pst 298 put pst' 299 return (a, pstateSourcePos pst') 300{-# INLINEABLE attachSourcePos #-} 301 302---------------------------------------------------------------------------- 303-- Pretty-printing 304 305-- | The type class defines how to print a custom component of 'ParseError'. 306-- 307-- @since 5.0.0 308 309class Ord a => ShowErrorComponent a where 310 311 -- | Pretty-print a component of 'ParseError'. 312 313 showErrorComponent :: a -> String 314 315 -- | Length of the error component in characters, used for highlighting of 316 -- parse errors in input string. 317 -- 318 -- @since 7.0.0 319 320 errorComponentLen :: a -> Int 321 errorComponentLen _ = 1 322 323instance ShowErrorComponent Void where 324 showErrorComponent = absurd 325 326-- | Pretty-print a 'ParseErrorBundle'. All 'ParseError's in the bundle will 327-- be pretty-printed in order together with the corresponding offending 328-- lines by doing a single efficient pass over the input stream. The 329-- rendered 'String' always ends with a newline. 330-- 331-- @since 7.0.0 332 333errorBundlePretty 334 :: forall s e. ( Stream s 335 , ShowErrorComponent e 336 ) 337 => ParseErrorBundle s e -- ^ Parse error bundle to display 338 -> String -- ^ Textual rendition of the bundle 339errorBundlePretty ParseErrorBundle {..} = 340 let (r, _) = foldl f (id, bundlePosState) bundleErrors 341 in drop 1 (r "") 342 where 343 f :: (ShowS, PosState s) 344 -> ParseError s e 345 -> (ShowS, PosState s) 346 f (o, !pst) e = (o . (outChunk ++), pst') 347 where 348 (sline, pst') = reachOffset (errorOffset e) pst 349 epos = pstateSourcePos pst' 350 outChunk = 351 "\n" <> sourcePosPretty epos <> ":\n" <> 352 padding <> "|\n" <> 353 lineNumber <> " | " <> sline <> "\n" <> 354 padding <> "| " <> rpadding <> pointer <> "\n" <> 355 parseErrorTextPretty e 356 lineNumber = (show . unPos . sourceLine) epos 357 padding = replicate (length lineNumber + 1) ' ' 358 rpadding = 359 if pointerLen > 0 360 then replicate rpshift ' ' 361 else "" 362 rpshift = unPos (sourceColumn epos) - 1 363 pointer = replicate pointerLen '^' 364 pointerLen = 365 if rpshift + elen > slineLen 366 then slineLen - rpshift + 1 367 else elen 368 slineLen = length sline 369 pxy = Proxy :: Proxy s 370 elen = 371 case e of 372 TrivialError _ Nothing _ -> 1 373 TrivialError _ (Just x) _ -> errorItemLength pxy x 374 FancyError _ xs -> 375 E.foldl' (\a b -> max a (errorFancyLength b)) 1 xs 376 377-- | Pretty-print a 'ParseError'. The rendered 'String' always ends with a 378-- newline. 379-- 380-- @since 5.0.0 381 382parseErrorPretty 383 :: (Stream s, ShowErrorComponent e) 384 => ParseError s e -- ^ Parse error to render 385 -> String -- ^ Result of rendering 386parseErrorPretty e = 387 "offset=" <> show (errorOffset e) <> ":\n" <> parseErrorTextPretty e 388 389-- | Pretty-print a textual part of a 'ParseError', that is, everything 390-- except for its position. The rendered 'String' always ends with a 391-- newline. 392-- 393-- @since 5.1.0 394 395parseErrorTextPretty 396 :: forall s e. (Stream s, ShowErrorComponent e) 397 => ParseError s e -- ^ Parse error to render 398 -> String -- ^ Result of rendering 399parseErrorTextPretty (TrivialError _ us ps) = 400 if isNothing us && E.null ps 401 then "unknown parse error\n" 402 else messageItemsPretty "unexpected " (showErrorItem pxy `E.map` maybe E.empty E.singleton us) <> 403 messageItemsPretty "expecting " (showErrorItem pxy `E.map` ps) 404 where 405 pxy = Proxy :: Proxy s 406parseErrorTextPretty (FancyError _ xs) = 407 if E.null xs 408 then "unknown fancy parse error\n" 409 else unlines (showErrorFancy <$> E.toAscList xs) 410 411---------------------------------------------------------------------------- 412-- Helpers 413 414-- | Pretty-print an 'ErrorItem'. 415 416showErrorItem :: Stream s => Proxy s -> ErrorItem (Token s) -> String 417showErrorItem pxy = \case 418 Tokens ts -> showTokens pxy ts 419 Label label -> NE.toList label 420 EndOfInput -> "end of input" 421 422-- | Get length of the “pointer” to display under a given 'ErrorItem'. 423 424errorItemLength :: Stream s => Proxy s -> ErrorItem (Token s) -> Int 425errorItemLength pxy = \case 426 Tokens ts -> tokensLength pxy ts 427 _ -> 1 428 429-- | Pretty-print an 'ErrorFancy'. 430 431showErrorFancy :: ShowErrorComponent e => ErrorFancy e -> String 432showErrorFancy = \case 433 ErrorFail msg -> msg 434 ErrorIndentation ord ref actual -> 435 "incorrect indentation (got " <> show (unPos actual) <> 436 ", should be " <> p <> show (unPos ref) <> ")" 437 where 438 p = case ord of 439 LT -> "less than " 440 EQ -> "equal to " 441 GT -> "greater than " 442 ErrorCustom a -> showErrorComponent a 443 444-- | Get length of the “pointer” to display under a given 'ErrorFancy'. 445 446errorFancyLength :: ShowErrorComponent e => ErrorFancy e -> Int 447errorFancyLength = \case 448 ErrorCustom a -> errorComponentLen a 449 _ -> 1 450 451-- | Transforms a list of error messages into their textual representation. 452 453messageItemsPretty 454 :: String -- ^ Prefix to prepend 455 -> Set String -- ^ Collection of messages 456 -> String -- ^ Result of rendering 457messageItemsPretty prefix ts 458 | E.null ts = "" 459 | otherwise = 460 prefix <> (orList . NE.fromList . E.toAscList) ts <> "\n" 461 462-- | Print a pretty list where items are separated with commas and the word 463-- “or” according to the rules of English punctuation. 464 465orList :: NonEmpty String -> String 466orList (x:|[]) = x 467orList (x:|[y]) = x <> " or " <> y 468orList xs = intercalate ", " (NE.init xs) <> ", or " <> NE.last xs 469