1{-# LANGUAGE BangPatterns #-} 2{-# LANGUAGE DeriveDataTypeable #-} 3{-# LANGUAGE DeriveFunctor #-} 4{-# LANGUAGE DeriveGeneric #-} 5{-# LANGUAGE FlexibleContexts #-} 6{-# LANGUAGE FlexibleInstances #-} 7{-# LANGUAGE LambdaCase #-} 8{-# LANGUAGE RecordWildCards #-} 9{-# LANGUAGE Safe #-} 10{-# LANGUAGE ScopedTypeVariables #-} 11{-# LANGUAGE StandaloneDeriving #-} 12{-# LANGUAGE UndecidableInstances #-} 13 14-- | 15-- Module : Text.Megaparsec.Error 16-- Copyright : © 2015–present Megaparsec contributors 17-- License : FreeBSD 18-- 19-- Maintainer : Mark Karpov <markkarpov92@gmail.com> 20-- Stability : experimental 21-- Portability : portable 22-- 23-- Parse errors. The current version of Megaparsec supports well-typed 24-- errors instead of 'String'-based ones. This gives a lot of flexibility in 25-- describing what exactly went wrong as well as a way to return arbitrary 26-- data in case of failure. 27-- 28-- You probably do not want to import this module directly because 29-- "Text.Megaparsec" re-exports it anyway. 30module Text.Megaparsec.Error 31 ( -- * Parse error type 32 ErrorItem (..), 33 ErrorFancy (..), 34 ParseError (..), 35 mapParseError, 36 errorOffset, 37 setErrorOffset, 38 ParseErrorBundle (..), 39 attachSourcePos, 40 41 -- * Pretty-printing 42 ShowErrorComponent (..), 43 errorBundlePretty, 44 parseErrorPretty, 45 parseErrorTextPretty, 46 ) 47where 48 49import Control.DeepSeq 50import Control.Exception 51import Control.Monad.State.Strict 52import Data.Data (Data) 53import Data.List (intercalate) 54import Data.List.NonEmpty (NonEmpty (..)) 55import qualified Data.List.NonEmpty as NE 56import Data.Maybe (isNothing) 57import Data.Proxy 58import Data.Set (Set) 59import qualified Data.Set as E 60import Data.Typeable (Typeable) 61import Data.Void 62import GHC.Generics 63import Text.Megaparsec.Pos 64import Text.Megaparsec.State 65import Text.Megaparsec.Stream 66 67---------------------------------------------------------------------------- 68-- Parse error type 69 70-- | Data type that is used to represent “unexpected\/expected” items in 71-- 'ParseError'. The data type is parametrized over the token type @t@. 72-- 73-- @since 5.0.0 74data ErrorItem t 75 = -- | Non-empty stream of tokens 76 Tokens (NonEmpty t) 77 | -- | Label (cannot be empty) 78 Label (NonEmpty Char) 79 | -- | End of input 80 EndOfInput 81 deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor) 82 83instance NFData t => NFData (ErrorItem t) 84 85-- | Additional error data, extendable by user. When no custom data is 86-- necessary, the type is typically indexed by 'Void' to “cancel” the 87-- 'ErrorCustom' constructor. 88-- 89-- @since 6.0.0 90data ErrorFancy e 91 = -- | 'fail' has been used in parser monad 92 ErrorFail String 93 | -- | Incorrect indentation error: desired ordering between reference 94 -- level and actual level, reference indentation level, actual 95 -- indentation level 96 ErrorIndentation Ordering Pos Pos 97 | -- | Custom error data 98 ErrorCustom e 99 deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor) 100 101instance NFData a => NFData (ErrorFancy a) where 102 rnf (ErrorFail str) = rnf str 103 rnf (ErrorIndentation ord ref act) = ord `seq` rnf ref `seq` rnf act 104 rnf (ErrorCustom a) = rnf a 105 106-- | @'ParseError' s e@ represents a parse error parametrized over the 107-- stream type @s@ and the custom data @e@. 108-- 109-- 'Semigroup' and 'Monoid' instances of the data type allow to merge parse 110-- errors from different branches of parsing. When merging two 111-- 'ParseError's, the longest match is preferred; if positions are the same, 112-- custom data sets and collections of message items are combined. Note that 113-- fancy errors take precedence over trivial errors in merging. 114-- 115-- @since 7.0.0 116data ParseError s e 117 = -- | Trivial errors, generated by Megaparsec's machinery. The data 118 -- constructor includes the offset of error, unexpected token (if any), 119 -- and expected tokens. 120 -- 121 -- Type of the first argument was changed in the version /7.0.0/. 122 TrivialError Int (Maybe (ErrorItem (Token s))) (Set (ErrorItem (Token s))) 123 | -- | Fancy, custom errors. 124 -- 125 -- Type of the first argument was changed in the version /7.0.0/. 126 FancyError Int (Set (ErrorFancy e)) 127 deriving (Typeable, Generic) 128 129deriving instance 130 ( Show (Token s), 131 Show e 132 ) => 133 Show (ParseError s e) 134 135deriving instance 136 ( Eq (Token s), 137 Eq e 138 ) => 139 Eq (ParseError s e) 140 141deriving instance 142 ( Data s, 143 Data (Token s), 144 Ord (Token s), 145 Data e, 146 Ord e 147 ) => 148 Data (ParseError s e) 149 150instance 151 ( NFData (Token s), 152 NFData e 153 ) => 154 NFData (ParseError s e) 155 156instance (Stream s, Ord e) => Semigroup (ParseError s e) where 157 (<>) = mergeError 158 {-# INLINE (<>) #-} 159 160instance (Stream s, Ord e) => Monoid (ParseError s e) where 161 mempty = TrivialError 0 Nothing E.empty 162 mappend = (<>) 163 {-# INLINE mappend #-} 164 165instance 166 ( Show s, 167 Show (Token s), 168 Show e, 169 ShowErrorComponent e, 170 VisualStream s, 171 Typeable s, 172 Typeable e 173 ) => 174 Exception (ParseError s e) 175 where 176 displayException = parseErrorPretty 177 178-- | Modify the custom data component in a parse error. This could be done 179-- via 'fmap' if not for the 'Ord' constraint. 180-- 181-- @since 7.0.0 182mapParseError :: 183 Ord e' => 184 (e -> e') -> 185 ParseError s e -> 186 ParseError s e' 187mapParseError _ (TrivialError o u p) = TrivialError o u p 188mapParseError f (FancyError o x) = FancyError o (E.map (fmap f) x) 189 190-- | Get offset of 'ParseError'. 191-- 192-- @since 7.0.0 193errorOffset :: ParseError s e -> Int 194errorOffset (TrivialError o _ _) = o 195errorOffset (FancyError o _) = o 196 197-- | Set offset of 'ParseError'. 198-- 199-- @since 8.0.0 200setErrorOffset :: Int -> ParseError s e -> ParseError s e 201setErrorOffset o (TrivialError _ u p) = TrivialError o u p 202setErrorOffset o (FancyError _ x) = FancyError o x 203 204-- | Merge two error data structures into one joining their collections of 205-- message items and preferring the longest match. In other words, earlier 206-- error message is discarded. This may seem counter-intuitive, but 207-- 'mergeError' is only used to merge error messages of alternative branches 208-- of parsing and in this case longest match should be preferred. 209mergeError :: 210 (Stream s, Ord e) => 211 ParseError s e -> 212 ParseError s e -> 213 ParseError s e 214mergeError e1 e2 = 215 case errorOffset e1 `compare` errorOffset e2 of 216 LT -> e2 217 EQ -> 218 case (e1, e2) of 219 (TrivialError s1 u1 p1, TrivialError _ u2 p2) -> 220 TrivialError s1 (n u1 u2) (E.union p1 p2) 221 (FancyError {}, TrivialError {}) -> e1 222 (TrivialError {}, FancyError {}) -> e2 223 (FancyError s1 x1, FancyError _ x2) -> 224 FancyError s1 (E.union x1 x2) 225 GT -> e1 226 where 227 -- NOTE The logic behind this merging is that since we only combine 228 -- parse errors that happen at exactly the same position, all the 229 -- unexpected items will be prefixes of input stream at that position or 230 -- labels referring to the same thing. Our aim here is to choose the 231 -- longest prefix (merging with labels and end of input is somewhat 232 -- arbitrary, but is necessary because otherwise we can't make 233 -- ParseError lawful Monoid and have nice parse errors at the same 234 -- time). 235 n Nothing Nothing = Nothing 236 n (Just x) Nothing = Just x 237 n Nothing (Just y) = Just y 238 n (Just x) (Just y) = Just (max x y) 239{-# INLINE mergeError #-} 240 241-- | A non-empty collection of 'ParseError's equipped with 'PosState' that 242-- allows to pretty-print the errors efficiently and correctly. 243-- 244-- @since 7.0.0 245data ParseErrorBundle s e = ParseErrorBundle 246 { -- | A collection of 'ParseError's that is sorted by parse error offsets 247 bundleErrors :: NonEmpty (ParseError s e), 248 -- | State that is used for line\/column calculation 249 bundlePosState :: PosState s 250 } 251 deriving (Generic) 252 253deriving instance 254 ( Show s, 255 Show (Token s), 256 Show e 257 ) => 258 Show (ParseErrorBundle s e) 259 260deriving instance 261 ( Eq s, 262 Eq (Token s), 263 Eq e 264 ) => 265 Eq (ParseErrorBundle s e) 266 267deriving instance 268 ( Typeable s, 269 Typeable (Token s), 270 Typeable e 271 ) => 272 Typeable (ParseErrorBundle s e) 273 274deriving instance 275 ( Data s, 276 Data (Token s), 277 Ord (Token s), 278 Data e, 279 Ord e 280 ) => 281 Data (ParseErrorBundle s e) 282 283instance 284 ( NFData s, 285 NFData (Token s), 286 NFData e 287 ) => 288 NFData (ParseErrorBundle s e) 289 290instance 291 ( Show s, 292 Show (Token s), 293 Show e, 294 ShowErrorComponent e, 295 VisualStream s, 296 TraversableStream s, 297 Typeable s, 298 Typeable e 299 ) => 300 Exception (ParseErrorBundle s e) 301 where 302 displayException = errorBundlePretty 303 304-- | Attach 'SourcePos'es to items in a 'Traversable' container given that 305-- there is a projection allowing to get an offset per item. 306-- 307-- Items must be in ascending order with respect to their offsets. 308-- 309-- @since 7.0.0 310attachSourcePos :: 311 (Traversable t, TraversableStream s) => 312 -- | How to project offset from an item (e.g. 'errorOffset') 313 (a -> Int) -> 314 -- | The collection of items 315 t a -> 316 -- | Initial 'PosState' 317 PosState s -> 318 -- | The collection with 'SourcePos'es 319 -- added and the final 'PosState' 320 (t (a, SourcePos), PosState s) 321attachSourcePos projectOffset xs = runState (traverse f xs) 322 where 323 f a = do 324 pst <- get 325 let pst' = reachOffsetNoLine (projectOffset a) pst 326 put pst' 327 return (a, pstateSourcePos pst') 328{-# INLINEABLE attachSourcePos #-} 329 330---------------------------------------------------------------------------- 331-- Pretty-printing 332 333-- | The type class defines how to print a custom component of 'ParseError'. 334-- 335-- @since 5.0.0 336class Ord a => ShowErrorComponent a where 337 -- | Pretty-print a component of 'ParseError'. 338 showErrorComponent :: a -> String 339 340 -- | Length of the error component in characters, used for highlighting of 341 -- parse errors in input string. 342 -- 343 -- @since 7.0.0 344 errorComponentLen :: a -> Int 345 errorComponentLen _ = 1 346 347instance ShowErrorComponent Void where 348 showErrorComponent = absurd 349 350-- | Pretty-print a 'ParseErrorBundle'. All 'ParseError's in the bundle will 351-- be pretty-printed in order together with the corresponding offending 352-- lines by doing a single efficient pass over the input stream. The 353-- rendered 'String' always ends with a newline. 354-- 355-- @since 7.0.0 356errorBundlePretty :: 357 forall s e. 358 ( VisualStream s, 359 TraversableStream s, 360 ShowErrorComponent e 361 ) => 362 -- | Parse error bundle to display 363 ParseErrorBundle s e -> 364 -- | Textual rendition of the bundle 365 String 366errorBundlePretty ParseErrorBundle {..} = 367 let (r, _) = foldl f (id, bundlePosState) bundleErrors 368 in drop 1 (r "") 369 where 370 f :: 371 (ShowS, PosState s) -> 372 ParseError s e -> 373 (ShowS, PosState s) 374 f (o, !pst) e = (o . (outChunk ++), pst') 375 where 376 (msline, pst') = reachOffset (errorOffset e) pst 377 epos = pstateSourcePos pst' 378 outChunk = 379 "\n" <> sourcePosPretty epos <> ":\n" 380 <> offendingLine 381 <> parseErrorTextPretty e 382 offendingLine = 383 case msline of 384 Nothing -> "" 385 Just sline -> 386 let rpadding = 387 if pointerLen > 0 388 then replicate rpshift ' ' 389 else "" 390 pointerLen = 391 if rpshift + elen > slineLen 392 then slineLen - rpshift + 1 393 else elen 394 pointer = replicate pointerLen '^' 395 lineNumber = (show . unPos . sourceLine) epos 396 padding = replicate (length lineNumber + 1) ' ' 397 rpshift = unPos (sourceColumn epos) - 1 398 slineLen = length sline 399 in padding <> "|\n" <> lineNumber <> " | " <> sline 400 <> "\n" 401 <> padding 402 <> "| " 403 <> rpadding 404 <> pointer 405 <> "\n" 406 pxy = Proxy :: Proxy s 407 elen = 408 case e of 409 TrivialError _ Nothing _ -> 1 410 TrivialError _ (Just x) _ -> errorItemLength pxy x 411 FancyError _ xs -> 412 E.foldl' (\a b -> max a (errorFancyLength b)) 1 xs 413 414-- | Pretty-print a 'ParseError'. The rendered 'String' always ends with a 415-- newline. 416-- 417-- @since 5.0.0 418parseErrorPretty :: 419 (VisualStream s, ShowErrorComponent e) => 420 -- | Parse error to render 421 ParseError s e -> 422 -- | Result of rendering 423 String 424parseErrorPretty e = 425 "offset=" <> show (errorOffset e) <> ":\n" <> parseErrorTextPretty e 426 427-- | Pretty-print a textual part of a 'ParseError', that is, everything 428-- except for its position. The rendered 'String' always ends with a 429-- newline. 430-- 431-- @since 5.1.0 432parseErrorTextPretty :: 433 forall s e. 434 (VisualStream s, ShowErrorComponent e) => 435 -- | Parse error to render 436 ParseError s e -> 437 -- | Result of rendering 438 String 439parseErrorTextPretty (TrivialError _ us ps) = 440 if isNothing us && E.null ps 441 then "unknown parse error\n" 442 else 443 messageItemsPretty "unexpected " (showErrorItem pxy `E.map` maybe E.empty E.singleton us) 444 <> messageItemsPretty "expecting " (showErrorItem pxy `E.map` ps) 445 where 446 pxy = Proxy :: Proxy s 447parseErrorTextPretty (FancyError _ xs) = 448 if E.null xs 449 then "unknown fancy parse error\n" 450 else unlines (showErrorFancy <$> E.toAscList xs) 451 452---------------------------------------------------------------------------- 453-- Helpers 454 455-- | Pretty-print an 'ErrorItem'. 456showErrorItem :: VisualStream s => Proxy s -> ErrorItem (Token s) -> String 457showErrorItem pxy = \case 458 Tokens ts -> showTokens pxy ts 459 Label label -> NE.toList label 460 EndOfInput -> "end of input" 461 462-- | Get length of the “pointer” to display under a given 'ErrorItem'. 463errorItemLength :: VisualStream s => Proxy s -> ErrorItem (Token s) -> Int 464errorItemLength pxy = \case 465 Tokens ts -> tokensLength pxy ts 466 _ -> 1 467 468-- | Pretty-print an 'ErrorFancy'. 469showErrorFancy :: ShowErrorComponent e => ErrorFancy e -> String 470showErrorFancy = \case 471 ErrorFail msg -> msg 472 ErrorIndentation ord ref actual -> 473 "incorrect indentation (got " <> show (unPos actual) 474 <> ", should be " 475 <> p 476 <> show (unPos ref) 477 <> ")" 478 where 479 p = case ord of 480 LT -> "less than " 481 EQ -> "equal to " 482 GT -> "greater than " 483 ErrorCustom a -> showErrorComponent a 484 485-- | Get length of the “pointer” to display under a given 'ErrorFancy'. 486errorFancyLength :: ShowErrorComponent e => ErrorFancy e -> Int 487errorFancyLength = \case 488 ErrorCustom a -> errorComponentLen a 489 _ -> 1 490 491-- | Transforms a list of error messages into their textual representation. 492messageItemsPretty :: 493 -- | Prefix to prepend 494 String -> 495 -- | Collection of messages 496 Set String -> 497 -- | Result of rendering 498 String 499messageItemsPretty prefix ts 500 | E.null ts = "" 501 | otherwise = 502 prefix <> (orList . NE.fromList . E.toAscList) ts <> "\n" 503 504-- | Print a pretty list where items are separated with commas and the word 505-- “or” according to the rules of English punctuation. 506orList :: NonEmpty String -> String 507orList (x :| []) = x 508orList (x :| [y]) = x <> " or " <> y 509orList xs = intercalate ", " (NE.init xs) <> ", or " <> NE.last xs 510