1--- * -*- outline-regexp:"--- \\*"; -*- 2--- ** doc 3-- In Emacs, use TAB on lines beginning with "-- *" to collapse/expand sections. 4{-| 5 6File reading/parsing utilities used by multiple readers, and a good 7amount of the parsers for journal format, to avoid import cycles 8when JournalReader imports other readers. 9 10Some of these might belong in Hledger.Read.JournalReader or Hledger.Read. 11 12-} 13 14--- ** language 15{-# LANGUAGE BangPatterns #-} 16{-# LANGUAGE CPP #-} 17{-# LANGUAGE FlexibleContexts #-} 18{-# LANGUAGE LambdaCase #-} 19{-# LANGUAGE NamedFieldPuns #-} 20{-# LANGUAGE NoMonoLocalBinds #-} 21{-# LANGUAGE OverloadedStrings #-} 22{-# LANGUAGE PackageImports #-} 23{-# LANGUAGE Rank2Types #-} 24{-# LANGUAGE RecordWildCards #-} 25{-# LANGUAGE ScopedTypeVariables #-} 26{-# LANGUAGE TupleSections #-} 27{-# LANGUAGE TypeFamilies #-} 28 29--- ** exports 30module Hledger.Read.Common ( 31 Reader (..), 32 InputOpts (..), 33 definputopts, 34 rawOptsToInputOpts, 35 36 -- * parsing utilities 37 runTextParser, 38 rtp, 39 runJournalParser, 40 rjp, 41 runErroringJournalParser, 42 rejp, 43 genericSourcePos, 44 journalSourcePos, 45 parseAndFinaliseJournal, 46 parseAndFinaliseJournal', 47 journalFinalise, 48 setYear, 49 getYear, 50 setDefaultCommodityAndStyle, 51 getDefaultCommodityAndStyle, 52 getDefaultAmountStyle, 53 getAmountStyle, 54 addDeclaredAccountType, 55 pushParentAccount, 56 popParentAccount, 57 getParentAccount, 58 addAccountAlias, 59 getAccountAliases, 60 clearAccountAliases, 61 journalAddFile, 62 63 -- * parsers 64 -- ** transaction bits 65 statusp, 66 codep, 67 descriptionp, 68 69 -- ** dates 70 datep, 71 datetimep, 72 secondarydatep, 73 74 -- ** account names 75 modifiedaccountnamep, 76 accountnamep, 77 78 -- ** amounts 79 spaceandamountormissingp, 80 amountp, 81 amountp', 82 mamountp', 83 commoditysymbolp, 84 priceamountp, 85 balanceassertionp, 86 lotpricep, 87 numberp, 88 fromRawNumber, 89 rawnumberp, 90 91 -- ** comments 92 multilinecommentp, 93 emptyorcommentlinep, 94 95 followingcommentp, 96 transactioncommentp, 97 postingcommentp, 98 99 -- ** bracketed dates 100 bracketeddatetagsp, 101 102 -- ** misc 103 singlespacedtextp, 104 singlespacedtextsatisfyingp, 105 singlespacep, 106 107 skipNonNewlineSpaces, 108 skipNonNewlineSpaces1, 109 110 -- * tests 111 tests_Common, 112) 113where 114 115--- ** imports 116import Prelude () 117import "base-compat-batteries" Prelude.Compat hiding (fail, readFile) 118import Control.Applicative.Permutations (runPermutation, toPermutationWithDefault) 119import qualified "base-compat-batteries" Control.Monad.Fail.Compat as Fail (fail) 120import Control.Monad.Except (ExceptT(..), runExceptT, throwError) 121import Control.Monad.State.Strict hiding (fail) 122import Data.Bifunctor (bimap, second) 123import Data.Char (digitToInt, isDigit, isSpace) 124import Data.Decimal (DecimalRaw (Decimal), Decimal) 125import Data.Default (Default(..)) 126import Data.Function ((&)) 127import Data.Functor.Identity (Identity) 128import "base-compat-batteries" Data.List.Compat 129import Data.List.NonEmpty (NonEmpty(..)) 130import Data.Maybe (catMaybes, fromMaybe, isJust, listToMaybe) 131import qualified Data.Map as M 132import qualified Data.Semigroup as Sem 133import Data.Text (Text) 134import qualified Data.Text as T 135import Data.Time.Calendar (Day, fromGregorianValid, toGregorian) 136import Data.Time.LocalTime (LocalTime(..), TimeOfDay(..)) 137import Data.Word (Word8) 138import System.Time (getClockTime) 139import Text.Megaparsec 140import Text.Megaparsec.Char (char, char', digitChar, newline, string) 141import Text.Megaparsec.Char.Lexer (decimal) 142import Text.Megaparsec.Custom 143 (FinalParseError, attachSource, customErrorBundlePretty, 144 finalErrorBundlePretty, parseErrorAt, parseErrorAtRegion) 145 146import Hledger.Data 147import Hledger.Utils 148 149--- ** doctest setup 150-- $setup 151-- >>> :set -XOverloadedStrings 152 153--- ** types 154 155-- main types; a few more below 156 157-- | A hledger journal reader is a triple of storage format name, a 158-- detector of that format, and a parser from that format to Journal. 159-- The type variable m appears here so that rParserr can hold a 160-- journal parser, which depends on it. 161data Reader m = Reader { 162 163 -- The canonical name of the format handled by this reader 164 rFormat :: StorageFormat 165 166 -- The file extensions recognised as containing this format 167 ,rExtensions :: [String] 168 169 -- The entry point for reading this format, accepting input options, file 170 -- path for error messages and file contents, producing an exception-raising IO 171 -- action that produces a journal or error message. 172 ,rReadFn :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal 173 174 -- The actual megaparsec parser called by the above, in case 175 -- another parser (includedirectivep) wants to use it directly. 176 ,rParser :: MonadIO m => ErroringJournalParser m ParsedJournal 177 } 178 179instance Show (Reader m) where show r = rFormat r ++ " reader" 180 181-- $setup 182 183-- | Various options to use when reading journal files. 184-- Similar to CliOptions.inputflags, simplifies the journal-reading functions. 185data InputOpts = InputOpts { 186 -- files_ :: [FilePath] 187 mformat_ :: Maybe StorageFormat -- ^ a file/storage format to try, unless overridden 188 -- by a filename prefix. Nothing means try all. 189 ,mrules_file_ :: Maybe FilePath -- ^ a conversion rules file to use (when reading CSV) 190 ,aliases_ :: [String] -- ^ account name aliases to apply 191 ,anon_ :: Bool -- ^ do light anonymisation/obfuscation of the data 192 ,ignore_assertions_ :: Bool -- ^ don't check balance assertions 193 ,new_ :: Bool -- ^ read only new transactions since this file was last read 194 ,new_save_ :: Bool -- ^ save latest new transactions state for next time 195 ,pivot_ :: String -- ^ use the given field's value as the account name 196 ,auto_ :: Bool -- ^ generate automatic postings when journal is parsed 197 } deriving (Show) 198 199instance Default InputOpts where def = definputopts 200 201definputopts :: InputOpts 202definputopts = InputOpts def def def def def def True def def 203 204rawOptsToInputOpts :: RawOpts -> InputOpts 205rawOptsToInputOpts rawopts = InputOpts{ 206 -- files_ = listofstringopt "file" rawopts 207 mformat_ = Nothing 208 ,mrules_file_ = maybestringopt "rules-file" rawopts 209 ,aliases_ = listofstringopt "alias" rawopts 210 ,anon_ = boolopt "anon" rawopts 211 ,ignore_assertions_ = boolopt "ignore-assertions" rawopts 212 ,new_ = boolopt "new" rawopts 213 ,new_save_ = True 214 ,pivot_ = stringopt "pivot" rawopts 215 ,auto_ = boolopt "auto" rawopts 216 } 217 218--- ** parsing utilities 219 220-- | Run a text parser in the identity monad. See also: parseWithState. 221runTextParser, rtp 222 :: TextParser Identity a -> Text -> Either (ParseErrorBundle Text CustomErr) a 223runTextParser p t = runParser p "" t 224rtp = runTextParser 225 226-- | Run a journal parser in some monad. See also: parseWithState. 227runJournalParser, rjp 228 :: Monad m 229 => JournalParser m a -> Text -> m (Either (ParseErrorBundle Text CustomErr) a) 230runJournalParser p t = runParserT (evalStateT p nulljournal) "" t 231rjp = runJournalParser 232 233-- | Run an erroring journal parser in some monad. See also: parseWithState. 234runErroringJournalParser, rejp 235 :: Monad m 236 => ErroringJournalParser m a 237 -> Text 238 -> m (Either FinalParseError (Either (ParseErrorBundle Text CustomErr) a)) 239runErroringJournalParser p t = 240 runExceptT $ runParserT (evalStateT p nulljournal) "" t 241rejp = runErroringJournalParser 242 243genericSourcePos :: SourcePos -> GenericSourcePos 244genericSourcePos p = GenericSourcePos (sourceName p) (unPos $ sourceLine p) (unPos $ sourceColumn p) 245 246-- | Construct a generic start & end line parse position from start and end megaparsec SourcePos's. 247journalSourcePos :: SourcePos -> SourcePos -> GenericSourcePos 248journalSourcePos p p' = JournalSourcePos (sourceName p) (unPos $ sourceLine p, line') 249 where line' | (unPos $ sourceColumn p') == 1 = unPos (sourceLine p') - 1 250 | otherwise = unPos $ sourceLine p' -- might be at end of file withat last new-line 251 252-- | Given a parser to ParsedJournal, input options, file path and 253-- content: run the parser on the content, and finalise the result to 254-- get a Journal; or throw an error. 255parseAndFinaliseJournal :: ErroringJournalParser IO ParsedJournal -> InputOpts 256 -> FilePath -> Text -> ExceptT String IO Journal 257parseAndFinaliseJournal parser iopts f txt = do 258 y <- liftIO getCurrentYear 259 let initJournal = nulljournal{ jparsedefaultyear = Just y, jincludefilestack = [f] } 260 eep <- liftIO $ runExceptT $ runParserT (evalStateT parser initJournal) f txt 261 -- TODO: urgh.. clean this up somehow 262 case eep of 263 Left finalParseError -> throwError $ finalErrorBundlePretty $ attachSource f txt finalParseError 264 Right ep -> case ep of 265 Left e -> throwError $ customErrorBundlePretty e 266 Right pj -> journalFinalise iopts f txt pj 267 268-- | Like parseAndFinaliseJournal but takes a (non-Erroring) JournalParser. 269-- Used for timeclock/timedot. 270-- TODO: get rid of this, use parseAndFinaliseJournal instead 271parseAndFinaliseJournal' :: JournalParser IO ParsedJournal -> InputOpts 272 -> FilePath -> Text -> ExceptT String IO Journal 273parseAndFinaliseJournal' parser iopts f txt = do 274 y <- liftIO getCurrentYear 275 let initJournal = nulljournal 276 { jparsedefaultyear = Just y 277 , jincludefilestack = [f] } 278 ep <- liftIO $ runParserT (evalStateT parser initJournal) f txt 279 -- see notes above 280 case ep of 281 Left e -> throwError $ customErrorBundlePretty e 282 Right pj -> journalFinalise iopts f txt pj 283 284-- | Post-process a Journal that has just been parsed or generated, in this order: 285-- 286-- - apply canonical amount styles, 287-- 288-- - save misc info and reverse transactions into their original parse order, 289-- 290-- - evaluate balance assignments and balance each transaction, 291-- 292-- - apply transaction modifiers (auto postings) if enabled, 293-- 294-- - check balance assertions if enabled. 295-- 296-- - infer transaction-implied market prices from transaction prices 297-- 298journalFinalise :: InputOpts -> FilePath -> Text -> Journal -> ExceptT String IO Journal 299journalFinalise iopts f txt pj = do 300 t <- liftIO getClockTime 301 d <- liftIO getCurrentDay 302 -- Infer and apply canonical styles for each commodity (or fail). 303 -- This affects transaction balancing/assertions/assignments, so needs to be done early. 304 -- (TODO: since #903's refactoring for hledger 1.12, 305 -- journalApplyCommodityStyles here is seeing the 306 -- transactions before they get reversesd to normal order.) 307 case journalApplyCommodityStyles pj of 308 Left e -> throwError e 309 Right pj' -> either throwError return $ 310 pj' 311 & journalAddFile (f, txt) -- save the file path and content 312 & journalSetLastReadTime t -- save the last read time 313 & journalReverse -- convert all lists to parse order 314 & (if not (auto_ iopts) || null (jtxnmodifiers pj) 315 then 316 -- Auto postings are not active. 317 -- Balance all transactions and maybe check balance assertions. 318 journalBalanceTransactions (not $ ignore_assertions_ iopts) 319 else \j -> do -- Either monad 320 -- Auto postings are active. 321 -- Balance all transactions without checking balance assertions, 322 j' <- journalBalanceTransactions False j 323 -- then add the auto postings 324 -- (Note adding auto postings after balancing means #893b fails; 325 -- adding them before balancing probably means #893a, #928, #938 fail.) 326 case journalModifyTransactions d j' of 327 Left e -> throwError e 328 Right j'' -> do 329 -- then apply commodity styles once more, to style the auto posting amounts. (XXX inefficient ?) 330 j''' <- journalApplyCommodityStyles j'' 331 -- then check balance assertions. 332 journalBalanceTransactions (not $ ignore_assertions_ iopts) j''' 333 ) 334 & fmap journalInferMarketPricesFromTransactions -- infer market prices from commodity-exchanging transactions 335 336setYear :: Year -> JournalParser m () 337setYear y = modify' (\j -> j{jparsedefaultyear=Just y}) 338 339getYear :: JournalParser m (Maybe Year) 340getYear = fmap jparsedefaultyear get 341 342setDefaultCommodityAndStyle :: (CommoditySymbol,AmountStyle) -> JournalParser m () 343setDefaultCommodityAndStyle cs = modify' (\j -> j{jparsedefaultcommodity=Just cs}) 344 345getDefaultCommodityAndStyle :: JournalParser m (Maybe (CommoditySymbol,AmountStyle)) 346getDefaultCommodityAndStyle = jparsedefaultcommodity `fmap` get 347 348-- | Get amount style associated with default currency. 349-- 350-- Returns 'AmountStyle' used to defined by a latest default commodity directive 351-- prior to current position within this file or its parents. 352getDefaultAmountStyle :: JournalParser m (Maybe AmountStyle) 353getDefaultAmountStyle = fmap snd <$> getDefaultCommodityAndStyle 354 355-- | Lookup currency-specific amount style. 356-- 357-- Returns 'AmountStyle' used in commodity directive within current journal 358-- prior to current position or in its parents files. 359getAmountStyle :: CommoditySymbol -> JournalParser m (Maybe AmountStyle) 360getAmountStyle commodity = do 361 specificStyle <- maybe Nothing cformat . M.lookup commodity . jcommodities <$> get 362 defaultStyle <- fmap snd <$> getDefaultCommodityAndStyle 363 let effectiveStyle = listToMaybe $ catMaybes [specificStyle, defaultStyle] 364 return effectiveStyle 365 366addDeclaredAccountType :: AccountName -> AccountType -> JournalParser m () 367addDeclaredAccountType acct atype = 368 modify' (\j -> j{jdeclaredaccounttypes = M.insertWith (++) atype [acct] (jdeclaredaccounttypes j)}) 369 370pushParentAccount :: AccountName -> JournalParser m () 371pushParentAccount acct = modify' (\j -> j{jparseparentaccounts = acct : jparseparentaccounts j}) 372 373popParentAccount :: JournalParser m () 374popParentAccount = do 375 j <- get 376 case jparseparentaccounts j of 377 [] -> unexpected (Tokens ('E' :| "nd of apply account block with no beginning")) 378 (_:rest) -> put j{jparseparentaccounts=rest} 379 380getParentAccount :: JournalParser m AccountName 381getParentAccount = fmap (concatAccountNames . reverse . jparseparentaccounts) get 382 383addAccountAlias :: MonadState Journal m => AccountAlias -> m () 384addAccountAlias a = modify' (\(j@Journal{..}) -> j{jparsealiases=a:jparsealiases}) 385 386getAccountAliases :: MonadState Journal m => m [AccountAlias] 387getAccountAliases = fmap jparsealiases get 388 389clearAccountAliases :: MonadState Journal m => m () 390clearAccountAliases = modify' (\j -> j{jparsealiases=[]}) 391 392-- getTransactionCount :: MonadState Journal m => m Integer 393-- getTransactionCount = fmap jparsetransactioncount get 394-- 395-- setTransactionCount :: MonadState Journal m => Integer -> m () 396-- setTransactionCount i = modify' (\j -> j{jparsetransactioncount=i}) 397-- 398-- -- | Increment the transaction index by one and return the new value. 399-- incrementTransactionCount :: MonadState Journal m => m Integer 400-- incrementTransactionCount = do 401-- modify' (\j -> j{jparsetransactioncount=jparsetransactioncount j + 1}) 402-- getTransactionCount 403 404journalAddFile :: (FilePath,Text) -> Journal -> Journal 405journalAddFile f j@Journal{jfiles=fs} = j{jfiles=fs++[f]} 406 -- append, unlike the other fields, even though we do a final reverse, 407 -- to compensate for additional reversal due to including/monoid-concatting 408 409-- A version of `match` that is strict in the returned text 410match' :: TextParser m a -> TextParser m (Text, a) 411match' p = do 412 (!txt, p) <- match p 413 pure (txt, p) 414 415--- ** parsers 416--- *** transaction bits 417 418statusp :: TextParser m Status 419statusp = 420 choice' 421 [ skipNonNewlineSpaces >> char '*' >> return Cleared 422 , skipNonNewlineSpaces >> char '!' >> return Pending 423 , return Unmarked 424 ] 425 426codep :: TextParser m Text 427codep = option "" $ do 428 try $ do 429 skipNonNewlineSpaces1 430 char '(' 431 code <- takeWhileP Nothing $ \c -> c /= ')' && c /= '\n' 432 char ')' <?> "closing bracket ')' for transaction code" 433 pure code 434 435descriptionp :: TextParser m Text 436descriptionp = takeWhileP Nothing (not . semicolonOrNewline) 437 where semicolonOrNewline c = c == ';' || c == '\n' 438 439--- *** dates 440 441-- | Parse a date in YYYY-MM-DD format. 442-- Slash (/) and period (.) are also allowed as separators. 443-- The year may be omitted if a default year has been set. 444-- Leading zeroes may be omitted. 445datep :: JournalParser m Day 446datep = do 447 mYear <- getYear 448 lift $ datep' mYear 449 450datep' :: Maybe Year -> TextParser m Day 451datep' mYear = do 452 startOffset <- getOffset 453 d1 <- yearorintp <?> "year or month" 454 sep <- datesepchar <?> "date separator" 455 d2 <- decimal <?> "month or day" 456 case d1 of 457 Left y -> fullDate startOffset y sep d2 458 Right m -> partialDate startOffset mYear m sep d2 459 <?> "full or partial date" 460 where 461 fullDate :: Int -> Year -> Char -> Month -> TextParser m Day 462 fullDate startOffset year sep1 month = do 463 sep2 <- satisfy isDateSepChar <?> "date separator" 464 day <- decimal <?> "day" 465 endOffset <- getOffset 466 let dateStr = show year ++ [sep1] ++ show month ++ [sep2] ++ show day 467 468 when (sep1 /= sep2) $ customFailure $ parseErrorAtRegion startOffset endOffset $ 469 "invalid date (mixing date separators is not allowed): " ++ dateStr 470 471 case fromGregorianValid year month day of 472 Nothing -> customFailure $ parseErrorAtRegion startOffset endOffset $ 473 "well-formed but invalid date: " ++ dateStr 474 Just date -> pure $! date 475 476 partialDate :: Int -> Maybe Year -> Month -> Char -> MonthDay -> TextParser m Day 477 partialDate startOffset mYear month sep day = do 478 endOffset <- getOffset 479 case mYear of 480 Just year -> 481 case fromGregorianValid year month day of 482 Nothing -> customFailure $ parseErrorAtRegion startOffset endOffset $ 483 "well-formed but invalid date: " ++ dateStr 484 Just date -> pure $! date 485 where dateStr = show year ++ [sep] ++ show month ++ [sep] ++ show day 486 487 Nothing -> customFailure $ parseErrorAtRegion startOffset endOffset $ 488 "partial date "++dateStr++" found, but the current year is unknown" 489 where dateStr = show month ++ [sep] ++ show day 490 491{-# INLINABLE datep' #-} 492 493-- | Parse a date and time in YYYY-MM-DD HH:MM[:SS][+-ZZZZ] format. 494-- Slash (/) and period (.) are also allowed as date separators. 495-- The year may be omitted if a default year has been set. 496-- Seconds are optional. 497-- The timezone is optional and ignored (the time is always interpreted as a local time). 498-- Leading zeroes may be omitted (except in a timezone). 499datetimep :: JournalParser m LocalTime 500datetimep = do 501 mYear <- getYear 502 lift $ datetimep' mYear 503 504datetimep' :: Maybe Year -> TextParser m LocalTime 505datetimep' mYear = do 506 day <- datep' mYear 507 skipNonNewlineSpaces1 508 time <- timeOfDay 509 optional timeZone -- ignoring time zones 510 pure $ LocalTime day time 511 512 where 513 timeOfDay :: TextParser m TimeOfDay 514 timeOfDay = do 515 off1 <- getOffset 516 h' <- twoDigitDecimal <?> "hour" 517 off2 <- getOffset 518 unless (h' >= 0 && h' <= 23) $ customFailure $ 519 parseErrorAtRegion off1 off2 "invalid time (bad hour)" 520 521 char ':' <?> "':' (hour-minute separator)" 522 off3 <- getOffset 523 m' <- twoDigitDecimal <?> "minute" 524 off4 <- getOffset 525 unless (m' >= 0 && m' <= 59) $ customFailure $ 526 parseErrorAtRegion off3 off4 "invalid time (bad minute)" 527 528 s' <- option 0 $ do 529 char ':' <?> "':' (minute-second separator)" 530 off5 <- getOffset 531 s' <- twoDigitDecimal <?> "second" 532 off6 <- getOffset 533 unless (s' >= 0 && s' <= 59) $ customFailure $ 534 parseErrorAtRegion off5 off6 "invalid time (bad second)" 535 -- we do not support leap seconds 536 pure s' 537 538 pure $ TimeOfDay h' m' (fromIntegral s') 539 540 twoDigitDecimal :: TextParser m Int 541 twoDigitDecimal = do 542 d1 <- digitToInt <$> digitChar 543 d2 <- digitToInt <$> (digitChar <?> "a second digit") 544 pure $ d1*10 + d2 545 546 timeZone :: TextParser m String 547 timeZone = do 548 plusminus <- satisfy $ \c -> c == '-' || c == '+' 549 fourDigits <- count 4 (digitChar <?> "a digit (for a time zone)") 550 pure $ plusminus:fourDigits 551 552secondarydatep :: Day -> TextParser m Day 553secondarydatep primaryDate = char '=' *> datep' (Just primaryYear) 554 where primaryYear = first3 $ toGregorian primaryDate 555 556-- | Parse a year number or an Int. Years must contain at least four 557-- digits. 558yearorintp :: TextParser m (Either Year Int) 559yearorintp = do 560 yearOrMonth <- takeWhile1P (Just "digit") isDigit 561 let n = readDecimal yearOrMonth 562 return $ if T.length yearOrMonth >= 4 then Left n else Right (fromInteger n) 563 564--- *** account names 565 566-- | Parse an account name (plus one following space if present), 567-- then apply any parent account prefix and/or account aliases currently in effect, 568-- in that order. (Ie first add the parent account prefix, then rewrite with aliases). 569-- This calls error if any account alias with an invalid regular expression exists. 570modifiedaccountnamep :: JournalParser m AccountName 571modifiedaccountnamep = do 572 parent <- getParentAccount 573 aliases <- getAccountAliases 574 -- off1 <- getOffset 575 a <- lift accountnamep 576 -- off2 <- getOffset 577 -- XXX or accountNameApplyAliasesMemo ? doesn't seem to make a difference (retest that function) 578 case accountNameApplyAliases aliases $ joinAccountNames parent a of 579 Right a' -> return $! a' 580 -- should not happen, regexaliasp will have displayed a better error already: 581 -- (XXX why does customFailure cause error to be displayed there, but not here ?) 582 -- Left e -> customFailure $! parseErrorAtRegion off1 off2 err 583 Left e -> error' err -- PARTIAL: 584 where 585 err = "problem in account alias applied to "++T.unpack a++": "++e 586 587-- | Parse an account name, plus one following space if present. 588-- Account names have one or more parts separated by the account separator character, 589-- and are terminated by two or more spaces (or end of input). 590-- Each part is at least one character long, may have single spaces inside it, 591-- and starts with a non-whitespace. 592-- Note, this means "{account}", "%^!" and ";comment" are all accepted 593-- (parent parsers usually prevent/consume the last). 594-- It should have required parts to start with an alphanumeric; 595-- for now it remains as-is for backwards compatibility. 596accountnamep :: TextParser m AccountName 597accountnamep = singlespacedtextp 598 599 600-- | Parse any text beginning with a non-whitespace character, until a 601-- double space or the end of input. 602-- TODO including characters which normally start a comment (;#) - exclude those ? 603singlespacedtextp :: TextParser m T.Text 604singlespacedtextp = singlespacedtextsatisfyingp (const True) 605 606-- | Similar to 'singlespacedtextp', except that the text must only contain 607-- characters satisfying the given predicate. 608singlespacedtextsatisfyingp :: (Char -> Bool) -> TextParser m T.Text 609singlespacedtextsatisfyingp pred = do 610 firstPart <- partp 611 otherParts <- many $ try $ singlespacep *> partp 612 pure $! T.unwords $ firstPart : otherParts 613 where 614 partp = takeWhile1P Nothing (\c -> pred c && not (isSpace c)) 615 616-- | Parse one non-newline whitespace character that is not followed by another one. 617singlespacep :: TextParser m () 618singlespacep = spacenonewline *> notFollowedBy spacenonewline 619 620--- *** amounts 621 622-- | Parse whitespace then an amount, with an optional left or right 623-- currency symbol and optional price, or return the special 624-- "missing" marker amount. 625spaceandamountormissingp :: JournalParser m MixedAmount 626spaceandamountormissingp = 627 option missingmixedamt $ try $ do 628 lift $ skipNonNewlineSpaces1 629 Mixed . (:[]) <$> amountp 630 631-- | Parse a single-commodity amount, with optional symbol on the left 632-- or right, followed by, in any order: an optional transaction price, 633-- an optional ledger-style lot price, and/or an optional ledger-style 634-- lot date. A lot price and lot date will be ignored. 635amountp :: JournalParser m Amount 636amountp = label "amount" $ do 637 let spaces = lift $ skipNonNewlineSpaces 638 amount <- amountwithoutpricep <* spaces 639 (mprice, _elotprice, _elotdate) <- runPermutation $ 640 (,,) <$> toPermutationWithDefault Nothing (Just <$> priceamountp <* spaces) 641 <*> toPermutationWithDefault Nothing (Just <$> lotpricep <* spaces) 642 <*> toPermutationWithDefault Nothing (Just <$> lotdatep <* spaces) 643 pure $ amount { aprice = mprice } 644 645-- XXX Just like amountp but don't allow lot prices. Needed for balanceassertionp. 646amountpnolotprices :: JournalParser m Amount 647amountpnolotprices = label "amount" $ do 648 let spaces = lift $ skipNonNewlineSpaces 649 amount <- amountwithoutpricep 650 spaces 651 mprice <- optional $ priceamountp <* spaces 652 pure $ amount { aprice = mprice } 653 654amountwithoutpricep :: JournalParser m Amount 655amountwithoutpricep = do 656 (mult, sign) <- lift $ (,) <$> multiplierp <*> signp 657 leftsymbolamountp mult sign <|> rightornosymbolamountp mult sign 658 659 where 660 661 leftsymbolamountp :: Bool -> (Decimal -> Decimal) -> JournalParser m Amount 662 leftsymbolamountp mult sign = label "amount" $ do 663 c <- lift commoditysymbolp 664 suggestedStyle <- getAmountStyle c 665 commodityspaced <- lift skipNonNewlineSpaces' 666 sign2 <- lift $ signp 667 offBeforeNum <- getOffset 668 ambiguousRawNum <- lift rawnumberp 669 mExponent <- lift $ optional $ try exponentp 670 offAfterNum <- getOffset 671 let numRegion = (offBeforeNum, offAfterNum) 672 (q,prec,mdec,mgrps) <- lift $ interpretNumber numRegion suggestedStyle ambiguousRawNum mExponent 673 let s = amountstyle{ascommodityside=L, ascommodityspaced=commodityspaced, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} 674 return $ nullamt{acommodity=c, aquantity=sign (sign2 q), aismultiplier=mult, astyle=s, aprice=Nothing} 675 676 rightornosymbolamountp :: Bool -> (Decimal -> Decimal) -> JournalParser m Amount 677 rightornosymbolamountp mult sign = label "amount" $ do 678 offBeforeNum <- getOffset 679 ambiguousRawNum <- lift rawnumberp 680 mExponent <- lift $ optional $ try exponentp 681 offAfterNum <- getOffset 682 let numRegion = (offBeforeNum, offAfterNum) 683 mSpaceAndCommodity <- lift $ optional $ try $ (,) <$> skipNonNewlineSpaces' <*> commoditysymbolp 684 case mSpaceAndCommodity of 685 -- right symbol amount 686 Just (commodityspaced, c) -> do 687 suggestedStyle <- getAmountStyle c 688 (q,prec,mdec,mgrps) <- lift $ interpretNumber numRegion suggestedStyle ambiguousRawNum mExponent 689 let s = amountstyle{ascommodityside=R, ascommodityspaced=commodityspaced, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps} 690 return $ nullamt{acommodity=c, aquantity=sign q, aismultiplier=mult, astyle=s, aprice=Nothing} 691 -- no symbol amount 692 Nothing -> do 693 suggestedStyle <- getDefaultAmountStyle 694 (q,prec,mdec,mgrps) <- lift $ interpretNumber numRegion suggestedStyle ambiguousRawNum mExponent 695 -- if a default commodity has been set, apply it and its style to this amount 696 -- (unless it's a multiplier in an automated posting) 697 defcs <- getDefaultCommodityAndStyle 698 let (c,s) = case (mult, defcs) of 699 (False, Just (defc,defs)) -> (defc, defs{asprecision=max (asprecision defs) prec}) 700 _ -> ("", amountstyle{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}) 701 return $ nullamt{acommodity=c, aquantity=sign q, aismultiplier=mult, astyle=s, aprice=Nothing} 702 703 -- For reducing code duplication. Doesn't parse anything. Has the type 704 -- of a parser only in order to throw parse errors (for convenience). 705 interpretNumber 706 :: (Int, Int) -- offsets 707 -> Maybe AmountStyle 708 -> Either AmbiguousNumber RawNumber 709 -> Maybe Integer 710 -> TextParser m (Quantity, AmountPrecision, Maybe Char, Maybe DigitGroupStyle) 711 interpretNumber posRegion suggestedStyle ambiguousNum mExp = 712 let rawNum = either (disambiguateNumber suggestedStyle) id ambiguousNum 713 in case fromRawNumber rawNum mExp of 714 Left errMsg -> customFailure $ 715 uncurry parseErrorAtRegion posRegion errMsg 716 Right (q,p,d,g) -> pure (q, Precision p, d, g) 717 718-- | Parse an amount from a string, or get an error. 719amountp' :: String -> Amount 720amountp' s = 721 case runParser (evalStateT (amountp <* eof) nulljournal) "" (T.pack s) of 722 Right amt -> amt 723 Left err -> error' $ show err -- PARTIAL: XXX should throwError 724 725-- | Parse a mixed amount from a string, or get an error. 726mamountp' :: String -> MixedAmount 727mamountp' = Mixed . (:[]) . amountp' 728 729-- | Parse a minus or plus sign followed by zero or more spaces, 730-- or nothing, returning a function that negates or does nothing. 731signp :: Num a => TextParser m (a -> a) 732signp = ((char '-' *> pure negate <|> char '+' *> pure id) <* skipNonNewlineSpaces) <|> pure id 733 734multiplierp :: TextParser m Bool 735multiplierp = option False $ char '*' *> pure True 736 737commoditysymbolp :: TextParser m CommoditySymbol 738commoditysymbolp = 739 quotedcommoditysymbolp <|> simplecommoditysymbolp <?> "commodity symbol" 740 741quotedcommoditysymbolp :: TextParser m CommoditySymbol 742quotedcommoditysymbolp = 743 between (char '"') (char '"') $ takeWhile1P Nothing f 744 where f c = c /= ';' && c /= '\n' && c /= '\"' 745 746simplecommoditysymbolp :: TextParser m CommoditySymbol 747simplecommoditysymbolp = takeWhile1P Nothing (not . isNonsimpleCommodityChar) 748 749priceamountp :: JournalParser m AmountPrice 750priceamountp = label "transaction price" $ do 751 -- https://www.ledger-cli.org/3.0/doc/ledger3.html#Virtual-posting-costs 752 parenthesised <- option False $ char '(' >> pure True 753 char '@' 754 priceConstructor <- char '@' *> pure TotalPrice <|> pure UnitPrice 755 when parenthesised $ void $ char ')' 756 757 lift skipNonNewlineSpaces 758 priceAmount <- amountwithoutpricep -- <?> "unpriced amount (specifying a price)" 759 760 pure $ priceConstructor priceAmount 761 762balanceassertionp :: JournalParser m BalanceAssertion 763balanceassertionp = do 764 sourcepos <- genericSourcePos <$> lift getSourcePos 765 char '=' 766 istotal <- fmap isJust $ optional $ try $ char '=' 767 isinclusive <- fmap isJust $ optional $ try $ char '*' 768 lift skipNonNewlineSpaces 769 -- this amount can have a price; balance assertions ignore it, 770 -- but balance assignments will use it 771 a <- amountpnolotprices <?> "amount (for a balance assertion or assignment)" 772 return BalanceAssertion 773 { baamount = a 774 , batotal = istotal 775 , bainclusive = isinclusive 776 , baposition = sourcepos 777 } 778 779-- Parse a Ledger-style fixed {=UNITPRICE} or non-fixed {UNITPRICE} 780-- or fixed {{=TOTALPRICE}} or non-fixed {{TOTALPRICE}} lot price, 781-- and ignore it. 782-- https://www.ledger-cli.org/3.0/doc/ledger3.html#Fixing-Lot-Prices . 783lotpricep :: JournalParser m () 784lotpricep = label "ledger-style lot price" $ do 785 char '{' 786 doublebrace <- option False $ char '{' >> pure True 787 _fixed <- fmap isJust $ optional $ lift skipNonNewlineSpaces >> char '=' 788 lift skipNonNewlineSpaces 789 _a <- amountwithoutpricep 790 lift skipNonNewlineSpaces 791 char '}' 792 when (doublebrace) $ void $ char '}' 793 return () 794 795-- Parse a Ledger-style lot date [DATE], and ignore it. 796-- https://www.ledger-cli.org/3.0/doc/ledger3.html#Fixing-Lot-Prices . 797lotdatep :: JournalParser m () 798lotdatep = (do 799 char '[' 800 lift skipNonNewlineSpaces 801 _d <- datep 802 lift skipNonNewlineSpaces 803 char ']' 804 return () 805 ) <?> "ledger-style lot date" 806 807-- | Parse a string representation of a number for its value and display 808-- attributes. 809-- 810-- Some international number formats are accepted, eg either period or comma 811-- may be used for the decimal mark, and the other of these may be used for 812-- separating digit groups in the integer part. See 813-- http://en.wikipedia.org/wiki/Decimal_separator for more examples. 814-- 815-- This returns: the parsed numeric value, the precision (number of digits 816-- seen following the decimal mark), the decimal mark character used if any, 817-- and the digit group style if any. 818-- 819numberp :: Maybe AmountStyle -> TextParser m (Quantity, Word8, Maybe Char, Maybe DigitGroupStyle) 820numberp suggestedStyle = label "number" $ do 821 -- a number is an optional sign followed by a sequence of digits possibly 822 -- interspersed with periods, commas, or both 823 -- dbgparse 0 "numberp" 824 sign <- signp 825 rawNum <- either (disambiguateNumber suggestedStyle) id <$> rawnumberp 826 mExp <- optional $ try $ exponentp 827 dbg7 "numberp suggestedStyle" suggestedStyle `seq` return () 828 case dbg7 "numberp quantity,precision,mdecimalpoint,mgrps" 829 $ fromRawNumber rawNum mExp of 830 Left errMsg -> Fail.fail errMsg 831 Right (q, p, d, g) -> pure (sign q, p, d, g) 832 833exponentp :: TextParser m Integer 834exponentp = char' 'e' *> signp <*> decimal <?> "exponent" 835 836-- | Interpret a raw number as a decimal number. 837-- 838-- Returns: 839-- - the decimal number 840-- - the precision (number of digits after the decimal point) 841-- - the decimal point character, if any 842-- - the digit group style, if any (digit group character and sizes of digit groups) 843fromRawNumber 844 :: RawNumber 845 -> Maybe Integer 846 -> Either String 847 (Quantity, Word8, Maybe Char, Maybe DigitGroupStyle) 848fromRawNumber (WithSeparators _ _ _) (Just _) = 849 Left "invalid number: mixing digit separators with exponents is not allowed" 850fromRawNumber raw mExp = do 851 (quantity, precision) <- toQuantity (fromMaybe 0 mExp) (digitGroup raw) (decimalGroup raw) 852 return (quantity, precision, mDecPt raw, digitGroupStyle raw) 853 where 854 toQuantity :: Integer -> DigitGrp -> DigitGrp -> Either String (Quantity, Word8) 855 toQuantity e preDecimalGrp postDecimalGrp 856 | precision < 0 = Right (Decimal 0 (digitGrpNum * 10^(-precision)), 0) 857 | precision < 256 = Right (Decimal precision8 digitGrpNum, precision8) 858 | otherwise = Left "invalid number: numbers with more than 255 decimal digits are not allowed at this time" 859 where 860 digitGrpNum = digitGroupNumber $ preDecimalGrp <> postDecimalGrp 861 precision = toInteger (digitGroupLength postDecimalGrp) - e 862 precision8 = fromIntegral precision :: Word8 863 864 mDecPt (NoSeparators _ mDecimals) = fst <$> mDecimals 865 mDecPt (WithSeparators _ _ mDecimals) = fst <$> mDecimals 866 decimalGroup (NoSeparators _ mDecimals) = maybe mempty snd mDecimals 867 decimalGroup (WithSeparators _ _ mDecimals) = maybe mempty snd mDecimals 868 digitGroup (NoSeparators digitGrp _) = digitGrp 869 digitGroup (WithSeparators _ digitGrps _) = mconcat digitGrps 870 digitGroupStyle (NoSeparators _ _) = Nothing 871 digitGroupStyle (WithSeparators sep grps _) = Just . DigitGroups sep $ groupSizes grps 872 873 -- Outputs digit group sizes from least significant to most significant 874 groupSizes :: [DigitGrp] -> [Word8] 875 groupSizes digitGrps = reverse $ case map (fromIntegral . digitGroupLength) digitGrps of 876 (a:b:cs) | a < b -> b:cs 877 gs -> gs 878 879 880disambiguateNumber :: Maybe AmountStyle -> AmbiguousNumber -> RawNumber 881disambiguateNumber suggestedStyle (AmbiguousNumber grp1 sep grp2) = 882 -- If present, use the suggested style to disambiguate; 883 -- otherwise, assume that the separator is a decimal point where possible. 884 if isDecimalPointChar sep && 885 maybe True (sep `isValidDecimalBy`) suggestedStyle 886 then NoSeparators grp1 (Just (sep, grp2)) 887 else WithSeparators sep [grp1, grp2] Nothing 888 where 889 isValidDecimalBy :: Char -> AmountStyle -> Bool 890 isValidDecimalBy c = \case 891 AmountStyle{asdecimalpoint = Just d} -> d == c 892 AmountStyle{asdigitgroups = Just (DigitGroups g _)} -> g /= c 893 AmountStyle{asprecision = Precision 0} -> False 894 _ -> True 895 896-- | Parse and interpret the structure of a number without external hints. 897-- Numbers are digit strings, possibly separated into digit groups by one 898-- of two types of separators. (1) Numbers may optionally have a decimal 899-- mark, which may be either a period or comma. (2) Numbers may 900-- optionally contain digit group marks, which must all be either a 901-- period, a comma, or a space. 902-- 903-- It is our task to deduce the characters used as decimal mark and 904-- digit group mark, based on the allowed syntax. For instance, we 905-- make use of the fact that a decimal mark can occur at most once and 906-- must be to the right of all digit group marks. 907-- 908-- >>> parseTest rawnumberp "1,234,567.89" 909-- Right (WithSeparators ',' ["1","234","567"] (Just ('.',"89"))) 910-- >>> parseTest rawnumberp "1,000" 911-- Left (AmbiguousNumber "1" ',' "000") 912-- >>> parseTest rawnumberp "1 000" 913-- Right (WithSeparators ' ' ["1","000"] Nothing) 914-- 915rawnumberp :: TextParser m (Either AmbiguousNumber RawNumber) 916rawnumberp = label "number" $ do 917 rawNumber <- fmap Right leadingDecimalPt <|> leadingDigits 918 919 -- Guard against mistyped numbers 920 mExtraDecimalSep <- optional $ lookAhead $ satisfy isDecimalPointChar 921 when (isJust mExtraDecimalSep) $ 922 Fail.fail "invalid number (invalid use of separator)" 923 924 mExtraFragment <- optional $ lookAhead $ try $ 925 char ' ' *> getOffset <* digitChar 926 case mExtraFragment of 927 Just off -> customFailure $ 928 parseErrorAt off "invalid number (excessive trailing digits)" 929 Nothing -> pure () 930 931 return $ dbg7 "rawnumberp" rawNumber 932 where 933 934 leadingDecimalPt :: TextParser m RawNumber 935 leadingDecimalPt = do 936 decPt <- satisfy isDecimalPointChar 937 decGrp <- digitgroupp 938 pure $ NoSeparators mempty (Just (decPt, decGrp)) 939 940 leadingDigits :: TextParser m (Either AmbiguousNumber RawNumber) 941 leadingDigits = do 942 grp1 <- digitgroupp 943 withSeparators grp1 <|> fmap Right (trailingDecimalPt grp1) 944 <|> pure (Right $ NoSeparators grp1 Nothing) 945 946 withSeparators :: DigitGrp -> TextParser m (Either AmbiguousNumber RawNumber) 947 withSeparators grp1 = do 948 (sep, grp2) <- try $ (,) <$> satisfy isDigitSeparatorChar <*> digitgroupp 949 grps <- many $ try $ char sep *> digitgroupp 950 951 let digitGroups = grp1 : grp2 : grps 952 fmap Right (withDecimalPt sep digitGroups) 953 <|> pure (withoutDecimalPt grp1 sep grp2 grps) 954 955 withDecimalPt :: Char -> [DigitGrp] -> TextParser m RawNumber 956 withDecimalPt digitSep digitGroups = do 957 decPt <- satisfy $ \c -> isDecimalPointChar c && c /= digitSep 958 decDigitGrp <- option mempty digitgroupp 959 960 pure $ WithSeparators digitSep digitGroups (Just (decPt, decDigitGrp)) 961 962 withoutDecimalPt 963 :: DigitGrp 964 -> Char 965 -> DigitGrp 966 -> [DigitGrp] 967 -> Either AmbiguousNumber RawNumber 968 withoutDecimalPt grp1 sep grp2 grps 969 | null grps && isDecimalPointChar sep = 970 Left $ AmbiguousNumber grp1 sep grp2 971 | otherwise = Right $ WithSeparators sep (grp1:grp2:grps) Nothing 972 973 trailingDecimalPt :: DigitGrp -> TextParser m RawNumber 974 trailingDecimalPt grp1 = do 975 decPt <- satisfy isDecimalPointChar 976 pure $ NoSeparators grp1 (Just (decPt, mempty)) 977 978 979isDecimalPointChar :: Char -> Bool 980isDecimalPointChar c = c == '.' || c == ',' 981 982isDigitSeparatorChar :: Char -> Bool 983isDigitSeparatorChar c = isDecimalPointChar c || c == ' ' 984 985-- | Some kinds of number literal we might parse. 986data RawNumber 987 = NoSeparators DigitGrp (Maybe (Char, DigitGrp)) 988 -- ^ A number with no digit group marks (eg 100), 989 -- or with a leading or trailing comma or period 990 -- which (apparently) we interpret as a decimal mark (like 100. or .100) 991 | WithSeparators Char [DigitGrp] (Maybe (Char, DigitGrp)) 992 -- ^ A number with identifiable digit group marks 993 -- (eg 1,000,000 or 1,000.50 or 1 000) 994 deriving (Show, Eq) 995 996-- | Another kind of number literal: this one contains either a digit 997-- group separator or a decimal mark, we're not sure which (eg 1,000 or 100.50). 998data AmbiguousNumber = AmbiguousNumber DigitGrp Char DigitGrp 999 deriving (Show, Eq) 1000 1001-- | Description of a single digit group in a number literal. 1002-- "Thousands" is one well known digit grouping, but there are others. 1003data DigitGrp = DigitGrp { 1004 digitGroupLength :: !Word, -- ^ The number of digits in this group. 1005 -- This is Word to avoid the need to do overflow 1006 -- checking for the Semigroup instance of DigitGrp. 1007 digitGroupNumber :: !Integer -- ^ The natural number formed by this group's digits. This should always be positive. 1008} deriving (Eq) 1009 1010-- | A custom show instance, showing digit groups as the parser saw them. 1011instance Show DigitGrp where 1012 show (DigitGrp len num) = "\"" ++ padding ++ numStr ++ "\"" 1013 where numStr = show num 1014 padding = genericReplicate (toInteger len - toInteger (length numStr)) '0' 1015 1016instance Sem.Semigroup DigitGrp where 1017 DigitGrp l1 n1 <> DigitGrp l2 n2 = DigitGrp (l1 + l2) (n1 * 10^l2 + n2) 1018 1019instance Monoid DigitGrp where 1020 mempty = DigitGrp 0 0 1021 mappend = (Sem.<>) 1022 1023digitgroupp :: TextParser m DigitGrp 1024digitgroupp = label "digits" 1025 $ makeGroup <$> takeWhile1P (Just "digit") isDigit 1026 where 1027 makeGroup = uncurry DigitGrp . foldl' step (0, 0) . T.unpack 1028 step (!l, !a) c = (l+1, a*10 + fromIntegral (digitToInt c)) 1029 1030--- *** comments 1031 1032multilinecommentp :: TextParser m () 1033multilinecommentp = startComment *> anyLine `skipManyTill` endComment 1034 where 1035 startComment = string "comment" *> trailingSpaces 1036 endComment = eof <|> string "end comment" *> trailingSpaces 1037 1038 trailingSpaces = skipNonNewlineSpaces <* newline 1039 anyLine = void $ takeWhileP Nothing (\c -> c /= '\n') *> newline 1040 1041{-# INLINABLE multilinecommentp #-} 1042 1043-- | A blank or comment line in journal format: a line that's empty or 1044-- containing only whitespace or whose first non-whitespace character 1045-- is semicolon, hash, or star. 1046emptyorcommentlinep :: TextParser m () 1047emptyorcommentlinep = do 1048 skipNonNewlineSpaces 1049 skiplinecommentp <|> void newline 1050 where 1051 skiplinecommentp :: TextParser m () 1052 skiplinecommentp = do 1053 satisfy $ \c -> c == ';' || c == '#' || c == '*' 1054 void $ takeWhileP Nothing (\c -> c /= '\n') 1055 optional newline 1056 pure () 1057 1058{-# INLINABLE emptyorcommentlinep #-} 1059 1060-- A parser combinator for parsing (possibly multiline) comments 1061-- following journal items. 1062-- 1063-- Several journal items may be followed by comments, which begin with 1064-- semicolons and extend to the end of the line. Such comments may span 1065-- multiple lines, but comment lines below the journal item must be 1066-- preceded by leading whitespace. 1067-- 1068-- This parser combinator accepts a parser that consumes all input up 1069-- until the next newline. This parser should extract the "content" from 1070-- comments. The resulting parser returns this content plus the raw text 1071-- of the comment itself. 1072-- 1073-- See followingcommentp for tests. 1074-- 1075followingcommentp' :: (Monoid a, Show a) => TextParser m a -> TextParser m (Text, a) 1076followingcommentp' contentp = do 1077 skipNonNewlineSpaces 1078 -- there can be 0 or 1 sameLine 1079 sameLine <- try headerp *> ((:[]) <$> match' contentp) <|> pure [] 1080 _ <- eolof 1081 -- there can be 0 or more nextLines 1082 nextLines <- many $ 1083 try (skipNonNewlineSpaces1 *> headerp) *> match' contentp <* eolof 1084 let 1085 -- if there's just a next-line comment, insert an empty same-line comment 1086 -- so the next-line comment doesn't get rendered as a same-line comment. 1087 sameLine' | null sameLine && not (null nextLines) = [("",mempty)] 1088 | otherwise = sameLine 1089 (texts, contents) = unzip $ sameLine' ++ nextLines 1090 strippedCommentText = T.unlines $ map T.strip texts 1091 commentContent = mconcat contents 1092 pure (strippedCommentText, commentContent) 1093 1094 where 1095 headerp = char ';' *> skipNonNewlineSpaces 1096 1097{-# INLINABLE followingcommentp' #-} 1098 1099-- | Parse the text of a (possibly multiline) comment following a journal item. 1100-- 1101-- >>> rtp followingcommentp "" -- no comment 1102-- Right "" 1103-- >>> rtp followingcommentp ";" -- just a (empty) same-line comment. newline is added 1104-- Right "\n" 1105-- >>> rtp followingcommentp "; \n" 1106-- Right "\n" 1107-- >>> rtp followingcommentp ";\n ;\n" -- a same-line and a next-line comment 1108-- Right "\n\n" 1109-- >>> rtp followingcommentp "\n ;\n" -- just a next-line comment. Insert an empty same-line comment so the next-line comment doesn't become a same-line comment. 1110-- Right "\n\n" 1111-- 1112followingcommentp :: TextParser m Text 1113followingcommentp = 1114 fst <$> followingcommentp' (void $ takeWhileP Nothing (/= '\n')) -- XXX support \r\n ? 1115{-# INLINABLE followingcommentp #-} 1116 1117 1118-- | Parse a transaction comment and extract its tags. 1119-- 1120-- The first line of a transaction may be followed by comments, which 1121-- begin with semicolons and extend to the end of the line. Transaction 1122-- comments may span multiple lines, but comment lines below the 1123-- transaction must be preceded by leading whitespace. 1124-- 1125-- 2000/1/1 ; a transaction comment starting on the same line ... 1126-- ; extending to the next line 1127-- account1 $1 1128-- account2 1129-- 1130-- Tags are name-value pairs. 1131-- 1132-- >>> let getTags (_,tags) = tags 1133-- >>> let parseTags = fmap getTags . rtp transactioncommentp 1134-- 1135-- >>> parseTags "; name1: val1, name2:all this is value2" 1136-- Right [("name1","val1"),("name2","all this is value2")] 1137-- 1138-- A tag's name must be immediately followed by a colon, without 1139-- separating whitespace. The corresponding value consists of all the text 1140-- following the colon up until the next colon or newline, stripped of 1141-- leading and trailing whitespace. 1142-- 1143transactioncommentp :: TextParser m (Text, [Tag]) 1144transactioncommentp = followingcommentp' commenttagsp 1145{-# INLINABLE transactioncommentp #-} 1146 1147commenttagsp :: TextParser m [Tag] 1148commenttagsp = do 1149 tagName <- fmap (last . T.split isSpace) 1150 $ takeWhileP Nothing (\c -> c /= ':' && c /= '\n') 1151 atColon tagName <|> pure [] -- if not ':', then either '\n' or EOF 1152 1153 where 1154 atColon :: Text -> TextParser m [Tag] 1155 atColon name = char ':' *> do 1156 if T.null name 1157 then commenttagsp 1158 else do 1159 skipNonNewlineSpaces 1160 val <- tagValue 1161 let tag = (name, val) 1162 (tag:) <$> commenttagsp 1163 1164 tagValue :: TextParser m Text 1165 tagValue = do 1166 val <- T.strip <$> takeWhileP Nothing (\c -> c /= ',' && c /= '\n') 1167 _ <- optional $ char ',' 1168 pure val 1169 1170{-# INLINABLE commenttagsp #-} 1171 1172 1173-- | Parse a posting comment and extract its tags and dates. 1174-- 1175-- Postings may be followed by comments, which begin with semicolons and 1176-- extend to the end of the line. Posting comments may span multiple 1177-- lines, but comment lines below the posting must be preceded by 1178-- leading whitespace. 1179-- 1180-- 2000/1/1 1181-- account1 $1 ; a posting comment starting on the same line ... 1182-- ; extending to the next line 1183-- 1184-- account2 1185-- ; a posting comment beginning on the next line 1186-- 1187-- Tags are name-value pairs. 1188-- 1189-- >>> let getTags (_,tags,_,_) = tags 1190-- >>> let parseTags = fmap getTags . rtp (postingcommentp Nothing) 1191-- 1192-- >>> parseTags "; name1: val1, name2:all this is value2" 1193-- Right [("name1","val1"),("name2","all this is value2")] 1194-- 1195-- A tag's name must be immediately followed by a colon, without 1196-- separating whitespace. The corresponding value consists of all the text 1197-- following the colon up until the next colon or newline, stripped of 1198-- leading and trailing whitespace. 1199-- 1200-- Posting dates may be expressed with "date"/"date2" tags or with 1201-- bracketed date syntax. Posting dates will inherit their year from the 1202-- transaction date if the year is not specified. We throw parse errors on 1203-- invalid dates. 1204-- 1205-- >>> let getDates (_,_,d1,d2) = (d1, d2) 1206-- >>> let parseDates = fmap getDates . rtp (postingcommentp (Just 2000)) 1207-- 1208-- >>> parseDates "; date: 1/2, date2: 1999/12/31" 1209-- Right (Just 2000-01-02,Just 1999-12-31) 1210-- >>> parseDates "; [1/2=1999/12/31]" 1211-- Right (Just 2000-01-02,Just 1999-12-31) 1212-- 1213-- Example: tags, date tags, and bracketed dates 1214-- >>> rtp (postingcommentp (Just 2000)) "; a:b, date:3/4, [=5/6]" 1215-- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06) 1216-- 1217-- Example: extraction of dates from date tags ignores trailing text 1218-- >>> rtp (postingcommentp (Just 2000)) "; date:3/4=5/6" 1219-- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing) 1220-- 1221postingcommentp 1222 :: Maybe Year -> TextParser m (Text, [Tag], Maybe Day, Maybe Day) 1223postingcommentp mYear = do 1224 (commentText, (tags, dateTags)) <- 1225 followingcommentp' (commenttagsanddatesp mYear) 1226 let mdate = fmap snd $ find ((=="date") .fst) dateTags 1227 mdate2 = fmap snd $ find ((=="date2").fst) dateTags 1228 pure (commentText, tags, mdate, mdate2) 1229{-# INLINABLE postingcommentp #-} 1230 1231 1232commenttagsanddatesp 1233 :: Maybe Year -> TextParser m ([Tag], [DateTag]) 1234commenttagsanddatesp mYear = do 1235 (txt, dateTags) <- match $ readUpTo ':' 1236 -- next char is either ':' or '\n' (or EOF) 1237 let tagName = last (T.split isSpace txt) 1238 (fmap.second) (dateTags++) (atColon tagName) <|> pure ([], dateTags) -- if not ':', then either '\n' or EOF 1239 1240 where 1241 readUpTo :: Char -> TextParser m [DateTag] 1242 readUpTo end = do 1243 void $ takeWhileP Nothing (\c -> c /= end && c /= '\n' && c /= '[') 1244 -- if not '[' then ':' or '\n' or EOF 1245 atBracket (readUpTo end) <|> pure [] 1246 1247 atBracket :: TextParser m [DateTag] -> TextParser m [DateTag] 1248 atBracket cont = do 1249 -- Uses the fact that bracketed date-tags cannot contain newlines 1250 dateTags <- option [] $ lookAhead (bracketeddatetagsp mYear) 1251 _ <- char '[' 1252 dateTags' <- cont 1253 pure $ dateTags ++ dateTags' 1254 1255 atColon :: Text -> TextParser m ([Tag], [DateTag]) 1256 atColon name = char ':' *> do 1257 skipNonNewlineSpaces 1258 (tags, dateTags) <- case name of 1259 "" -> pure ([], []) 1260 "date" -> dateValue name 1261 "date2" -> dateValue name 1262 _ -> tagValue name 1263 _ <- optional $ char ',' 1264 bimap (tags++) (dateTags++) <$> commenttagsanddatesp mYear 1265 1266 dateValue :: Text -> TextParser m ([Tag], [DateTag]) 1267 dateValue name = do 1268 (txt, (date, dateTags)) <- match' $ do 1269 date <- datep' mYear 1270 dateTags <- readUpTo ',' 1271 pure (date, dateTags) 1272 let val = T.strip txt 1273 pure $ ( [(name, val)] 1274 , (name, date) : dateTags ) 1275 1276 tagValue :: Text -> TextParser m ([Tag], [DateTag]) 1277 tagValue name = do 1278 (txt, dateTags) <- match' $ readUpTo ',' 1279 let val = T.strip txt 1280 pure $ ( [(name, val)] 1281 , dateTags ) 1282 1283{-# INLINABLE commenttagsanddatesp #-} 1284 1285-- | Parse Ledger-style bracketed posting dates ([DATE=DATE2]), as 1286-- "date" and/or "date2" tags. Anything that looks like an attempt at 1287-- this (a square-bracketed sequence of 0123456789/-.= containing at 1288-- least one digit and one date separator) is also parsed, and will 1289-- throw an appropriate error. 1290-- 1291-- The dates are parsed in full here so that errors are reported in 1292-- the right position. A missing year in DATE can be inferred if a 1293-- default date is provided. A missing year in DATE2 will be inferred 1294-- from DATE. 1295-- 1296-- >>> either (Left . customErrorBundlePretty) Right $ rtp (bracketeddatetagsp Nothing) "[2016/1/2=3/4]" 1297-- Right [("date",2016-01-02),("date2",2016-03-04)] 1298-- 1299-- >>> either (Left . customErrorBundlePretty) Right $ rtp (bracketeddatetagsp Nothing) "[1]" 1300-- Left ...not a bracketed date... 1301-- 1302-- >>> either (Left . customErrorBundlePretty) Right $ rtp (bracketeddatetagsp Nothing) "[2016/1/32]" 1303-- Left ...1:2:...well-formed but invalid date: 2016/1/32... 1304-- 1305-- >>> either (Left . customErrorBundlePretty) Right $ rtp (bracketeddatetagsp Nothing) "[1/31]" 1306-- Left ...1:2:...partial date 1/31 found, but the current year is unknown... 1307-- 1308-- >>> either (Left . customErrorBundlePretty) Right $ rtp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]" 1309-- Left ...1:13:...expecting month or day... 1310-- 1311bracketeddatetagsp 1312 :: Maybe Year -> TextParser m [(TagName, Day)] 1313bracketeddatetagsp mYear1 = do 1314 -- dbgparse 0 "bracketeddatetagsp" 1315 try $ do 1316 s <- lookAhead 1317 $ between (char '[') (char ']') 1318 $ takeWhile1P Nothing isBracketedDateChar 1319 unless (T.any isDigit s && T.any isDateSepChar s) $ 1320 Fail.fail "not a bracketed date" 1321 -- Looks sufficiently like a bracketed date to commit to parsing a date 1322 1323 between (char '[') (char ']') $ do 1324 md1 <- optional $ datep' mYear1 1325 1326 let mYear2 = fmap readYear md1 <|> mYear1 1327 md2 <- optional $ char '=' *> datep' mYear2 1328 1329 pure $ catMaybes [("date",) <$> md1, ("date2",) <$> md2] 1330 1331 where 1332 readYear = first3 . toGregorian 1333 isBracketedDateChar c = isDigit c || isDateSepChar c || c == '=' 1334 1335{-# INLINABLE bracketeddatetagsp #-} 1336 1337--- ** tests 1338 1339tests_Common = tests "Common" [ 1340 1341 tests "amountp" [ 1342 test "basic" $ assertParseEq amountp "$47.18" (usd 47.18) 1343 ,test "ends with decimal mark" $ assertParseEq amountp "$1." (usd 1 `withPrecision` Precision 0) 1344 ,test "unit price" $ assertParseEq amountp "$10 @ €0.5" 1345 -- not precise enough: 1346 -- (usd 10 `withPrecision` 0 `at` (eur 0.5 `withPrecision` 1)) -- `withStyle` asdecimalpoint=Just '.' 1347 amount{ 1348 acommodity="$" 1349 ,aquantity=10 -- need to test internal precision with roundTo ? I think not 1350 ,astyle=amountstyle{asprecision=Precision 0, asdecimalpoint=Nothing} 1351 ,aprice=Just $ UnitPrice $ 1352 amount{ 1353 acommodity="€" 1354 ,aquantity=0.5 1355 ,astyle=amountstyle{asprecision=Precision 1, asdecimalpoint=Just '.'} 1356 } 1357 } 1358 ,test "total price" $ assertParseEq amountp "$10 @@ €5" 1359 amount{ 1360 acommodity="$" 1361 ,aquantity=10 1362 ,astyle=amountstyle{asprecision=Precision 0, asdecimalpoint=Nothing} 1363 ,aprice=Just $ TotalPrice $ 1364 amount{ 1365 acommodity="€" 1366 ,aquantity=5 1367 ,astyle=amountstyle{asprecision=Precision 0, asdecimalpoint=Nothing} 1368 } 1369 } 1370 ,test "unit price, parenthesised" $ assertParse amountp "$10 (@) €0.5" 1371 ,test "total price, parenthesised" $ assertParse amountp "$10 (@@) €0.5" 1372 ] 1373 1374 ,let p = lift (numberp Nothing) :: JournalParser IO (Quantity, Word8, Maybe Char, Maybe DigitGroupStyle) in 1375 test "numberp" $ do 1376 assertParseEq p "0" (0, 0, Nothing, Nothing) 1377 assertParseEq p "1" (1, 0, Nothing, Nothing) 1378 assertParseEq p "1.1" (1.1, 1, Just '.', Nothing) 1379 assertParseEq p "1,000.1" (1000.1, 1, Just '.', Just $ DigitGroups ',' [3]) 1380 assertParseEq p "1.00.000,1" (100000.1, 1, Just ',', Just $ DigitGroups '.' [3,2]) 1381 assertParseEq p "1,000,000" (1000000, 0, Nothing, Just $ DigitGroups ',' [3,3]) -- could be simplified to [3] 1382 assertParseEq p "1." (1, 0, Just '.', Nothing) 1383 assertParseEq p "1," (1, 0, Just ',', Nothing) 1384 assertParseEq p ".1" (0.1, 1, Just '.', Nothing) 1385 assertParseEq p ",1" (0.1, 1, Just ',', Nothing) 1386 assertParseError p "" "" 1387 assertParseError p "1,000.000,1" "" 1388 assertParseError p "1.000,000.1" "" 1389 assertParseError p "1,000.000.1" "" 1390 assertParseError p "1,,1" "" 1391 assertParseError p "1..1" "" 1392 assertParseError p ".1," "" 1393 assertParseError p ",1." "" 1394 assertParseEq p "1.555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555" (1.555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555, 255, Just '.', Nothing) 1395 assertParseError p "1.5555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555" "" 1396 1397 ,tests "spaceandamountormissingp" [ 1398 test "space and amount" $ assertParseEq spaceandamountormissingp " $47.18" (Mixed [usd 47.18]) 1399 ,test "empty string" $ assertParseEq spaceandamountormissingp "" missingmixedamt 1400 -- ,test "just space" $ assertParseEq spaceandamountormissingp " " missingmixedamt -- XXX should it ? 1401 -- ,test "just amount" $ assertParseError spaceandamountormissingp "$47.18" "" -- succeeds, consuming nothing 1402 ] 1403 1404 ] 1405