1--- * -*- outline-regexp:"--- \\*"; -*-
2--- ** doc
3-- In Emacs, use TAB on lines beginning with "-- *" to collapse/expand sections.
4{-|
5
6File reading/parsing utilities used by multiple readers, and a good
7amount of the parsers for journal format, to avoid import cycles
8when JournalReader imports other readers.
9
10Some of these might belong in Hledger.Read.JournalReader or Hledger.Read.
11
12-}
13
14--- ** language
15{-# LANGUAGE BangPatterns        #-}
16{-# LANGUAGE CPP                 #-}
17{-# LANGUAGE FlexibleContexts    #-}
18{-# LANGUAGE LambdaCase          #-}
19{-# LANGUAGE NamedFieldPuns      #-}
20{-# LANGUAGE NoMonoLocalBinds    #-}
21{-# LANGUAGE OverloadedStrings   #-}
22{-# LANGUAGE PackageImports      #-}
23{-# LANGUAGE Rank2Types          #-}
24{-# LANGUAGE RecordWildCards     #-}
25{-# LANGUAGE ScopedTypeVariables #-}
26{-# LANGUAGE TupleSections       #-}
27{-# LANGUAGE TypeFamilies        #-}
28
29--- ** exports
30module Hledger.Read.Common (
31  Reader (..),
32  InputOpts (..),
33  definputopts,
34  rawOptsToInputOpts,
35
36  -- * parsing utilities
37  runTextParser,
38  rtp,
39  runJournalParser,
40  rjp,
41  runErroringJournalParser,
42  rejp,
43  genericSourcePos,
44  journalSourcePos,
45  parseAndFinaliseJournal,
46  parseAndFinaliseJournal',
47  journalFinalise,
48  setYear,
49  getYear,
50  setDefaultCommodityAndStyle,
51  getDefaultCommodityAndStyle,
52  getDefaultAmountStyle,
53  getAmountStyle,
54  addDeclaredAccountType,
55  pushParentAccount,
56  popParentAccount,
57  getParentAccount,
58  addAccountAlias,
59  getAccountAliases,
60  clearAccountAliases,
61  journalAddFile,
62
63  -- * parsers
64  -- ** transaction bits
65  statusp,
66  codep,
67  descriptionp,
68
69  -- ** dates
70  datep,
71  datetimep,
72  secondarydatep,
73
74  -- ** account names
75  modifiedaccountnamep,
76  accountnamep,
77
78  -- ** amounts
79  spaceandamountormissingp,
80  amountp,
81  amountp',
82  mamountp',
83  commoditysymbolp,
84  priceamountp,
85  balanceassertionp,
86  lotpricep,
87  numberp,
88  fromRawNumber,
89  rawnumberp,
90
91  -- ** comments
92  multilinecommentp,
93  emptyorcommentlinep,
94
95  followingcommentp,
96  transactioncommentp,
97  postingcommentp,
98
99  -- ** bracketed dates
100  bracketeddatetagsp,
101
102  -- ** misc
103  singlespacedtextp,
104  singlespacedtextsatisfyingp,
105  singlespacep,
106
107  skipNonNewlineSpaces,
108  skipNonNewlineSpaces1,
109
110  -- * tests
111  tests_Common,
112)
113where
114
115--- ** imports
116import Prelude ()
117import "base-compat-batteries" Prelude.Compat hiding (fail, readFile)
118import Control.Applicative.Permutations (runPermutation, toPermutationWithDefault)
119import qualified "base-compat-batteries" Control.Monad.Fail.Compat as Fail (fail)
120import Control.Monad.Except (ExceptT(..), runExceptT, throwError)
121import Control.Monad.State.Strict hiding (fail)
122import Data.Bifunctor (bimap, second)
123import Data.Char (digitToInt, isDigit, isSpace)
124import Data.Decimal (DecimalRaw (Decimal), Decimal)
125import Data.Default (Default(..))
126import Data.Function ((&))
127import Data.Functor.Identity (Identity)
128import "base-compat-batteries" Data.List.Compat
129import Data.List.NonEmpty (NonEmpty(..))
130import Data.Maybe (catMaybes, fromMaybe, isJust, listToMaybe)
131import qualified Data.Map as M
132import qualified Data.Semigroup as Sem
133import Data.Text (Text)
134import qualified Data.Text as T
135import Data.Time.Calendar (Day, fromGregorianValid, toGregorian)
136import Data.Time.LocalTime (LocalTime(..), TimeOfDay(..))
137import Data.Word (Word8)
138import System.Time (getClockTime)
139import Text.Megaparsec
140import Text.Megaparsec.Char (char, char', digitChar, newline, string)
141import Text.Megaparsec.Char.Lexer (decimal)
142import Text.Megaparsec.Custom
143  (FinalParseError, attachSource, customErrorBundlePretty,
144  finalErrorBundlePretty, parseErrorAt, parseErrorAtRegion)
145
146import Hledger.Data
147import Hledger.Utils
148
149--- ** doctest setup
150-- $setup
151-- >>> :set -XOverloadedStrings
152
153--- ** types
154
155-- main types; a few more below
156
157-- | A hledger journal reader is a triple of storage format name, a
158-- detector of that format, and a parser from that format to Journal.
159-- The type variable m appears here so that rParserr can hold a
160-- journal parser, which depends on it.
161data Reader m = Reader {
162
163     -- The canonical name of the format handled by this reader
164     rFormat   :: StorageFormat
165
166     -- The file extensions recognised as containing this format
167    ,rExtensions :: [String]
168
169     -- The entry point for reading this format, accepting input options, file
170     -- path for error messages and file contents, producing an exception-raising IO
171     -- action that produces a journal or error message.
172    ,rReadFn   :: InputOpts -> FilePath -> Text -> ExceptT String IO Journal
173
174     -- The actual megaparsec parser called by the above, in case
175     -- another parser (includedirectivep) wants to use it directly.
176    ,rParser :: MonadIO m => ErroringJournalParser m ParsedJournal
177    }
178
179instance Show (Reader m) where show r = rFormat r ++ " reader"
180
181-- $setup
182
183-- | Various options to use when reading journal files.
184-- Similar to CliOptions.inputflags, simplifies the journal-reading functions.
185data InputOpts = InputOpts {
186     -- files_             :: [FilePath]
187     mformat_           :: Maybe StorageFormat  -- ^ a file/storage format to try, unless overridden
188                                                --   by a filename prefix. Nothing means try all.
189    ,mrules_file_       :: Maybe FilePath       -- ^ a conversion rules file to use (when reading CSV)
190    ,aliases_           :: [String]             -- ^ account name aliases to apply
191    ,anon_              :: Bool                 -- ^ do light anonymisation/obfuscation of the data
192    ,ignore_assertions_ :: Bool                 -- ^ don't check balance assertions
193    ,new_               :: Bool                 -- ^ read only new transactions since this file was last read
194    ,new_save_          :: Bool                 -- ^ save latest new transactions state for next time
195    ,pivot_             :: String               -- ^ use the given field's value as the account name
196    ,auto_              :: Bool                 -- ^ generate automatic postings when journal is parsed
197 } deriving (Show)
198
199instance Default InputOpts where def = definputopts
200
201definputopts :: InputOpts
202definputopts = InputOpts def def def def def def True def def
203
204rawOptsToInputOpts :: RawOpts -> InputOpts
205rawOptsToInputOpts rawopts = InputOpts{
206   -- files_             = listofstringopt "file" rawopts
207   mformat_           = Nothing
208  ,mrules_file_       = maybestringopt "rules-file" rawopts
209  ,aliases_           = listofstringopt "alias" rawopts
210  ,anon_              = boolopt "anon" rawopts
211  ,ignore_assertions_ = boolopt "ignore-assertions" rawopts
212  ,new_               = boolopt "new" rawopts
213  ,new_save_          = True
214  ,pivot_             = stringopt "pivot" rawopts
215  ,auto_              = boolopt "auto" rawopts
216  }
217
218--- ** parsing utilities
219
220-- | Run a text parser in the identity monad. See also: parseWithState.
221runTextParser, rtp
222  :: TextParser Identity a -> Text -> Either (ParseErrorBundle Text CustomErr) a
223runTextParser p t =  runParser p "" t
224rtp = runTextParser
225
226-- | Run a journal parser in some monad. See also: parseWithState.
227runJournalParser, rjp
228  :: Monad m
229  => JournalParser m a -> Text -> m (Either (ParseErrorBundle Text CustomErr) a)
230runJournalParser p t = runParserT (evalStateT p nulljournal) "" t
231rjp = runJournalParser
232
233-- | Run an erroring journal parser in some monad. See also: parseWithState.
234runErroringJournalParser, rejp
235  :: Monad m
236  => ErroringJournalParser m a
237  -> Text
238  -> m (Either FinalParseError (Either (ParseErrorBundle Text CustomErr) a))
239runErroringJournalParser p t =
240  runExceptT $ runParserT (evalStateT p nulljournal) "" t
241rejp = runErroringJournalParser
242
243genericSourcePos :: SourcePos -> GenericSourcePos
244genericSourcePos p = GenericSourcePos (sourceName p) (unPos $ sourceLine p) (unPos $ sourceColumn p)
245
246-- | Construct a generic start & end line parse position from start and end megaparsec SourcePos's.
247journalSourcePos :: SourcePos -> SourcePos -> GenericSourcePos
248journalSourcePos p p' = JournalSourcePos (sourceName p) (unPos $ sourceLine p, line')
249    where line' | (unPos $ sourceColumn p') == 1 = unPos (sourceLine p') - 1
250                | otherwise = unPos $ sourceLine p' -- might be at end of file withat last new-line
251
252-- | Given a parser to ParsedJournal, input options, file path and
253-- content: run the parser on the content, and finalise the result to
254-- get a Journal; or throw an error.
255parseAndFinaliseJournal :: ErroringJournalParser IO ParsedJournal -> InputOpts
256                           -> FilePath -> Text -> ExceptT String IO Journal
257parseAndFinaliseJournal parser iopts f txt = do
258  y <- liftIO getCurrentYear
259  let initJournal = nulljournal{ jparsedefaultyear = Just y, jincludefilestack = [f] }
260  eep <- liftIO $ runExceptT $ runParserT (evalStateT parser initJournal) f txt
261  -- TODO: urgh.. clean this up somehow
262  case eep of
263    Left finalParseError -> throwError $ finalErrorBundlePretty $ attachSource f txt finalParseError
264    Right ep -> case ep of
265                  Left e   -> throwError $ customErrorBundlePretty e
266                  Right pj -> journalFinalise iopts f txt pj
267
268-- | Like parseAndFinaliseJournal but takes a (non-Erroring) JournalParser.
269-- Used for timeclock/timedot.
270-- TODO: get rid of this, use parseAndFinaliseJournal instead
271parseAndFinaliseJournal' :: JournalParser IO ParsedJournal -> InputOpts
272                           -> FilePath -> Text -> ExceptT String IO Journal
273parseAndFinaliseJournal' parser iopts f txt = do
274  y <- liftIO getCurrentYear
275  let initJournal = nulljournal
276        { jparsedefaultyear = Just y
277        , jincludefilestack = [f] }
278  ep <- liftIO $ runParserT (evalStateT parser initJournal) f txt
279  -- see notes above
280  case ep of
281    Left e   -> throwError $ customErrorBundlePretty e
282    Right pj -> journalFinalise iopts f txt pj
283
284-- | Post-process a Journal that has just been parsed or generated, in this order:
285--
286-- - apply canonical amount styles,
287--
288-- - save misc info and reverse transactions into their original parse order,
289--
290-- - evaluate balance assignments and balance each transaction,
291--
292-- - apply transaction modifiers (auto postings) if enabled,
293--
294-- - check balance assertions if enabled.
295--
296-- - infer transaction-implied market prices from transaction prices
297--
298journalFinalise :: InputOpts -> FilePath -> Text -> Journal -> ExceptT String IO Journal
299journalFinalise iopts f txt pj = do
300  t <- liftIO getClockTime
301  d <- liftIO getCurrentDay
302  -- Infer and apply canonical styles for each commodity (or fail).
303  -- This affects transaction balancing/assertions/assignments, so needs to be done early.
304  -- (TODO: since #903's refactoring for hledger 1.12,
305  -- journalApplyCommodityStyles here is seeing the
306  -- transactions before they get reversesd to normal order.)
307  case journalApplyCommodityStyles pj of
308    Left e    -> throwError e
309    Right pj' -> either throwError return $
310      pj'
311      & journalAddFile (f, txt)  -- save the file path and content
312      & journalSetLastReadTime t -- save the last read time
313      & journalReverse           -- convert all lists to parse order
314      & (if not (auto_ iopts) || null (jtxnmodifiers pj)
315         then
316           -- Auto postings are not active.
317           -- Balance all transactions and maybe check balance assertions.
318           journalBalanceTransactions (not $ ignore_assertions_ iopts)
319         else \j -> do  -- Either monad
320           -- Auto postings are active.
321           -- Balance all transactions without checking balance assertions,
322           j' <- journalBalanceTransactions False j
323           -- then add the auto postings
324           -- (Note adding auto postings after balancing means #893b fails;
325           -- adding them before balancing probably means #893a, #928, #938 fail.)
326           case journalModifyTransactions d j' of
327             Left e -> throwError e
328             Right j'' -> do
329               -- then apply commodity styles once more, to style the auto posting amounts. (XXX inefficient ?)
330               j''' <- journalApplyCommodityStyles j''
331               -- then check balance assertions.
332               journalBalanceTransactions (not $ ignore_assertions_ iopts) j'''
333        )
334     & fmap journalInferMarketPricesFromTransactions  -- infer market prices from commodity-exchanging transactions
335
336setYear :: Year -> JournalParser m ()
337setYear y = modify' (\j -> j{jparsedefaultyear=Just y})
338
339getYear :: JournalParser m (Maybe Year)
340getYear = fmap jparsedefaultyear get
341
342setDefaultCommodityAndStyle :: (CommoditySymbol,AmountStyle) -> JournalParser m ()
343setDefaultCommodityAndStyle cs = modify' (\j -> j{jparsedefaultcommodity=Just cs})
344
345getDefaultCommodityAndStyle :: JournalParser m (Maybe (CommoditySymbol,AmountStyle))
346getDefaultCommodityAndStyle = jparsedefaultcommodity `fmap` get
347
348-- | Get amount style associated with default currency.
349--
350-- Returns 'AmountStyle' used to defined by a latest default commodity directive
351-- prior to current position within this file or its parents.
352getDefaultAmountStyle :: JournalParser m (Maybe AmountStyle)
353getDefaultAmountStyle = fmap snd <$> getDefaultCommodityAndStyle
354
355-- | Lookup currency-specific amount style.
356--
357-- Returns 'AmountStyle' used in commodity directive within current journal
358-- prior to current position or in its parents files.
359getAmountStyle :: CommoditySymbol -> JournalParser m (Maybe AmountStyle)
360getAmountStyle commodity = do
361    specificStyle <-  maybe Nothing cformat . M.lookup commodity . jcommodities <$> get
362    defaultStyle <- fmap snd <$> getDefaultCommodityAndStyle
363    let effectiveStyle = listToMaybe $ catMaybes [specificStyle, defaultStyle]
364    return effectiveStyle
365
366addDeclaredAccountType :: AccountName -> AccountType -> JournalParser m ()
367addDeclaredAccountType acct atype =
368  modify' (\j -> j{jdeclaredaccounttypes = M.insertWith (++) atype [acct] (jdeclaredaccounttypes j)})
369
370pushParentAccount :: AccountName -> JournalParser m ()
371pushParentAccount acct = modify' (\j -> j{jparseparentaccounts = acct : jparseparentaccounts j})
372
373popParentAccount :: JournalParser m ()
374popParentAccount = do
375  j <- get
376  case jparseparentaccounts j of
377    []       -> unexpected (Tokens ('E' :| "nd of apply account block with no beginning"))
378    (_:rest) -> put j{jparseparentaccounts=rest}
379
380getParentAccount :: JournalParser m AccountName
381getParentAccount = fmap (concatAccountNames . reverse . jparseparentaccounts) get
382
383addAccountAlias :: MonadState Journal m => AccountAlias -> m ()
384addAccountAlias a = modify' (\(j@Journal{..}) -> j{jparsealiases=a:jparsealiases})
385
386getAccountAliases :: MonadState Journal m => m [AccountAlias]
387getAccountAliases = fmap jparsealiases get
388
389clearAccountAliases :: MonadState Journal m => m ()
390clearAccountAliases = modify' (\j -> j{jparsealiases=[]})
391
392-- getTransactionCount :: MonadState Journal m =>  m Integer
393-- getTransactionCount = fmap jparsetransactioncount get
394--
395-- setTransactionCount :: MonadState Journal m => Integer -> m ()
396-- setTransactionCount i = modify' (\j -> j{jparsetransactioncount=i})
397--
398-- -- | Increment the transaction index by one and return the new value.
399-- incrementTransactionCount :: MonadState Journal m => m Integer
400-- incrementTransactionCount = do
401--   modify' (\j -> j{jparsetransactioncount=jparsetransactioncount j + 1})
402--   getTransactionCount
403
404journalAddFile :: (FilePath,Text) -> Journal -> Journal
405journalAddFile f j@Journal{jfiles=fs} = j{jfiles=fs++[f]}
406  -- append, unlike the other fields, even though we do a final reverse,
407  -- to compensate for additional reversal due to including/monoid-concatting
408
409-- A version of `match` that is strict in the returned text
410match' :: TextParser m a -> TextParser m (Text, a)
411match' p = do
412  (!txt, p) <- match p
413  pure (txt, p)
414
415--- ** parsers
416--- *** transaction bits
417
418statusp :: TextParser m Status
419statusp =
420  choice'
421    [ skipNonNewlineSpaces >> char '*' >> return Cleared
422    , skipNonNewlineSpaces >> char '!' >> return Pending
423    , return Unmarked
424    ]
425
426codep :: TextParser m Text
427codep = option "" $ do
428  try $ do
429    skipNonNewlineSpaces1
430    char '('
431  code <- takeWhileP Nothing $ \c -> c /= ')' && c /= '\n'
432  char ')' <?> "closing bracket ')' for transaction code"
433  pure code
434
435descriptionp :: TextParser m Text
436descriptionp = takeWhileP Nothing (not . semicolonOrNewline)
437  where semicolonOrNewline c = c == ';' || c == '\n'
438
439--- *** dates
440
441-- | Parse a date in YYYY-MM-DD format.
442-- Slash (/) and period (.) are also allowed as separators.
443-- The year may be omitted if a default year has been set.
444-- Leading zeroes may be omitted.
445datep :: JournalParser m Day
446datep = do
447  mYear <- getYear
448  lift $ datep' mYear
449
450datep' :: Maybe Year -> TextParser m Day
451datep' mYear = do
452    startOffset <- getOffset
453    d1 <- yearorintp <?> "year or month"
454    sep <- datesepchar <?> "date separator"
455    d2 <- decimal <?> "month or day"
456    case d1 of
457         Left y  -> fullDate startOffset y sep d2
458         Right m -> partialDate startOffset mYear m sep d2
459    <?> "full or partial date"
460  where
461    fullDate :: Int -> Year -> Char -> Month -> TextParser m Day
462    fullDate startOffset year sep1 month = do
463      sep2 <- satisfy isDateSepChar <?> "date separator"
464      day <- decimal <?> "day"
465      endOffset <- getOffset
466      let dateStr = show year ++ [sep1] ++ show month ++ [sep2] ++ show day
467
468      when (sep1 /= sep2) $ customFailure $ parseErrorAtRegion startOffset endOffset $
469        "invalid date (mixing date separators is not allowed): " ++ dateStr
470
471      case fromGregorianValid year month day of
472        Nothing -> customFailure $ parseErrorAtRegion startOffset endOffset $
473                     "well-formed but invalid date: " ++ dateStr
474        Just date -> pure $! date
475
476    partialDate :: Int -> Maybe Year -> Month -> Char -> MonthDay -> TextParser m Day
477    partialDate startOffset mYear month sep day = do
478      endOffset <- getOffset
479      case mYear of
480        Just year ->
481          case fromGregorianValid year month day of
482            Nothing -> customFailure $ parseErrorAtRegion startOffset endOffset $
483                        "well-formed but invalid date: " ++ dateStr
484            Just date -> pure $! date
485          where dateStr = show year ++ [sep] ++ show month ++ [sep] ++ show day
486
487        Nothing -> customFailure $ parseErrorAtRegion startOffset endOffset $
488          "partial date "++dateStr++" found, but the current year is unknown"
489          where dateStr = show month ++ [sep] ++ show day
490
491{-# INLINABLE datep' #-}
492
493-- | Parse a date and time in YYYY-MM-DD HH:MM[:SS][+-ZZZZ] format.
494-- Slash (/) and period (.) are also allowed as date separators.
495-- The year may be omitted if a default year has been set.
496-- Seconds are optional.
497-- The timezone is optional and ignored (the time is always interpreted as a local time).
498-- Leading zeroes may be omitted (except in a timezone).
499datetimep :: JournalParser m LocalTime
500datetimep = do
501  mYear <- getYear
502  lift $ datetimep' mYear
503
504datetimep' :: Maybe Year -> TextParser m LocalTime
505datetimep' mYear = do
506  day <- datep' mYear
507  skipNonNewlineSpaces1
508  time <- timeOfDay
509  optional timeZone -- ignoring time zones
510  pure $ LocalTime day time
511
512  where
513    timeOfDay :: TextParser m TimeOfDay
514    timeOfDay = do
515      off1 <- getOffset
516      h' <- twoDigitDecimal <?> "hour"
517      off2 <- getOffset
518      unless (h' >= 0 && h' <= 23) $ customFailure $
519        parseErrorAtRegion off1 off2 "invalid time (bad hour)"
520
521      char ':' <?> "':' (hour-minute separator)"
522      off3 <- getOffset
523      m' <- twoDigitDecimal <?> "minute"
524      off4 <- getOffset
525      unless (m' >= 0 && m' <= 59) $ customFailure $
526        parseErrorAtRegion off3 off4 "invalid time (bad minute)"
527
528      s' <- option 0 $ do
529        char ':' <?> "':' (minute-second separator)"
530        off5 <- getOffset
531        s' <- twoDigitDecimal <?> "second"
532        off6 <- getOffset
533        unless (s' >= 0 && s' <= 59) $ customFailure $
534          parseErrorAtRegion off5 off6 "invalid time (bad second)"
535          -- we do not support leap seconds
536        pure s'
537
538      pure $ TimeOfDay h' m' (fromIntegral s')
539
540    twoDigitDecimal :: TextParser m Int
541    twoDigitDecimal = do
542      d1 <- digitToInt <$> digitChar
543      d2 <- digitToInt <$> (digitChar <?> "a second digit")
544      pure $ d1*10 + d2
545
546    timeZone :: TextParser m String
547    timeZone = do
548      plusminus <- satisfy $ \c -> c == '-' || c == '+'
549      fourDigits <- count 4 (digitChar <?> "a digit (for a time zone)")
550      pure $ plusminus:fourDigits
551
552secondarydatep :: Day -> TextParser m Day
553secondarydatep primaryDate = char '=' *> datep' (Just primaryYear)
554  where primaryYear = first3 $ toGregorian primaryDate
555
556-- | Parse a year number or an Int. Years must contain at least four
557-- digits.
558yearorintp :: TextParser m (Either Year Int)
559yearorintp = do
560    yearOrMonth <- takeWhile1P (Just "digit") isDigit
561    let n = readDecimal yearOrMonth
562    return $ if T.length yearOrMonth >= 4 then Left n else Right (fromInteger n)
563
564--- *** account names
565
566-- | Parse an account name (plus one following space if present),
567-- then apply any parent account prefix and/or account aliases currently in effect,
568-- in that order. (Ie first add the parent account prefix, then rewrite with aliases).
569-- This calls error if any account alias with an invalid regular expression exists.
570modifiedaccountnamep :: JournalParser m AccountName
571modifiedaccountnamep = do
572  parent  <- getParentAccount
573  aliases <- getAccountAliases
574  -- off1    <- getOffset
575  a       <- lift accountnamep
576  -- off2    <- getOffset
577  -- XXX or accountNameApplyAliasesMemo ? doesn't seem to make a difference (retest that function)
578  case accountNameApplyAliases aliases $ joinAccountNames parent a of
579    Right a' -> return $! a'
580    -- should not happen, regexaliasp will have displayed a better error already:
581    -- (XXX why does customFailure cause error to be displayed there, but not here ?)
582    -- Left e  -> customFailure $! parseErrorAtRegion off1 off2 err
583    Left e   -> error' err  -- PARTIAL:
584      where
585        err = "problem in account alias applied to "++T.unpack a++": "++e
586
587-- | Parse an account name, plus one following space if present.
588-- Account names have one or more parts separated by the account separator character,
589-- and are terminated by two or more spaces (or end of input).
590-- Each part is at least one character long, may have single spaces inside it,
591-- and starts with a non-whitespace.
592-- Note, this means "{account}", "%^!" and ";comment" are all accepted
593-- (parent parsers usually prevent/consume the last).
594-- It should have required parts to start with an alphanumeric;
595-- for now it remains as-is for backwards compatibility.
596accountnamep :: TextParser m AccountName
597accountnamep = singlespacedtextp
598
599
600-- | Parse any text beginning with a non-whitespace character, until a
601-- double space or the end of input.
602-- TODO including characters which normally start a comment (;#) - exclude those ?
603singlespacedtextp :: TextParser m T.Text
604singlespacedtextp = singlespacedtextsatisfyingp (const True)
605
606-- | Similar to 'singlespacedtextp', except that the text must only contain
607-- characters satisfying the given predicate.
608singlespacedtextsatisfyingp :: (Char -> Bool) -> TextParser m T.Text
609singlespacedtextsatisfyingp pred = do
610  firstPart <- partp
611  otherParts <- many $ try $ singlespacep *> partp
612  pure $! T.unwords $ firstPart : otherParts
613  where
614    partp = takeWhile1P Nothing (\c -> pred c && not (isSpace c))
615
616-- | Parse one non-newline whitespace character that is not followed by another one.
617singlespacep :: TextParser m ()
618singlespacep = spacenonewline *> notFollowedBy spacenonewline
619
620--- *** amounts
621
622-- | Parse whitespace then an amount, with an optional left or right
623-- currency symbol and optional price, or return the special
624-- "missing" marker amount.
625spaceandamountormissingp :: JournalParser m MixedAmount
626spaceandamountormissingp =
627  option missingmixedamt $ try $ do
628    lift $ skipNonNewlineSpaces1
629    Mixed . (:[]) <$> amountp
630
631-- | Parse a single-commodity amount, with optional symbol on the left
632-- or right, followed by, in any order: an optional transaction price,
633-- an optional ledger-style lot price, and/or an optional ledger-style
634-- lot date. A lot price and lot date will be ignored.
635amountp :: JournalParser m Amount
636amountp = label "amount" $ do
637  let spaces = lift $ skipNonNewlineSpaces
638  amount <- amountwithoutpricep <* spaces
639  (mprice, _elotprice, _elotdate) <- runPermutation $
640    (,,) <$> toPermutationWithDefault Nothing (Just <$> priceamountp <* spaces)
641         <*> toPermutationWithDefault Nothing (Just <$> lotpricep <* spaces)
642         <*> toPermutationWithDefault Nothing (Just <$> lotdatep <* spaces)
643  pure $ amount { aprice = mprice }
644
645-- XXX Just like amountp but don't allow lot prices. Needed for balanceassertionp.
646amountpnolotprices :: JournalParser m Amount
647amountpnolotprices = label "amount" $ do
648  let spaces = lift $ skipNonNewlineSpaces
649  amount <- amountwithoutpricep
650  spaces
651  mprice <- optional $ priceamountp <* spaces
652  pure $ amount { aprice = mprice }
653
654amountwithoutpricep :: JournalParser m Amount
655amountwithoutpricep = do
656  (mult, sign) <- lift $ (,) <$> multiplierp <*> signp
657  leftsymbolamountp mult sign <|> rightornosymbolamountp mult sign
658
659  where
660
661  leftsymbolamountp :: Bool -> (Decimal -> Decimal) -> JournalParser m Amount
662  leftsymbolamountp mult sign = label "amount" $ do
663    c <- lift commoditysymbolp
664    suggestedStyle <- getAmountStyle c
665    commodityspaced <- lift skipNonNewlineSpaces'
666    sign2 <- lift $ signp
667    offBeforeNum <- getOffset
668    ambiguousRawNum <- lift rawnumberp
669    mExponent <- lift $ optional $ try exponentp
670    offAfterNum <- getOffset
671    let numRegion = (offBeforeNum, offAfterNum)
672    (q,prec,mdec,mgrps) <- lift $ interpretNumber numRegion suggestedStyle ambiguousRawNum mExponent
673    let s = amountstyle{ascommodityside=L, ascommodityspaced=commodityspaced, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
674    return $ nullamt{acommodity=c, aquantity=sign (sign2 q), aismultiplier=mult, astyle=s, aprice=Nothing}
675
676  rightornosymbolamountp :: Bool -> (Decimal -> Decimal) -> JournalParser m Amount
677  rightornosymbolamountp mult sign = label "amount" $ do
678    offBeforeNum <- getOffset
679    ambiguousRawNum <- lift rawnumberp
680    mExponent <- lift $ optional $ try exponentp
681    offAfterNum <- getOffset
682    let numRegion = (offBeforeNum, offAfterNum)
683    mSpaceAndCommodity <- lift $ optional $ try $ (,) <$> skipNonNewlineSpaces' <*> commoditysymbolp
684    case mSpaceAndCommodity of
685      -- right symbol amount
686      Just (commodityspaced, c) -> do
687        suggestedStyle <- getAmountStyle c
688        (q,prec,mdec,mgrps) <- lift $ interpretNumber numRegion suggestedStyle ambiguousRawNum mExponent
689        let s = amountstyle{ascommodityside=R, ascommodityspaced=commodityspaced, asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps}
690        return $ nullamt{acommodity=c, aquantity=sign q, aismultiplier=mult, astyle=s, aprice=Nothing}
691      -- no symbol amount
692      Nothing -> do
693        suggestedStyle <- getDefaultAmountStyle
694        (q,prec,mdec,mgrps) <- lift $ interpretNumber numRegion suggestedStyle ambiguousRawNum mExponent
695        -- if a default commodity has been set, apply it and its style to this amount
696        -- (unless it's a multiplier in an automated posting)
697        defcs <- getDefaultCommodityAndStyle
698        let (c,s) = case (mult, defcs) of
699              (False, Just (defc,defs)) -> (defc, defs{asprecision=max (asprecision defs) prec})
700              _ -> ("", amountstyle{asprecision=prec, asdecimalpoint=mdec, asdigitgroups=mgrps})
701        return $ nullamt{acommodity=c, aquantity=sign q, aismultiplier=mult, astyle=s, aprice=Nothing}
702
703  -- For reducing code duplication. Doesn't parse anything. Has the type
704  -- of a parser only in order to throw parse errors (for convenience).
705  interpretNumber
706    :: (Int, Int) -- offsets
707    -> Maybe AmountStyle
708    -> Either AmbiguousNumber RawNumber
709    -> Maybe Integer
710    -> TextParser m (Quantity, AmountPrecision, Maybe Char, Maybe DigitGroupStyle)
711  interpretNumber posRegion suggestedStyle ambiguousNum mExp =
712    let rawNum = either (disambiguateNumber suggestedStyle) id ambiguousNum
713    in  case fromRawNumber rawNum mExp of
714          Left errMsg -> customFailure $
715                           uncurry parseErrorAtRegion posRegion errMsg
716          Right (q,p,d,g) -> pure (q, Precision p, d, g)
717
718-- | Parse an amount from a string, or get an error.
719amountp' :: String -> Amount
720amountp' s =
721  case runParser (evalStateT (amountp <* eof) nulljournal) "" (T.pack s) of
722    Right amt -> amt
723    Left err  -> error' $ show err  -- PARTIAL: XXX should throwError
724
725-- | Parse a mixed amount from a string, or get an error.
726mamountp' :: String -> MixedAmount
727mamountp' = Mixed . (:[]) . amountp'
728
729-- | Parse a minus or plus sign followed by zero or more spaces,
730-- or nothing, returning a function that negates or does nothing.
731signp :: Num a => TextParser m (a -> a)
732signp = ((char '-' *> pure negate <|> char '+' *> pure id) <* skipNonNewlineSpaces) <|> pure id
733
734multiplierp :: TextParser m Bool
735multiplierp = option False $ char '*' *> pure True
736
737commoditysymbolp :: TextParser m CommoditySymbol
738commoditysymbolp =
739  quotedcommoditysymbolp <|> simplecommoditysymbolp <?> "commodity symbol"
740
741quotedcommoditysymbolp :: TextParser m CommoditySymbol
742quotedcommoditysymbolp =
743  between (char '"') (char '"') $ takeWhile1P Nothing f
744  where f c = c /= ';' && c /= '\n' && c /= '\"'
745
746simplecommoditysymbolp :: TextParser m CommoditySymbol
747simplecommoditysymbolp = takeWhile1P Nothing (not . isNonsimpleCommodityChar)
748
749priceamountp :: JournalParser m AmountPrice
750priceamountp = label "transaction price" $ do
751  -- https://www.ledger-cli.org/3.0/doc/ledger3.html#Virtual-posting-costs
752  parenthesised <- option False $ char '(' >> pure True
753  char '@'
754  priceConstructor <- char '@' *> pure TotalPrice <|> pure UnitPrice
755  when parenthesised $ void $ char ')'
756
757  lift skipNonNewlineSpaces
758  priceAmount <- amountwithoutpricep -- <?> "unpriced amount (specifying a price)"
759
760  pure $ priceConstructor priceAmount
761
762balanceassertionp :: JournalParser m BalanceAssertion
763balanceassertionp = do
764  sourcepos <- genericSourcePos <$> lift getSourcePos
765  char '='
766  istotal <- fmap isJust $ optional $ try $ char '='
767  isinclusive <- fmap isJust $ optional $ try $ char '*'
768  lift skipNonNewlineSpaces
769  -- this amount can have a price; balance assertions ignore it,
770  -- but balance assignments will use it
771  a <- amountpnolotprices <?> "amount (for a balance assertion or assignment)"
772  return BalanceAssertion
773    { baamount    = a
774    , batotal     = istotal
775    , bainclusive = isinclusive
776    , baposition  = sourcepos
777    }
778
779-- Parse a Ledger-style fixed {=UNITPRICE} or non-fixed {UNITPRICE}
780-- or fixed {{=TOTALPRICE}} or non-fixed {{TOTALPRICE}} lot price,
781-- and ignore it.
782-- https://www.ledger-cli.org/3.0/doc/ledger3.html#Fixing-Lot-Prices .
783lotpricep :: JournalParser m ()
784lotpricep = label "ledger-style lot price" $ do
785  char '{'
786  doublebrace <- option False $ char '{' >> pure True
787  _fixed <- fmap isJust $ optional $ lift skipNonNewlineSpaces >> char '='
788  lift skipNonNewlineSpaces
789  _a <- amountwithoutpricep
790  lift skipNonNewlineSpaces
791  char '}'
792  when (doublebrace) $ void $ char '}'
793  return ()
794
795-- Parse a Ledger-style lot date [DATE], and ignore it.
796-- https://www.ledger-cli.org/3.0/doc/ledger3.html#Fixing-Lot-Prices .
797lotdatep :: JournalParser m ()
798lotdatep = (do
799  char '['
800  lift skipNonNewlineSpaces
801  _d <- datep
802  lift skipNonNewlineSpaces
803  char ']'
804  return ()
805  ) <?> "ledger-style lot date"
806
807-- | Parse a string representation of a number for its value and display
808-- attributes.
809--
810-- Some international number formats are accepted, eg either period or comma
811-- may be used for the decimal mark, and the other of these may be used for
812-- separating digit groups in the integer part. See
813-- http://en.wikipedia.org/wiki/Decimal_separator for more examples.
814--
815-- This returns: the parsed numeric value, the precision (number of digits
816-- seen following the decimal mark), the decimal mark character used if any,
817-- and the digit group style if any.
818--
819numberp :: Maybe AmountStyle -> TextParser m (Quantity, Word8, Maybe Char, Maybe DigitGroupStyle)
820numberp suggestedStyle = label "number" $ do
821    -- a number is an optional sign followed by a sequence of digits possibly
822    -- interspersed with periods, commas, or both
823    -- dbgparse 0 "numberp"
824    sign <- signp
825    rawNum <- either (disambiguateNumber suggestedStyle) id <$> rawnumberp
826    mExp <- optional $ try $ exponentp
827    dbg7 "numberp suggestedStyle" suggestedStyle `seq` return ()
828    case dbg7 "numberp quantity,precision,mdecimalpoint,mgrps"
829           $ fromRawNumber rawNum mExp of
830      Left errMsg -> Fail.fail errMsg
831      Right (q, p, d, g) -> pure (sign q, p, d, g)
832
833exponentp :: TextParser m Integer
834exponentp = char' 'e' *> signp <*> decimal <?> "exponent"
835
836-- | Interpret a raw number as a decimal number.
837--
838-- Returns:
839-- - the decimal number
840-- - the precision (number of digits after the decimal point)
841-- - the decimal point character, if any
842-- - the digit group style, if any (digit group character and sizes of digit groups)
843fromRawNumber
844  :: RawNumber
845  -> Maybe Integer
846  -> Either String
847            (Quantity, Word8, Maybe Char, Maybe DigitGroupStyle)
848fromRawNumber (WithSeparators _ _ _) (Just _) =
849    Left "invalid number: mixing digit separators with exponents is not allowed"
850fromRawNumber raw mExp = do
851    (quantity, precision) <- toQuantity (fromMaybe 0 mExp) (digitGroup raw) (decimalGroup raw)
852    return (quantity, precision, mDecPt raw, digitGroupStyle raw)
853  where
854    toQuantity :: Integer -> DigitGrp -> DigitGrp -> Either String (Quantity, Word8)
855    toQuantity e preDecimalGrp postDecimalGrp
856      | precision < 0   = Right (Decimal 0 (digitGrpNum * 10^(-precision)), 0)
857      | precision < 256 = Right (Decimal precision8 digitGrpNum, precision8)
858      | otherwise = Left "invalid number: numbers with more than 255 decimal digits are not allowed at this time"
859      where
860        digitGrpNum = digitGroupNumber $ preDecimalGrp <> postDecimalGrp
861        precision   = toInteger (digitGroupLength postDecimalGrp) - e
862        precision8  = fromIntegral precision :: Word8
863
864    mDecPt (NoSeparators _ mDecimals)           = fst <$> mDecimals
865    mDecPt (WithSeparators _ _ mDecimals)       = fst <$> mDecimals
866    decimalGroup (NoSeparators _ mDecimals)     = maybe mempty snd mDecimals
867    decimalGroup (WithSeparators _ _ mDecimals) = maybe mempty snd mDecimals
868    digitGroup (NoSeparators digitGrp _)        = digitGrp
869    digitGroup (WithSeparators _ digitGrps _)   = mconcat digitGrps
870    digitGroupStyle (NoSeparators _ _)          = Nothing
871    digitGroupStyle (WithSeparators sep grps _) = Just . DigitGroups sep $ groupSizes grps
872
873    -- Outputs digit group sizes from least significant to most significant
874    groupSizes :: [DigitGrp] -> [Word8]
875    groupSizes digitGrps = reverse $ case map (fromIntegral . digitGroupLength) digitGrps of
876      (a:b:cs) | a < b -> b:cs
877      gs               -> gs
878
879
880disambiguateNumber :: Maybe AmountStyle -> AmbiguousNumber -> RawNumber
881disambiguateNumber suggestedStyle (AmbiguousNumber grp1 sep grp2) =
882  -- If present, use the suggested style to disambiguate;
883  -- otherwise, assume that the separator is a decimal point where possible.
884  if isDecimalPointChar sep &&
885     maybe True (sep `isValidDecimalBy`) suggestedStyle
886  then NoSeparators grp1 (Just (sep, grp2))
887  else WithSeparators sep [grp1, grp2] Nothing
888  where
889    isValidDecimalBy :: Char -> AmountStyle -> Bool
890    isValidDecimalBy c = \case
891      AmountStyle{asdecimalpoint = Just d} -> d == c
892      AmountStyle{asdigitgroups = Just (DigitGroups g _)} -> g /= c
893      AmountStyle{asprecision = Precision 0} -> False
894      _ -> True
895
896-- | Parse and interpret the structure of a number without external hints.
897-- Numbers are digit strings, possibly separated into digit groups by one
898-- of two types of separators. (1) Numbers may optionally have a decimal
899-- mark, which may be either a period or comma. (2) Numbers may
900-- optionally contain digit group marks, which must all be either a
901-- period, a comma, or a space.
902--
903-- It is our task to deduce the characters used as decimal mark and
904-- digit group mark, based on the allowed syntax. For instance, we
905-- make use of the fact that a decimal mark can occur at most once and
906-- must be to the right of all digit group marks.
907--
908-- >>> parseTest rawnumberp "1,234,567.89"
909-- Right (WithSeparators ',' ["1","234","567"] (Just ('.',"89")))
910-- >>> parseTest rawnumberp "1,000"
911-- Left (AmbiguousNumber "1" ',' "000")
912-- >>> parseTest rawnumberp "1 000"
913-- Right (WithSeparators ' ' ["1","000"] Nothing)
914--
915rawnumberp :: TextParser m (Either AmbiguousNumber RawNumber)
916rawnumberp = label "number" $ do
917  rawNumber <- fmap Right leadingDecimalPt <|> leadingDigits
918
919  -- Guard against mistyped numbers
920  mExtraDecimalSep <- optional $ lookAhead $ satisfy isDecimalPointChar
921  when (isJust mExtraDecimalSep) $
922    Fail.fail "invalid number (invalid use of separator)"
923
924  mExtraFragment <- optional $ lookAhead $ try $
925    char ' ' *> getOffset <* digitChar
926  case mExtraFragment of
927    Just off -> customFailure $
928                  parseErrorAt off "invalid number (excessive trailing digits)"
929    Nothing -> pure ()
930
931  return $ dbg7 "rawnumberp" rawNumber
932  where
933
934  leadingDecimalPt :: TextParser m RawNumber
935  leadingDecimalPt = do
936    decPt <- satisfy isDecimalPointChar
937    decGrp <- digitgroupp
938    pure $ NoSeparators mempty (Just (decPt, decGrp))
939
940  leadingDigits :: TextParser m (Either AmbiguousNumber RawNumber)
941  leadingDigits = do
942    grp1 <- digitgroupp
943    withSeparators grp1 <|> fmap Right (trailingDecimalPt grp1)
944                        <|> pure (Right $ NoSeparators grp1 Nothing)
945
946  withSeparators :: DigitGrp -> TextParser m (Either AmbiguousNumber RawNumber)
947  withSeparators grp1 = do
948    (sep, grp2) <- try $ (,) <$> satisfy isDigitSeparatorChar <*> digitgroupp
949    grps <- many $ try $ char sep *> digitgroupp
950
951    let digitGroups = grp1 : grp2 : grps
952    fmap Right (withDecimalPt sep digitGroups)
953      <|> pure (withoutDecimalPt grp1 sep grp2 grps)
954
955  withDecimalPt :: Char -> [DigitGrp] -> TextParser m RawNumber
956  withDecimalPt digitSep digitGroups = do
957    decPt <- satisfy $ \c -> isDecimalPointChar c && c /= digitSep
958    decDigitGrp <- option mempty digitgroupp
959
960    pure $ WithSeparators digitSep digitGroups (Just (decPt, decDigitGrp))
961
962  withoutDecimalPt
963    :: DigitGrp
964    -> Char
965    -> DigitGrp
966    -> [DigitGrp]
967    -> Either AmbiguousNumber RawNumber
968  withoutDecimalPt grp1 sep grp2 grps
969    | null grps && isDecimalPointChar sep =
970        Left $ AmbiguousNumber grp1 sep grp2
971    | otherwise = Right $ WithSeparators sep (grp1:grp2:grps) Nothing
972
973  trailingDecimalPt :: DigitGrp -> TextParser m RawNumber
974  trailingDecimalPt grp1 = do
975    decPt <- satisfy isDecimalPointChar
976    pure $ NoSeparators grp1 (Just (decPt, mempty))
977
978
979isDecimalPointChar :: Char -> Bool
980isDecimalPointChar c = c == '.' || c == ','
981
982isDigitSeparatorChar :: Char -> Bool
983isDigitSeparatorChar c = isDecimalPointChar c || c == ' '
984
985-- | Some kinds of number literal we might parse.
986data RawNumber
987  = NoSeparators   DigitGrp (Maybe (Char, DigitGrp))
988    -- ^ A number with no digit group marks (eg 100),
989    --   or with a leading or trailing comma or period
990    --   which (apparently) we interpret as a decimal mark (like 100. or .100)
991  | WithSeparators Char [DigitGrp] (Maybe (Char, DigitGrp))
992    -- ^ A number with identifiable digit group marks
993    --   (eg 1,000,000 or 1,000.50 or 1 000)
994  deriving (Show, Eq)
995
996-- | Another kind of number literal: this one contains either a digit
997-- group separator or a decimal mark, we're not sure which (eg 1,000 or 100.50).
998data AmbiguousNumber = AmbiguousNumber DigitGrp Char DigitGrp
999  deriving (Show, Eq)
1000
1001-- | Description of a single digit group in a number literal.
1002-- "Thousands" is one well known digit grouping, but there are others.
1003data DigitGrp = DigitGrp {
1004  digitGroupLength :: !Word,    -- ^ The number of digits in this group.
1005                                -- This is Word to avoid the need to do overflow
1006                                -- checking for the Semigroup instance of DigitGrp.
1007  digitGroupNumber :: !Integer  -- ^ The natural number formed by this group's digits. This should always be positive.
1008} deriving (Eq)
1009
1010-- | A custom show instance, showing digit groups as the parser saw them.
1011instance Show DigitGrp where
1012  show (DigitGrp len num) = "\"" ++ padding ++ numStr ++ "\""
1013    where numStr = show num
1014          padding = genericReplicate (toInteger len - toInteger (length numStr)) '0'
1015
1016instance Sem.Semigroup DigitGrp where
1017  DigitGrp l1 n1 <> DigitGrp l2 n2 = DigitGrp (l1 + l2) (n1 * 10^l2 + n2)
1018
1019instance Monoid DigitGrp where
1020  mempty = DigitGrp 0 0
1021  mappend = (Sem.<>)
1022
1023digitgroupp :: TextParser m DigitGrp
1024digitgroupp = label "digits"
1025            $ makeGroup <$> takeWhile1P (Just "digit") isDigit
1026  where
1027    makeGroup = uncurry DigitGrp . foldl' step (0, 0) . T.unpack
1028    step (!l, !a) c = (l+1, a*10 + fromIntegral (digitToInt c))
1029
1030--- *** comments
1031
1032multilinecommentp :: TextParser m ()
1033multilinecommentp = startComment *> anyLine `skipManyTill` endComment
1034  where
1035    startComment = string "comment" *> trailingSpaces
1036    endComment = eof <|> string "end comment" *> trailingSpaces
1037
1038    trailingSpaces = skipNonNewlineSpaces <* newline
1039    anyLine = void $ takeWhileP Nothing (\c -> c /= '\n') *> newline
1040
1041{-# INLINABLE multilinecommentp #-}
1042
1043-- | A blank or comment line in journal format: a line that's empty or
1044-- containing only whitespace or whose first non-whitespace character
1045-- is semicolon, hash, or star.
1046emptyorcommentlinep :: TextParser m ()
1047emptyorcommentlinep = do
1048  skipNonNewlineSpaces
1049  skiplinecommentp <|> void newline
1050  where
1051    skiplinecommentp :: TextParser m ()
1052    skiplinecommentp = do
1053      satisfy $ \c -> c == ';' || c == '#' || c == '*'
1054      void $ takeWhileP Nothing (\c -> c /= '\n')
1055      optional newline
1056      pure ()
1057
1058{-# INLINABLE emptyorcommentlinep #-}
1059
1060-- A parser combinator for parsing (possibly multiline) comments
1061-- following journal items.
1062--
1063-- Several journal items may be followed by comments, which begin with
1064-- semicolons and extend to the end of the line. Such comments may span
1065-- multiple lines, but comment lines below the journal item must be
1066-- preceded by leading whitespace.
1067--
1068-- This parser combinator accepts a parser that consumes all input up
1069-- until the next newline. This parser should extract the "content" from
1070-- comments. The resulting parser returns this content plus the raw text
1071-- of the comment itself.
1072--
1073-- See followingcommentp for tests.
1074--
1075followingcommentp' :: (Monoid a, Show a) => TextParser m a -> TextParser m (Text, a)
1076followingcommentp' contentp = do
1077  skipNonNewlineSpaces
1078  -- there can be 0 or 1 sameLine
1079  sameLine <- try headerp *> ((:[]) <$> match' contentp) <|> pure []
1080  _ <- eolof
1081  -- there can be 0 or more nextLines
1082  nextLines <- many $
1083    try (skipNonNewlineSpaces1 *> headerp) *> match' contentp <* eolof
1084  let
1085    -- if there's just a next-line comment, insert an empty same-line comment
1086    -- so the next-line comment doesn't get rendered as a same-line comment.
1087    sameLine' | null sameLine && not (null nextLines) = [("",mempty)]
1088              | otherwise = sameLine
1089    (texts, contents) = unzip $ sameLine' ++ nextLines
1090    strippedCommentText = T.unlines $ map T.strip texts
1091    commentContent = mconcat contents
1092  pure (strippedCommentText, commentContent)
1093
1094  where
1095    headerp = char ';' *> skipNonNewlineSpaces
1096
1097{-# INLINABLE followingcommentp' #-}
1098
1099-- | Parse the text of a (possibly multiline) comment following a journal item.
1100--
1101-- >>> rtp followingcommentp ""   -- no comment
1102-- Right ""
1103-- >>> rtp followingcommentp ";"    -- just a (empty) same-line comment. newline is added
1104-- Right "\n"
1105-- >>> rtp followingcommentp ";  \n"
1106-- Right "\n"
1107-- >>> rtp followingcommentp ";\n ;\n"  -- a same-line and a next-line comment
1108-- Right "\n\n"
1109-- >>> rtp followingcommentp "\n ;\n"  -- just a next-line comment. Insert an empty same-line comment so the next-line comment doesn't become a same-line comment.
1110-- Right "\n\n"
1111--
1112followingcommentp :: TextParser m Text
1113followingcommentp =
1114  fst <$> followingcommentp' (void $ takeWhileP Nothing (/= '\n'))  -- XXX support \r\n ?
1115{-# INLINABLE followingcommentp #-}
1116
1117
1118-- | Parse a transaction comment and extract its tags.
1119--
1120-- The first line of a transaction may be followed by comments, which
1121-- begin with semicolons and extend to the end of the line. Transaction
1122-- comments may span multiple lines, but comment lines below the
1123-- transaction must be preceded by leading whitespace.
1124--
1125-- 2000/1/1 ; a transaction comment starting on the same line ...
1126--   ; extending to the next line
1127--   account1  $1
1128--   account2
1129--
1130-- Tags are name-value pairs.
1131--
1132-- >>> let getTags (_,tags) = tags
1133-- >>> let parseTags = fmap getTags . rtp transactioncommentp
1134--
1135-- >>> parseTags "; name1: val1, name2:all this is value2"
1136-- Right [("name1","val1"),("name2","all this is value2")]
1137--
1138-- A tag's name must be immediately followed by a colon, without
1139-- separating whitespace. The corresponding value consists of all the text
1140-- following the colon up until the next colon or newline, stripped of
1141-- leading and trailing whitespace.
1142--
1143transactioncommentp :: TextParser m (Text, [Tag])
1144transactioncommentp = followingcommentp' commenttagsp
1145{-# INLINABLE transactioncommentp #-}
1146
1147commenttagsp :: TextParser m [Tag]
1148commenttagsp = do
1149  tagName <- fmap (last . T.split isSpace)
1150            $ takeWhileP Nothing (\c -> c /= ':' && c /= '\n')
1151  atColon tagName <|> pure [] -- if not ':', then either '\n' or EOF
1152
1153  where
1154    atColon :: Text -> TextParser m [Tag]
1155    atColon name = char ':' *> do
1156      if T.null name
1157        then commenttagsp
1158        else do
1159          skipNonNewlineSpaces
1160          val <- tagValue
1161          let tag = (name, val)
1162          (tag:) <$> commenttagsp
1163
1164    tagValue :: TextParser m Text
1165    tagValue = do
1166      val <- T.strip <$> takeWhileP Nothing (\c -> c /= ',' && c /= '\n')
1167      _ <- optional $ char ','
1168      pure val
1169
1170{-# INLINABLE commenttagsp #-}
1171
1172
1173-- | Parse a posting comment and extract its tags and dates.
1174--
1175-- Postings may be followed by comments, which begin with semicolons and
1176-- extend to the end of the line. Posting comments may span multiple
1177-- lines, but comment lines below the posting must be preceded by
1178-- leading whitespace.
1179--
1180-- 2000/1/1
1181--   account1  $1 ; a posting comment starting on the same line ...
1182--   ; extending to the next line
1183--
1184--   account2
1185--   ; a posting comment beginning on the next line
1186--
1187-- Tags are name-value pairs.
1188--
1189-- >>> let getTags (_,tags,_,_) = tags
1190-- >>> let parseTags = fmap getTags . rtp (postingcommentp Nothing)
1191--
1192-- >>> parseTags "; name1: val1, name2:all this is value2"
1193-- Right [("name1","val1"),("name2","all this is value2")]
1194--
1195-- A tag's name must be immediately followed by a colon, without
1196-- separating whitespace. The corresponding value consists of all the text
1197-- following the colon up until the next colon or newline, stripped of
1198-- leading and trailing whitespace.
1199--
1200-- Posting dates may be expressed with "date"/"date2" tags or with
1201-- bracketed date syntax. Posting dates will inherit their year from the
1202-- transaction date if the year is not specified. We throw parse errors on
1203-- invalid dates.
1204--
1205-- >>> let getDates (_,_,d1,d2) = (d1, d2)
1206-- >>> let parseDates = fmap getDates . rtp (postingcommentp (Just 2000))
1207--
1208-- >>> parseDates "; date: 1/2, date2: 1999/12/31"
1209-- Right (Just 2000-01-02,Just 1999-12-31)
1210-- >>> parseDates "; [1/2=1999/12/31]"
1211-- Right (Just 2000-01-02,Just 1999-12-31)
1212--
1213-- Example: tags, date tags, and bracketed dates
1214-- >>> rtp (postingcommentp (Just 2000)) "; a:b, date:3/4, [=5/6]"
1215-- Right ("a:b, date:3/4, [=5/6]\n",[("a","b"),("date","3/4")],Just 2000-03-04,Just 2000-05-06)
1216--
1217-- Example: extraction of dates from date tags ignores trailing text
1218-- >>> rtp (postingcommentp (Just 2000)) "; date:3/4=5/6"
1219-- Right ("date:3/4=5/6\n",[("date","3/4=5/6")],Just 2000-03-04,Nothing)
1220--
1221postingcommentp
1222  :: Maybe Year -> TextParser m (Text, [Tag], Maybe Day, Maybe Day)
1223postingcommentp mYear = do
1224  (commentText, (tags, dateTags)) <-
1225    followingcommentp' (commenttagsanddatesp mYear)
1226  let mdate  = fmap snd $ find ((=="date") .fst) dateTags
1227      mdate2 = fmap snd $ find ((=="date2").fst) dateTags
1228  pure (commentText, tags, mdate, mdate2)
1229{-# INLINABLE postingcommentp #-}
1230
1231
1232commenttagsanddatesp
1233  :: Maybe Year -> TextParser m ([Tag], [DateTag])
1234commenttagsanddatesp mYear = do
1235  (txt, dateTags) <- match $ readUpTo ':'
1236  -- next char is either ':' or '\n' (or EOF)
1237  let tagName = last (T.split isSpace txt)
1238  (fmap.second) (dateTags++) (atColon tagName) <|> pure ([], dateTags) -- if not ':', then either '\n' or EOF
1239
1240  where
1241    readUpTo :: Char -> TextParser m [DateTag]
1242    readUpTo end = do
1243      void $ takeWhileP Nothing (\c -> c /= end && c /= '\n' && c /= '[')
1244      -- if not '[' then ':' or '\n' or EOF
1245      atBracket (readUpTo end) <|> pure []
1246
1247    atBracket :: TextParser m [DateTag] -> TextParser m [DateTag]
1248    atBracket cont = do
1249      -- Uses the fact that bracketed date-tags cannot contain newlines
1250      dateTags <- option [] $ lookAhead (bracketeddatetagsp mYear)
1251      _ <- char '['
1252      dateTags' <- cont
1253      pure $ dateTags ++ dateTags'
1254
1255    atColon :: Text -> TextParser m ([Tag], [DateTag])
1256    atColon name = char ':' *> do
1257      skipNonNewlineSpaces
1258      (tags, dateTags) <- case name of
1259        ""      -> pure ([], [])
1260        "date"  -> dateValue name
1261        "date2" -> dateValue name
1262        _       -> tagValue name
1263      _ <- optional $ char ','
1264      bimap (tags++) (dateTags++) <$> commenttagsanddatesp mYear
1265
1266    dateValue :: Text -> TextParser m ([Tag], [DateTag])
1267    dateValue name = do
1268      (txt, (date, dateTags)) <- match' $ do
1269        date <- datep' mYear
1270        dateTags <- readUpTo ','
1271        pure (date, dateTags)
1272      let val = T.strip txt
1273      pure $ ( [(name, val)]
1274             , (name, date) : dateTags )
1275
1276    tagValue :: Text -> TextParser m ([Tag], [DateTag])
1277    tagValue name = do
1278      (txt, dateTags) <- match' $ readUpTo ','
1279      let val = T.strip txt
1280      pure $ ( [(name, val)]
1281             , dateTags )
1282
1283{-# INLINABLE commenttagsanddatesp #-}
1284
1285-- | Parse Ledger-style bracketed posting dates ([DATE=DATE2]), as
1286-- "date" and/or "date2" tags. Anything that looks like an attempt at
1287-- this (a square-bracketed sequence of 0123456789/-.= containing at
1288-- least one digit and one date separator) is also parsed, and will
1289-- throw an appropriate error.
1290--
1291-- The dates are parsed in full here so that errors are reported in
1292-- the right position. A missing year in DATE can be inferred if a
1293-- default date is provided. A missing year in DATE2 will be inferred
1294-- from DATE.
1295--
1296-- >>> either (Left . customErrorBundlePretty) Right $ rtp (bracketeddatetagsp Nothing) "[2016/1/2=3/4]"
1297-- Right [("date",2016-01-02),("date2",2016-03-04)]
1298--
1299-- >>> either (Left . customErrorBundlePretty) Right $ rtp (bracketeddatetagsp Nothing) "[1]"
1300-- Left ...not a bracketed date...
1301--
1302-- >>> either (Left . customErrorBundlePretty) Right $ rtp (bracketeddatetagsp Nothing) "[2016/1/32]"
1303-- Left ...1:2:...well-formed but invalid date: 2016/1/32...
1304--
1305-- >>> either (Left . customErrorBundlePretty) Right $ rtp (bracketeddatetagsp Nothing) "[1/31]"
1306-- Left ...1:2:...partial date 1/31 found, but the current year is unknown...
1307--
1308-- >>> either (Left . customErrorBundlePretty) Right $ rtp (bracketeddatetagsp Nothing) "[0123456789/-.=/-.=]"
1309-- Left ...1:13:...expecting month or day...
1310--
1311bracketeddatetagsp
1312  :: Maybe Year -> TextParser m [(TagName, Day)]
1313bracketeddatetagsp mYear1 = do
1314  -- dbgparse 0 "bracketeddatetagsp"
1315  try $ do
1316    s <- lookAhead
1317       $ between (char '[') (char ']')
1318       $ takeWhile1P Nothing isBracketedDateChar
1319    unless (T.any isDigit s && T.any isDateSepChar s) $
1320      Fail.fail "not a bracketed date"
1321  -- Looks sufficiently like a bracketed date to commit to parsing a date
1322
1323  between (char '[') (char ']') $ do
1324    md1 <- optional $ datep' mYear1
1325
1326    let mYear2 = fmap readYear md1 <|> mYear1
1327    md2 <- optional $ char '=' *> datep' mYear2
1328
1329    pure $ catMaybes [("date",) <$> md1, ("date2",) <$> md2]
1330
1331  where
1332    readYear = first3 . toGregorian
1333    isBracketedDateChar c = isDigit c || isDateSepChar c || c == '='
1334
1335{-# INLINABLE bracketeddatetagsp #-}
1336
1337--- ** tests
1338
1339tests_Common = tests "Common" [
1340
1341   tests "amountp" [
1342    test "basic"                  $ assertParseEq amountp "$47.18"     (usd 47.18)
1343   ,test "ends with decimal mark" $ assertParseEq amountp "$1."        (usd 1  `withPrecision` Precision 0)
1344   ,test "unit price"             $ assertParseEq amountp "$10 @ €0.5"
1345      -- not precise enough:
1346      -- (usd 10 `withPrecision` 0 `at` (eur 0.5 `withPrecision` 1)) -- `withStyle` asdecimalpoint=Just '.'
1347      amount{
1348         acommodity="$"
1349        ,aquantity=10 -- need to test internal precision with roundTo ? I think not
1350        ,astyle=amountstyle{asprecision=Precision 0, asdecimalpoint=Nothing}
1351        ,aprice=Just $ UnitPrice $
1352          amount{
1353             acommodity="€"
1354            ,aquantity=0.5
1355            ,astyle=amountstyle{asprecision=Precision 1, asdecimalpoint=Just '.'}
1356            }
1357        }
1358   ,test "total price"            $ assertParseEq amountp "$10 @@ €5"
1359      amount{
1360         acommodity="$"
1361        ,aquantity=10
1362        ,astyle=amountstyle{asprecision=Precision 0, asdecimalpoint=Nothing}
1363        ,aprice=Just $ TotalPrice $
1364          amount{
1365             acommodity="€"
1366            ,aquantity=5
1367            ,astyle=amountstyle{asprecision=Precision 0, asdecimalpoint=Nothing}
1368            }
1369        }
1370   ,test "unit price, parenthesised" $ assertParse amountp "$10 (@) €0.5"
1371   ,test "total price, parenthesised" $ assertParse amountp "$10 (@@) €0.5"
1372   ]
1373
1374  ,let p = lift (numberp Nothing) :: JournalParser IO (Quantity, Word8, Maybe Char, Maybe DigitGroupStyle) in
1375   test "numberp" $ do
1376     assertParseEq p "0"          (0, 0, Nothing, Nothing)
1377     assertParseEq p "1"          (1, 0, Nothing, Nothing)
1378     assertParseEq p "1.1"        (1.1, 1, Just '.', Nothing)
1379     assertParseEq p "1,000.1"    (1000.1, 1, Just '.', Just $ DigitGroups ',' [3])
1380     assertParseEq p "1.00.000,1" (100000.1, 1, Just ',', Just $ DigitGroups '.' [3,2])
1381     assertParseEq p "1,000,000"  (1000000, 0, Nothing, Just $ DigitGroups ',' [3,3])  -- could be simplified to [3]
1382     assertParseEq p "1."         (1, 0, Just '.', Nothing)
1383     assertParseEq p "1,"         (1, 0, Just ',', Nothing)
1384     assertParseEq p ".1"         (0.1, 1, Just '.', Nothing)
1385     assertParseEq p ",1"         (0.1, 1, Just ',', Nothing)
1386     assertParseError p "" ""
1387     assertParseError p "1,000.000,1" ""
1388     assertParseError p "1.000,000.1" ""
1389     assertParseError p "1,000.000.1" ""
1390     assertParseError p "1,,1" ""
1391     assertParseError p "1..1" ""
1392     assertParseError p ".1," ""
1393     assertParseError p ",1." ""
1394     assertParseEq    p "1.555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555" (1.555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555, 255, Just '.', Nothing)
1395     assertParseError p "1.5555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555555" ""
1396
1397  ,tests "spaceandamountormissingp" [
1398     test "space and amount" $ assertParseEq spaceandamountormissingp " $47.18" (Mixed [usd 47.18])
1399    ,test "empty string" $ assertParseEq spaceandamountormissingp "" missingmixedamt
1400    -- ,test "just space" $ assertParseEq spaceandamountormissingp " " missingmixedamt  -- XXX should it ?
1401    -- ,test "just amount" $ assertParseError spaceandamountormissingp "$47.18" ""  -- succeeds, consuming nothing
1402    ]
1403
1404  ]
1405