1{-# LANGUAGE ViewPatterns      #-}
2{-# LANGUAGE LambdaCase #-}
3{-# LANGUAGE OverloadedStrings #-}
4{-# LANGUAGE ScopedTypeVariables #-}
5{-# LANGUAGE FlexibleContexts #-}
6{-# OPTIONS_GHC -fno-warn-unused-do-bind #-}
7-----------------------------------------------------------------------------
8-- |
9-- Module      :  Text.CSL.Input.Bibtex
10-- Copyright   :  (c) John MacFarlane
11-- License     :  BSD-style (see LICENSE)
12--
13-- Maintainer  :  John MacFarlane <fiddlosopher@gmail.com>
14-- Stability   :  unstable-- Portability :  unportable
15--
16-----------------------------------------------------------------------------
17
18module Text.Pandoc.Citeproc.BibTeX
19    ( Variant(..)
20    , readBibtexString
21    , writeBibtexString
22    )
23    where
24
25import Text.Pandoc.Definition
26import Text.Pandoc.Builder as B
27import Text.Pandoc.Readers.LaTeX (readLaTeX)
28import Text.Pandoc.Extensions (Extension(..), extensionsFromList)
29import Text.Pandoc.Options (ReaderOptions(..), WriterOptions)
30import Text.Pandoc.Error (PandocError)
31import Text.Pandoc.Shared (stringify)
32import Text.Pandoc.Writers.LaTeX (writeLaTeX)
33import Text.Pandoc.Class (runPure)
34import qualified Text.Pandoc.Walk       as Walk
35import Citeproc.Types
36import Citeproc.Pandoc ()
37import Text.Pandoc.Citeproc.Util (toIETF)
38import Text.Pandoc.Citeproc.Data (biblatexStringMap)
39import Data.Default
40import           Data.Text              (Text)
41import qualified Data.Text              as T
42import qualified Data.Map               as Map
43import           Data.Maybe
44import           Text.Pandoc.Parsing hiding ((<|>), many)
45import           Control.Applicative
46import           Data.List.Split        (splitOn, splitWhen, wordsBy)
47import           Control.Monad.RWS      hiding ((<>))
48import qualified Data.Sequence          as Seq
49import           Data.Char              (isAlphaNum, isDigit, isLetter,
50                                         isUpper, toLower, toUpper,
51                                         isLower, isPunctuation)
52import           Data.List              (foldl', intercalate, intersperse)
53import           Safe                   (readMay)
54import           Text.Printf            (printf)
55
56data Variant = Bibtex | Biblatex
57  deriving (Show, Eq, Ord)
58
59-- | Parse BibTeX or BibLaTeX into a list of 'Reference's.
60readBibtexString :: Variant           -- ^ bibtex or biblatex
61                 -> Locale            -- ^ Locale
62                 -> (Text -> Bool)    -- ^ Filter on citation ids
63                 -> Text              -- ^ bibtex/biblatex text
64                 -> Either ParseError [Reference Inlines]
65readBibtexString variant locale idpred contents = do
66  case runParser (((resolveCrossRefs variant <$> bibEntries) <* eof) >>=
67                   mapM (itemToReference locale variant) .
68                      filter (\item -> idpred (identifier item) &&
69                                        entryType item /= "xdata"))
70           (fromMaybe defaultLang $ localeLanguage locale, Map.empty)
71           "" contents of
72          Left err -> Left err
73          Right xs -> return xs
74
75-- | Write BibTeX or BibLaTeX given given a 'Reference'.
76writeBibtexString :: WriterOptions       -- ^ options (for writing LaTex)
77                  -> Variant             -- ^ bibtex or biblatex
78                  -> Maybe Lang          -- ^ Language
79                  -> Reference Inlines   -- ^ Reference to write
80                  -> Text
81writeBibtexString opts variant mblang ref =
82  "@" <> bibtexType <> "{" <> unItemId (referenceId ref) <> ",\n  " <>
83  renderFields fs <> "\n}\n"
84
85 where
86  bibtexType =
87    case referenceType ref of
88      "article-magazine"  -> "article"
89      "article-newspaper" -> "article"
90      "article-journal"   -> "article"
91      "book"              -> "book"
92      "pamphlet"          -> "booklet"
93      "dataset" | variant == Biblatex -> "dataset"
94      "webpage" | variant == Biblatex -> "online"
95      "chapter"           -> case getVariable "editor" of
96                                Just _  -> "incollection"
97                                Nothing -> "inbook"
98      "entry-encyclopedia" | variant == Biblatex -> "inreference"
99                           | otherwise -> "inbook"
100      "paper-conference"  -> "inproceedings"
101      "thesis" -> case getVariableAsText "genre" of
102                    Just "mathesis" -> "mastersthesis"
103                    _               -> "phdthesis"
104      "patent"            | variant == Biblatex -> "patent"
105      "report"            | variant == Biblatex -> "report"
106                          | otherwise -> "techreport"
107      "speech"            -> "unpublished"
108      "manuscript"        -> "unpublished"
109      "graphic"           | variant == Biblatex -> "artwork"
110      "song"              | variant == Biblatex -> "music"
111      "legal_case"        | variant == Biblatex -> "jurisdictionN"
112      "legislation"       | variant == Biblatex -> "legislation"
113      "treaty"            | variant == Biblatex -> "legal"
114      "personal_communication" | variant == Biblatex -> "letter"
115      "motion_picture"    | variant == Biblatex -> "movie"
116      "review"             | variant == Biblatex -> "review"
117      _                   -> "misc"
118
119  mbSubtype =
120    case referenceType ref of
121      "article-magazine"  -> Just "magazine"
122      "article-newspaper" -> Just "newspaper"
123      _ -> Nothing
124
125  fs =
126    case variant of
127      Biblatex ->
128           [ "author"
129           , "editor"
130           , "translator"
131           , "publisher"
132           , "title"
133           , "booktitle"
134           , "journal"
135           , "series"
136           , "edition"
137           , "volume"
138           , "volumes"
139           , "number"
140           , "pages"
141           , "date"
142           , "eventdate"
143           , "urldate"
144           , "address"
145           , "url"
146           , "doi"
147           , "isbn"
148           , "issn"
149           , "type"
150           , "entrysubtype"
151           , "note"
152           , "language"
153           , "abstract"
154           , "keywords"
155           ]
156      Bibtex ->
157           [ "author"
158           , "editor"
159           , "translator"
160           , "publisher"
161           , "title"
162           , "booktitle"
163           , "journal"
164           , "series"
165           , "edition"
166           , "volume"
167           , "number"
168           , "pages"
169           , "year"
170           , "month"
171           , "address"
172           , "type"
173           , "note"
174           ]
175
176  valToInlines (TextVal t) = B.text t
177  valToInlines (FancyVal ils) = ils
178  valToInlines (NumVal n) = B.text (T.pack $ show n)
179  valToInlines (NamesVal names) =
180    mconcat $ intersperse (B.space <> B.text "and" <> B.space)
181            $ map renderName names
182  valToInlines (DateVal date) = B.text $
183    case dateLiteral date of
184      Just t  -> t
185      Nothing -> T.intercalate "/" (map renderDatePart (dateParts date)) <>
186                    (if dateCirca date then "~" else mempty)
187
188  renderDatePart (DateParts xs) = T.intercalate "-" $
189                                    map (T.pack . printf "%02d") xs
190
191  renderName name =
192    case nameLiteral name of
193      Just t  -> B.text t
194      Nothing -> spacedMaybes
195                  [ nameNonDroppingParticle name
196                  , nameFamily name
197                  , if nameCommaSuffix name
198                        then (", " <>) <$> nameSuffix name
199                        else nameSuffix name ]
200                  <>
201                  spacedMaybes
202                   [ (", " <>) <$> nameGiven name,
203                     nameDroppingParticle name ]
204
205  titlecase = case mblang of
206                Just (Lang "en" _) -> titlecase'
207                Nothing            -> titlecase'
208                _                  -> id
209
210  titlecase' = addTextCase mblang TitleCase .
211    (\ils -> B.fromList
212               (case B.toList ils of
213                  Str t : xs -> Str t : Walk.walk spanAroundCapitalizedWords xs
214                  xs         -> Walk.walk spanAroundCapitalizedWords xs))
215
216  -- protect capitalized words when we titlecase
217  spanAroundCapitalizedWords (Str t)
218    | not (T.all (\c -> isLower c || not (isLetter c)) t) =
219       Span ("",["nocase"],[]) [Str t]
220  spanAroundCapitalizedWords x = x
221
222  spacedMaybes = mconcat . intersperse B.space . mapMaybe (fmap B.text)
223
224  toLaTeX x =
225    case runPure (writeLaTeX opts $ doc (B.plain x)) of
226           Left _  -> Nothing
227           Right t -> Just t
228
229  renderField name = (\contents -> name <> " = {" <> contents <> "}")
230                      <$> getContentsFor name
231
232  getVariable v = lookupVariable (toVariable v) ref
233
234  getVariableAsText v = (stringify . valToInlines) <$> getVariable v
235
236  getYear val =
237    case val of
238       DateVal date ->
239         case dateLiteral date of
240           Just t -> toLaTeX (B.text t)
241           Nothing ->
242             case dateParts date of
243               [DateParts (y1:_), DateParts (y2:_)] ->
244                 Just (T.pack (printf "%04d" y1) <> "--" <>
245                        T.pack (printf "%04d" y2))
246               [DateParts (y1:_)] ->
247                 Just (T.pack (printf "%04d" y1))
248               _ -> Nothing
249       _ -> Nothing
250
251  toMonth 1 = "jan"
252  toMonth 2 = "feb"
253  toMonth 3 = "mar"
254  toMonth 4 = "apr"
255  toMonth 5 = "may"
256  toMonth 6 = "jun"
257  toMonth 7 = "jul"
258  toMonth 8 = "aug"
259  toMonth 9 = "sep"
260  toMonth 10 = "oct"
261  toMonth 11 = "nov"
262  toMonth 12 = "dec"
263  toMonth x  = T.pack $ show x
264
265  getMonth val =
266    case val of
267       DateVal date ->
268         case dateParts date of
269           [DateParts (_:m1:_), DateParts (_:m2:_)] ->
270             Just (toMonth m1 <> "--" <> toMonth m2)
271           [DateParts (_:m1:_)] -> Just (toMonth m1)
272           _ -> Nothing
273       _ -> Nothing
274
275  getContentsFor :: Text -> Maybe Text
276  getContentsFor "type" =
277    getVariableAsText "genre" >>=
278       \case
279          "mathesis"  -> Just "mastersthesis"
280          "phdthesis" -> Just "phdthesis"
281          _           -> Nothing
282  getContentsFor "entrysubtype" = mbSubtype
283  getContentsFor "journal"
284    | bibtexType `elem` ["article", "periodical", "suppperiodical", "review"]
285      = getVariable "container-title" >>= toLaTeX . valToInlines
286    | otherwise = Nothing
287  getContentsFor "booktitle"
288    | bibtexType `elem`
289       ["inbook","incollection","inproceedings","inreference","bookinbook"]
290    = (getVariable "volume-title" <|> getVariable "container-title")
291                               >>= toLaTeX . valToInlines
292    | otherwise = Nothing
293  getContentsFor "series" = getVariable "collection-title"
294                               >>= toLaTeX . valToInlines
295  getContentsFor "address" = getVariable "publisher-place"
296                               >>= toLaTeX . valToInlines
297  getContentsFor "date"  = getVariable "issued" >>= toLaTeX . valToInlines
298  getContentsFor "eventdate" = getVariable "event-date" >>= toLaTeX . valToInlines
299  getContentsFor "urldate"  = getVariable "accessed" >>= toLaTeX . valToInlines
300  getContentsFor "year"  = getVariable "issued" >>= getYear
301  getContentsFor "month"  = getVariable "issued" >>= getMonth
302  getContentsFor "number" = (getVariable "number"
303                         <|> getVariable "collection-number"
304                         <|> getVariable "issue") >>= toLaTeX . valToInlines
305
306  getContentsFor x = getVariable x >>=
307    if isURL x
308       then Just . stringify . valToInlines
309       else toLaTeX .
310            (if x == "title"
311                then titlecase
312                else id) .
313            valToInlines
314
315  isURL x = x `elem` ["url","doi","issn","isbn"]
316
317  renderFields = T.intercalate ",\n  " . mapMaybe renderField
318
319defaultLang :: Lang
320defaultLang = Lang "en" (Just "US")
321
322-- a map of bibtex "string" macros
323type StringMap = Map.Map Text Text
324
325type BibParser = Parser Text (Lang, StringMap)
326
327data Item = Item{ identifier :: Text
328                , sourcePos  :: SourcePos
329                , entryType  :: Text
330                , fields     :: Map.Map Text Text
331                }
332                deriving (Show, Ord, Eq)
333
334itemToReference :: Locale -> Variant -> Item -> BibParser (Reference Inlines)
335itemToReference locale variant item = do
336  setPosition (sourcePos item)
337  bib item $ do
338    let lang = fromMaybe defaultLang $ localeLanguage locale
339    modify $ \st -> st{ localeLang = lang,
340                        untitlecase = case lang of
341                                           (Lang "en" _) -> True
342                                           _             -> False }
343
344    id' <- asks identifier
345    otherIds <- (Just <$> getRawField "ids")
346                  <|> return Nothing
347    (reftype, genre) <- getTypeAndGenre
348    -- hyphenation:
349    let getLangId = do
350             langid <- T.strip . T.toLower <$> getRawField "langid"
351             idopts <- T.strip . T.toLower . stringify <$>
352                           getField "langidopts" <|> return ""
353             case (langid, idopts) of
354                  ("english","variant=british")    -> return "british"
355                  ("english","variant=american")   -> return "american"
356                  ("english","variant=us")         -> return "american"
357                  ("english","variant=usmax")      -> return "american"
358                  ("english","variant=uk")         -> return "british"
359                  ("english","variant=australian") -> return "australian"
360                  ("english","variant=newzealand") -> return "newzealand"
361                  (x,_)                            -> return x
362    hyphenation <- (Just . toIETF . T.toLower <$>
363                     (getLangId <|> getRawField "hyphenation"))
364                  <|> return Nothing
365    modify $ \s -> s{ untitlecase = untitlecase s &&
366                                      case hyphenation of
367                                        Just x -> "en-" `T.isPrefixOf` x
368                                        _ -> True }
369
370
371    opts <- (parseOptions <$> getRawField "options") <|> return []
372
373    et <- asks entryType
374
375    -- titles
376    let isArticle = et `elem`
377                     ["article", "periodical", "suppperiodical", "review"]
378    let isPeriodical = et == "periodical"
379    let isChapterlike = et `elem`
380           ["inbook","incollection","inproceedings","inreference","bookinbook"]
381
382    let getFieldMaybe f = (Just <$> getField f) <|> return Nothing
383
384    -- names
385    let getNameList' f = Just <$>
386         getNameList (("bibtex", case variant of
387                                      Bibtex   -> "true"
388                                      Biblatex -> "false") : opts) f
389
390    author' <- getNameList' "author" <|> return Nothing
391    containerAuthor' <- getNameList' "bookauthor" <|> return Nothing
392    translator' <- getNameList' "translator" <|> return Nothing
393    editortype <- getRawField "editortype" <|> return mempty
394    editor'' <- getNameList' "editor" <|> return Nothing
395    director'' <- getNameList' "director" <|> return Nothing
396    let (editor', director') = case editortype of
397                                    "director" -> (Nothing, editor'')
398                                    _          -> (editor'', director'')
399    -- FIXME: add same for editora, editorb, editorc
400
401    -- dates
402    issued' <- (Just <$> (getDate "date" <|> getOldDate mempty)) <|>
403               return Nothing
404    eventDate' <- (Just <$> (getDate "eventdate" <|> getOldDate "event")) <|>
405                   return Nothing
406    origDate' <- (Just <$> (getDate "origdate" <|> getOldDate "orig")) <|>
407                   return Nothing
408    accessed' <- (Just <$> (getDate "urldate" <|> getOldDate "url")) <|>
409                    return Nothing
410
411    -- locators
412    pages' <- getFieldMaybe "pages"
413    volume' <- getFieldMaybe "volume"
414    part' <- getFieldMaybe "part"
415    volumes' <- getFieldMaybe "volumes"
416    pagetotal' <- getFieldMaybe "pagetotal"
417    chapter' <- getFieldMaybe "chapter"
418    edition' <- getFieldMaybe "edition"
419    version' <- getFieldMaybe "version"
420    (number', collectionNumber', issue') <-
421       (getField "number" >>= \x ->
422         if et `elem` ["book","collection","proceedings","reference",
423                       "mvbook","mvcollection","mvproceedings", "mvreference",
424                       "bookinbook","inbook", "incollection","inproceedings",
425                       "inreference", "suppbook","suppcollection"]
426         then return (Nothing, Just x, Nothing)
427         else if isArticle
428              then (getField "issue" >>= \y ->
429                      return (Nothing, Nothing, Just $ concatWith ',' [x,y]))
430                 <|> return (Nothing, Nothing, Just x)
431              else return (Just x, Nothing, Nothing))
432        <|> return (Nothing, Nothing, Nothing)
433
434    -- titles
435    hasMaintitle <- (True <$ getRawField "maintitle") <|> return False
436
437    title' <- Just <$>
438              ((guard isPeriodical >> getTitle "issuetitle")
439              <|> (guard hasMaintitle >>
440                   guard (not isChapterlike) >>
441                   getTitle "maintitle")
442              <|> getTitle "title")
443              <|> return Nothing
444
445    subtitle' <- (guard isPeriodical >> getTitle "issuesubtitle")
446                  <|> (guard hasMaintitle >>
447                       guard (not isChapterlike) >>
448                       getTitle "mainsubtitle")
449                  <|> getTitle "subtitle"
450                  <|> return mempty
451    titleaddon' <- (guard hasMaintitle >>
452                     guard (not isChapterlike) >>
453                     getTitle "maintitleaddon")
454                    <|> getTitle "titleaddon"
455                    <|> return mempty
456
457    volumeTitle' <- Just <$>
458                    ((guard hasMaintitle >>
459                      guard (not isChapterlike) >>
460                      getTitle "title")
461                     <|> (guard hasMaintitle >>
462                          guard isChapterlike >>
463                          getTitle "booktitle"))
464                    <|> return Nothing
465    volumeSubtitle' <- (guard hasMaintitle >>
466                        guard (not isChapterlike) >>
467                        getTitle "subtitle")
468                       <|> (guard hasMaintitle >>
469                            guard isChapterlike >>
470                            getTitle "booksubtitle")
471                       <|> return mempty
472    volumeTitleAddon' <- (guard hasMaintitle >>
473                          guard (not isChapterlike) >>
474                          getTitle "titleaddon")
475                         <|> (guard hasMaintitle >>
476                              guard isChapterlike >>
477                              getTitle "booktitleaddon")
478                         <|> return mempty
479
480    containerTitle' <- Just <$>
481                       ((guard isPeriodical >> getPeriodicalTitle "title")
482                       <|> (guard isChapterlike >> getTitle "maintitle")
483                       <|> (guard isChapterlike >> getTitle "booktitle")
484                       <|> getPeriodicalTitle "journaltitle"
485                       <|> getPeriodicalTitle "journal")
486                       <|> return Nothing
487    containerSubtitle' <- (guard isPeriodical >> getPeriodicalTitle "subtitle")
488                          <|> (guard isChapterlike >> getTitle "mainsubtitle")
489                          <|> (guard isChapterlike >> getTitle "booksubtitle")
490                          <|> getPeriodicalTitle "journalsubtitle"
491                          <|> return mempty
492    containerTitleAddon' <- (guard isPeriodical >>
493                             getPeriodicalTitle "titleaddon")
494                            <|> (guard isChapterlike >>
495                                 getTitle "maintitleaddon")
496                            <|> (guard isChapterlike >>
497                                 getTitle "booktitleaddon")
498                            <|> return mempty
499    containerTitleShort' <- Just <$>
500                            ((guard isPeriodical >>
501                              guard (not hasMaintitle) >>
502                              getField "shorttitle")
503                            <|> getPeriodicalTitle "shortjournal")
504                           <|> return Nothing
505
506    -- change numerical series title to e.g. 'series 3'
507    let fixSeriesTitle [Str xs] | isNumber xs =
508          [Str (ordinalize locale xs), Space, Str (resolveKey' lang "jourser")]
509        fixSeriesTitle xs = xs
510    seriesTitle' <- (Just . B.fromList . fixSeriesTitle .
511                     B.toList . resolveKey lang <$>
512                        getTitle "series") <|>
513                    return Nothing
514    shortTitle' <- (Just <$> (guard (not hasMaintitle || isChapterlike) >>
515                              getTitle "shorttitle"))
516                 <|> (if (subtitle' /= mempty || titleaddon' /= mempty) &&
517                          not hasMaintitle
518                          then getShortTitle False "title"
519                          else getShortTitle True  "title")
520                 <|> return Nothing
521
522    eventTitle' <- Just <$> getTitle "eventtitle" <|> return Nothing
523    origTitle' <- Just <$> getTitle "origtitle" <|> return Nothing
524
525    -- publisher
526    pubfields <- mapM (\f -> Just `fmap`
527                         (if variant == Bibtex || f == "howpublished"
528                          then getField f
529                          else getLiteralList' f)
530                        <|> return Nothing)
531           ["school","institution","organization", "howpublished","publisher"]
532    let publisher' = case catMaybes pubfields of
533                       [] -> Nothing
534                       xs -> Just $ concatWith ';' xs
535    origpublisher' <- (Just <$> getField "origpublisher") <|> return Nothing
536
537    -- places
538    venue' <- (Just <$> getField "venue") <|> return Nothing
539    address' <- Just <$>
540                  (if variant == Bibtex
541                      then getField "address"
542                      else getLiteralList' "address"
543                         <|> (guard (et /= "patent") >>
544                              getLiteralList' "location"))
545                <|> return Nothing
546    origLocation' <- Just <$>
547                  (if variant == Bibtex
548                      then getField "origlocation"
549                      else getLiteralList' "origlocation")
550                    <|> return Nothing
551    jurisdiction' <- if reftype == "patent"
552                     then Just <$>
553                        (concatWith ';' . map (resolveKey lang) <$>
554                             getLiteralList "location") <|> return Nothing
555                     else return Nothing
556
557    -- url, doi, isbn, etc.:
558    -- note that with eprinttype = arxiv, we take eprint to be a partial url
559    -- archivePrefix is an alias for eprinttype
560    url' <- (guard (et == "online" || lookup "url" opts /= Just "false")
561             >> Just <$> getRawField "url")
562         <|> (do etype <- getRawField "eprinttype"
563                 eprint <- getRawField "eprint"
564                 let baseUrl =
565                       case T.toLower etype of
566                         "arxiv"       -> "http://arxiv.org/abs/"
567                         "jstor"       -> "http://www.jstor.org/stable/"
568                         "pubmed"      -> "http://www.ncbi.nlm.nih.gov/pubmed/"
569                         "googlebooks" -> "http://books.google.com?id="
570                         _             -> ""
571                 if T.null baseUrl
572                    then mzero
573                    else return $ Just $ baseUrl <> eprint)
574         <|> return Nothing
575    doi' <- (guard (lookup "doi" opts /= Just "false") >>
576             Just <$> getRawField "doi")
577           <|> return Nothing
578    isbn' <- Just <$> getRawField "isbn" <|> return Nothing
579    issn' <- Just <$> getRawField "issn" <|> return Nothing
580    pmid' <- Just <$> getRawField  "pmid" <|> return Nothing
581    pmcid' <- Just <$> getRawField "pmcid" <|> return Nothing
582    callNumber' <- Just <$> getRawField "library" <|> return Nothing
583
584    -- notes
585    annotation' <- Just <$>
586                   (getField "annotation" <|> getField "annote")
587                     <|> return Nothing
588    abstract' <- Just <$> getField "abstract" <|> return Nothing
589    keywords' <- Just <$> getField "keywords" <|> return Nothing
590    note' <- if et == "periodical"
591             then return Nothing
592             else Just <$> getField "note" <|> return Nothing
593    addendum' <- if variant == Bibtex
594                    then return Nothing
595                    else Just <$> getField "addendum"
596                 <|> return Nothing
597    pubstate' <- (  (Just . resolveKey lang <$> getField "pubstate")
598                  <|> case dateLiteral <$> issued' of
599                           Just (Just "forthcoming") ->
600                             return $ Just $ B.str "forthcoming"
601                           _ -> return Nothing
602                   )
603
604
605
606
607    let addField (_, Nothing) = id
608        addField (f, Just x)  = Map.insert f x
609    let vars = foldr addField mempty
610                [ ("other-ids", TextVal <$> otherIds)
611                , ("genre", TextVal <$> genre)
612                , ("language", TextVal <$> hyphenation)
613                -- dates
614                , ("accessed", DateVal <$> accessed')
615                , ("event-date", DateVal <$> eventDate')
616                , ("issued", DateVal <$> issued')
617                , ("original-date", DateVal <$> origDate')
618                -- names
619                , ("author", NamesVal <$> author')
620                , ("editor", NamesVal <$> editor')
621                , ("translator", NamesVal <$> translator')
622                , ("director", NamesVal <$> director')
623                , ("container-author", NamesVal <$> containerAuthor')
624                -- locators
625                , ("page", FancyVal . Walk.walk convertEnDash <$> pages')
626                , ("number-of-pages", FancyVal <$> pagetotal')
627                , ("volume", case (volume', part') of
628                               (Nothing, Nothing) -> Nothing
629                               (Just v, Nothing) -> Just $ FancyVal v
630                               (Nothing, Just p) -> Just $ FancyVal p
631                               (Just v, Just p)  ->
632                                 Just $ FancyVal $ v <> B.str "." <> p)
633                , ("number-of-volumes", FancyVal <$> volumes')
634                , ("chapter-number", FancyVal <$> chapter')
635                , ("edition", FancyVal <$> edition')
636                , ("version", FancyVal <$> version')
637                , ("number", FancyVal <$> number')
638                , ("collection-number", FancyVal <$> collectionNumber')
639                , ("issue", FancyVal <$> issue')
640                -- title
641                , ("original-title", FancyVal <$> origTitle')
642                , ("event", FancyVal <$> eventTitle')
643                , ("title", case title' of
644                              Just t -> Just $ FancyVal $
645                                         concatWith '.' [
646                                             concatWith ':' [t, subtitle']
647                                           , titleaddon' ]
648                              Nothing -> Nothing)
649                , ("volume-title",
650                            case volumeTitle' of
651                              Just t -> Just $ FancyVal $
652                                         concatWith '.' [
653                                             concatWith ':' [t, volumeSubtitle']
654                                           , volumeTitleAddon' ]
655                              Nothing -> Nothing)
656                , ("container-title",
657                            case containerTitle' of
658                              Just t -> Just $ FancyVal $
659                                         concatWith '.' [
660                                             concatWith ':' [t,
661                                               containerSubtitle']
662                                           , containerTitleAddon' ]
663                              Nothing -> Nothing)
664                , ("container-title-short", FancyVal <$> containerTitleShort')
665                , ("collection-title", FancyVal <$> seriesTitle')
666                , ("title-short", FancyVal <$> shortTitle')
667                -- publisher
668                , ("publisher", FancyVal <$> publisher')
669                , ("original-publisher", FancyVal <$> origpublisher')
670                -- places
671                , ("jurisdiction", FancyVal <$> jurisdiction')
672                , ("event-place",  FancyVal <$> venue')
673                , ("publisher-place", FancyVal <$> address')
674                , ("original-publisher-place", FancyVal <$> origLocation')
675                -- urls
676                , ("url", TextVal <$> url')
677                , ("doi", TextVal <$> doi')
678                , ("isbn", TextVal <$> isbn')
679                , ("issn", TextVal <$> issn')
680                , ("pmcid", TextVal <$> pmcid')
681                , ("pmid", TextVal <$> pmid')
682                , ("call-number", TextVal <$> callNumber')
683                -- notes
684                , ("note", case catMaybes [note', addendum'] of
685                             [] -> Nothing
686                             xs -> return $ FancyVal $ concatWith '.' xs)
687                , ("annote", FancyVal <$> annotation')
688                , ("abstract", FancyVal <$> abstract')
689                , ("keyword", FancyVal <$> keywords')
690                , ("status", FancyVal <$> pubstate')
691                ]
692    return $ Reference
693      { referenceId             = ItemId id'
694      , referenceType           = reftype
695      , referenceDisambiguation = Nothing
696      , referenceVariables      = vars }
697
698
699bib :: Item -> Bib a -> BibParser a
700bib entry m = fst <$> evalRWST m entry (BibState True (Lang "en" (Just "US")))
701
702resolveCrossRefs :: Variant -> [Item] -> [Item]
703resolveCrossRefs variant entries =
704  map (resolveCrossRef variant entries) entries
705
706resolveCrossRef :: Variant -> [Item] -> Item -> Item
707resolveCrossRef variant entries entry =
708  Map.foldrWithKey go entry (fields entry)
709  where go key val entry' =
710          if key == "crossref" || key == "xdata"
711          then entry'{ fields = fields entry' <>
712                          Map.fromList (getXrefFields variant
713                                        entry entries val) }
714          else entry'
715
716getXrefFields :: Variant -> Item -> [Item] -> Text -> [(Text, Text)]
717getXrefFields variant baseEntry entries keys = do
718  let keys' = splitKeys keys
719  xrefEntry <- [e | e <- entries, identifier e `elem` keys']
720  (k, v) <- Map.toList $ fields xrefEntry
721  if k == "crossref" || k == "xdata"
722     then do
723       xs <- mapM (getXrefFields variant baseEntry entries)
724                   (splitKeys v)
725       (x, y) <- xs
726       guard $ isNothing $ Map.lookup x $ fields xrefEntry
727       return (x, y)
728     else do
729       k' <- case variant of
730               Bibtex -> return k
731               Biblatex -> transformKey
732                            (entryType xrefEntry) (entryType baseEntry) k
733       guard $ isNothing $ Map.lookup k' $ fields baseEntry
734       return (k',v)
735
736
737
738data BibState = BibState{
739           untitlecase    :: Bool
740         , localeLang     :: Lang
741         }
742
743type Bib = RWST Item () BibState BibParser
744
745blocksToInlines :: [Block] -> Inlines
746blocksToInlines bs =
747  case bs of
748       [Plain xs] -> B.fromList xs
749       [Para  xs] -> B.fromList xs
750       _          -> B.fromList $ Walk.query (:[]) bs
751
752adjustSpans :: Lang -> Inline -> Inline
753adjustSpans lang (RawInline (Format "latex") s)
754  | s == "\\hyphen" || s == "\\hyphen " = Str "-"
755  | otherwise = parseRawLaTeX lang s
756adjustSpans _ SoftBreak = Space
757adjustSpans _ x = x
758
759parseRawLaTeX :: Lang -> Text -> Inline
760parseRawLaTeX lang t@(T.stripPrefix "\\" -> Just xs) =
761  case parseLaTeX lang contents of
762          Right [Para ys]  -> f command ys
763          Right [Plain ys] -> f command ys
764          Right []         -> f command []
765          _                -> RawInline (Format "latex") t
766   where (command', contents') = T.break (\c -> c =='{' || c =='\\') xs
767         command  = T.strip command'
768         contents = T.drop 1 $ T.dropEnd 1 contents'
769         f "mkbibquote"    ils = Span nullAttr [Quoted DoubleQuote ils]
770         f "mkbibemph"     ils = Span nullAttr [Emph ils]
771         f "mkbibitalic"   ils = Span nullAttr [Emph ils]
772         f "mkbibbold"     ils = Span nullAttr [Strong ils]
773         f "mkbibparens"   ils = Span nullAttr $
774                                  [Str "("] ++ ils ++ [Str ")"]
775         f "mkbibbrackets" ils = Span nullAttr $
776                                  [Str "["] ++ ils ++ [Str "]"]
777         -- ... both should be nestable & should work in year fields
778         f "autocap"    ils    = Span nullAttr ils
779           -- TODO: should work in year fields
780         f "textnormal" ils    = Span ("",["nodecor"],[]) ils
781         f "bibstring" [Str s] = Str $ resolveKey' lang s
782         f "adddot"    []      = Str "."
783         f "adddotspace" []    = Span nullAttr [Str ".", Space]
784         f "addabbrvspace" []  = Space
785         f _            ils    = Span nullAttr ils
786parseRawLaTeX _ t = RawInline (Format "latex") t
787
788latex' :: Text -> Bib [Block]
789latex' t = do
790  lang <- gets localeLang
791  case parseLaTeX lang t of
792    Left _   -> mzero
793    Right bs -> return bs
794
795parseLaTeX :: Lang -> Text -> Either PandocError [Block]
796parseLaTeX lang t =
797  case runPure (readLaTeX
798                def{ readerExtensions =
799                      extensionsFromList [Ext_raw_tex, Ext_smart] } t) of
800    Left e              -> Left e
801    Right (Pandoc _ bs) -> Right $ Walk.walk (adjustSpans lang) bs
802
803latex :: Text -> Bib Inlines
804latex = fmap blocksToInlines . latex' . T.strip
805
806type Options = [(Text, Text)]
807
808parseOptions :: Text -> Options
809parseOptions = map breakOpt . T.splitOn ","
810  where breakOpt x = case T.break (=='=') x of
811                          (w,v) -> (T.toLower $ T.strip w,
812                                    T.toLower $ T.strip $ T.drop 1 v)
813
814bibEntries :: BibParser [Item]
815bibEntries = do
816  skipMany nonEntry
817  many (bibItem <* skipMany nonEntry)
818 where nonEntry = bibSkip <|>
819                  try (char '@' >>
820                       (bibComment <|> bibPreamble <|> bibString))
821
822bibSkip :: BibParser ()
823bibSkip = () <$ take1WhileP (/='@')
824
825bibComment :: BibParser ()
826bibComment = do
827  cistring "comment"
828  spaces
829  void inBraces <|> bibSkip <|> return ()
830
831bibPreamble :: BibParser ()
832bibPreamble = do
833  cistring "preamble"
834  spaces
835  void inBraces
836
837bibString :: BibParser ()
838bibString = do
839  cistring "string"
840  spaces
841  char '{'
842  spaces
843  (k,v) <- entField
844  char '}'
845  updateState (\(l,m) -> (l, Map.insert k v m))
846  return ()
847
848inBraces :: BibParser Text
849inBraces = do
850  char '{'
851  res <- manyTill
852         (  take1WhileP (\c -> c /= '{' && c /= '}' && c /= '\\')
853        <|> (char '\\' >> (  (char '{' >> return "\\{")
854                         <|> (char '}' >> return "\\}")
855                         <|> return "\\"))
856        <|> (braced <$> inBraces)
857         ) (char '}')
858  return $ T.concat res
859
860braced :: Text -> Text
861braced = T.cons '{' . flip T.snoc '}'
862
863inQuotes :: BibParser Text
864inQuotes = do
865  char '"'
866  T.concat <$> manyTill
867             ( take1WhileP (\c -> c /= '{' && c /= '"' && c /= '\\')
868               <|> (char '\\' >> T.cons '\\' . T.singleton <$> anyChar)
869               <|> braced <$> inBraces
870            ) (char '"')
871
872fieldName :: BibParser Text
873fieldName = resolveAlias . T.toLower
874  <$> take1WhileP (\c ->
875         isAlphaNum c || c == '-' || c == '_' || c == ':' || c == '+')
876
877isBibtexKeyChar :: Char -> Bool
878isBibtexKeyChar c =
879  isAlphaNum c || c `elem` (".:;?!`'()/*@_+=-[]*&" :: [Char])
880
881bibItem :: BibParser Item
882bibItem = do
883  char '@'
884  pos <- getPosition
885  enttype <- T.toLower <$> take1WhileP isLetter
886  spaces
887  char '{'
888  spaces
889  entid <- take1WhileP isBibtexKeyChar
890  spaces
891  char ','
892  spaces
893  entfields <- entField `sepEndBy` (char ',' >> spaces)
894  spaces
895  char '}'
896  return $ Item entid pos enttype (Map.fromList entfields)
897
898entField :: BibParser (Text, Text)
899entField = do
900  k <- fieldName
901  spaces
902  char '='
903  spaces
904  vs <- (expandString <|> inQuotes <|> inBraces <|> rawWord) `sepBy`
905            try (spaces >> char '#' >> spaces)
906  spaces
907  return (k, T.concat vs)
908
909resolveAlias :: Text -> Text
910resolveAlias "archiveprefix" = "eprinttype"
911resolveAlias "primaryclass" = "eprintclass"
912resolveAlias s = s
913
914rawWord :: BibParser Text
915rawWord = take1WhileP isAlphaNum
916
917expandString :: BibParser Text
918expandString = do
919  k <- fieldName
920  (lang, strs) <- getState
921  case Map.lookup k strs of
922       Just v  -> return v
923       Nothing -> return $ resolveKey' lang k
924
925cistring :: Text -> BibParser Text
926cistring s = try (go s)
927 where go t = case T.uncons t of
928         Nothing     -> return ""
929         Just (c,cs) -> do
930           x <- char (toLower c) <|> char (toUpper c)
931           xs <- go cs
932           return (T.cons x xs)
933
934splitKeys :: Text -> [Text]
935splitKeys = filter (not . T.null) . T.split (\c -> c == ' ' || c == ',')
936
937-- Biblatex Localization Keys (see Biblatex manual)
938-- Currently we only map a subset likely to be used in Biblatex *databases*
939-- (in fields such as `type`, and via `\bibstring{}` commands).
940
941parseMonth :: Text -> Maybe Int
942parseMonth s =
943  case T.toLower s of
944         "jan" -> Just 1
945         "feb" -> Just 2
946         "mar" -> Just 3
947         "apr" -> Just 4
948         "may" -> Just 5
949         "jun" -> Just 6
950         "jul" -> Just 7
951         "aug" -> Just 8
952         "sep" -> Just 9
953         "oct" -> Just 10
954         "nov" -> Just 11
955         "dec" -> Just 12
956         _     -> readMay (T.unpack s)
957
958notFound :: Text -> Bib a
959notFound f = Prelude.fail $ T.unpack f ++ " not found"
960
961getField :: Text -> Bib Inlines
962getField f = do
963  fs <- asks fields
964  case Map.lookup f fs of
965       Just x  -> latex x
966       Nothing -> notFound f
967
968
969getPeriodicalTitle :: Text -> Bib Inlines
970getPeriodicalTitle f = do
971  ils <- getField f
972  return ils
973
974protectCase :: (Inlines -> Inlines) -> (Inlines -> Inlines)
975protectCase f = Walk.walk unprotect . f . Walk.walk protect
976 where
977  protect (Span ("",[],[]) xs) = Span ("",["nocase"],[]) xs
978  protect  x = x
979  unprotect (Span ("",["nocase"],[]) xs)
980    | hasLowercaseWord xs = Span ("",["nocase"],[]) xs
981    | otherwise           = Span ("",[],[]) xs
982  unprotect x = x
983  hasLowercaseWord = any startsWithLowercase . splitStrWhen isPunctuation
984  startsWithLowercase (Str (T.uncons -> Just (x,_))) = isLower x
985  startsWithLowercase _           = False
986
987unTitlecase :: Maybe Lang -> Inlines -> Inlines
988unTitlecase mblang = protectCase (addTextCase mblang SentenceCase)
989
990getTitle :: Text -> Bib Inlines
991getTitle f = do
992  ils <- getField f
993  utc <- gets untitlecase
994  lang <- gets localeLang
995  let processTitle = if utc then unTitlecase (Just lang) else id
996  return $ processTitle ils
997
998getShortTitle :: Bool -> Text -> Bib (Maybe Inlines)
999getShortTitle requireColon f = do
1000  ils <- splitStrWhen (==':') . B.toList <$> getTitle f
1001  if not requireColon || containsColon ils
1002     then return $ Just $ B.fromList $ upToColon ils
1003     else return Nothing
1004
1005containsColon :: [Inline] -> Bool
1006containsColon xs = Str ":" `elem` xs
1007
1008upToColon :: [Inline] -> [Inline]
1009upToColon xs = takeWhile (/= Str ":") xs
1010
1011isNumber :: Text -> Bool
1012isNumber t = case T.uncons t of
1013  Just ('-', ds) -> T.all isDigit ds
1014  Just _         -> T.all isDigit t
1015  Nothing        -> False
1016
1017getDate :: Text -> Bib Date
1018getDate f = do
1019  -- the ~ can used for approx dates, but the latex reader
1020  -- parses this as a nonbreaking space, so we need to convert it back!
1021  let nbspToTilde '\160' = '~'
1022      nbspToTilde c      = c
1023  mbd <- rawDateEDTF . T.map nbspToTilde <$> getRawField f
1024  case mbd of
1025    Nothing -> Prelude.fail "expected date"
1026    Just d  -> return d
1027
1028-- A negative (BC) year might be written with -- or --- in bibtex:
1029fixLeadingDash :: Text -> Text
1030fixLeadingDash t = case T.uncons t of
1031  Just (c, ds) | (c == '–' || c == '—') && firstIsDigit ds -> T.cons '–' ds
1032  _ -> t
1033 where firstIsDigit = maybe False (isDigit . fst) . T.uncons
1034
1035getOldDate :: Text -> Bib Date
1036getOldDate prefix = do
1037  year' <- (readMay . T.unpack . fixLeadingDash . stringify
1038              <$> getField (prefix <> "year")) <|> return Nothing
1039  month' <- (parseMonth <$> getRawField (prefix <> "month"))
1040            <|> return Nothing
1041  day' <- (readMay . T.unpack <$> getRawField (prefix <> "day"))
1042          <|> return Nothing
1043  endyear' <- (readMay . T.unpack . fixLeadingDash . stringify
1044              <$> getField (prefix <> "endyear")) <|> return Nothing
1045  endmonth' <- (parseMonth . stringify
1046                 <$> getField (prefix <> "endmonth")) <|> return Nothing
1047  endday' <- (readMay . T.unpack . stringify <$>
1048                 getField (prefix <> "endday")) <|> return Nothing
1049  let toDateParts (y', m', d') =
1050              DateParts $
1051                 case y' of
1052                   Nothing -> []
1053                   Just y  ->
1054                     case m' of
1055                       Nothing -> [y]
1056                       Just m  ->
1057                         case d' of
1058                           Nothing -> [y,m]
1059                           Just d  -> [y,m,d]
1060  let dateparts = filter (\x -> x /= DateParts [])
1061                  $ map toDateParts [(year',month',day'),
1062                                     (endyear',endmonth',endday')]
1063  literal <- if null dateparts
1064                then Just <$> getRawField (prefix <> "year")
1065                else return Nothing
1066  return $
1067    Date { dateParts = dateparts
1068         , dateCirca = False
1069         , dateSeason = Nothing
1070         , dateLiteral = literal }
1071
1072getRawField :: Text -> Bib Text
1073getRawField f = do
1074  fs <- asks fields
1075  case Map.lookup f fs of
1076       Just x  -> return x
1077       Nothing -> notFound f
1078
1079getLiteralList :: Text -> Bib [Inlines]
1080getLiteralList f = do
1081  fs <- asks fields
1082  case Map.lookup f fs of
1083       Just x  -> latex' x >>= toLiteralList
1084       Nothing -> notFound f
1085
1086-- separates items with semicolons
1087getLiteralList' :: Text -> Bib Inlines
1088getLiteralList' f = do
1089  fs <- asks fields
1090  case Map.lookup f fs of
1091    Just x    -> do
1092      x' <- latex' x
1093      case x' of
1094        [Para xs]  ->
1095          return $ B.fromList
1096                 $ intercalate [Str ";", Space]
1097                 $ splitByAnd xs
1098        [Plain xs] ->
1099          return $ B.fromList
1100                 $ intercalate [Str ";", Space]
1101                 $ splitByAnd xs
1102        _          -> mzero
1103    Nothing   -> notFound f
1104
1105splitByAnd :: [Inline] -> [[Inline]]
1106splitByAnd = splitOn [Space, Str "and", Space]
1107
1108toLiteralList :: [Block] -> Bib [Inlines]
1109toLiteralList [Para xs] =
1110  return $ map B.fromList $ splitByAnd xs
1111toLiteralList [Plain xs] = toLiteralList [Para xs]
1112toLiteralList _ = mzero
1113
1114concatWith :: Char -> [Inlines] -> Inlines
1115concatWith sep = foldl' go mempty
1116  where go :: Inlines -> Inlines -> Inlines
1117        go accum s
1118          | s == mempty = accum
1119          | otherwise   =
1120              case Seq.viewr (B.unMany accum) of
1121                     Seq.EmptyR -> s
1122                     _ Seq.:> Str x
1123                       | not (T.null x) &&
1124                         T.last x `elem` ("!?.,:;" :: String)
1125                                    -> accum <> B.space <> s
1126                     _ -> accum <> B.str (T.singleton sep) <>
1127                                                B.space <> s
1128
1129
1130getNameList :: Options -> Text -> Bib [Name]
1131getNameList opts  f = do
1132  fs <- asks fields
1133  case Map.lookup f fs of
1134       Just x  -> latexNames opts x
1135       Nothing -> notFound f
1136
1137toNameList :: Options -> [Block] -> Bib [Name]
1138toNameList opts [Para xs] =
1139  filter (/= emptyName) <$> mapM (toName opts . addSpaceAfterPeriod)
1140                                    (splitByAnd xs)
1141toNameList opts [Plain xs] = toNameList opts [Para xs]
1142toNameList _ _ = mzero
1143
1144latexNames :: Options -> Text -> Bib [Name]
1145latexNames opts t = latex' (T.strip t) >>= toNameList opts
1146
1147-- see issue 392 for motivation.  We want to treat
1148-- "J.G. Smith" and "J. G. Smith" the same.
1149addSpaceAfterPeriod :: [Inline] -> [Inline]
1150addSpaceAfterPeriod = go . splitStrWhen (=='.')
1151  where
1152    go [] = []
1153    go (Str (T.unpack -> [c]):Str ".":Str (T.unpack -> [d]):xs)
1154      | isLetter d
1155      , isLetter c
1156      , isUpper c
1157      , isUpper d
1158        = Str (T.singleton c):Str ".":Space:go (Str (T.singleton d):xs)
1159    go (x:xs) = x:go xs
1160
1161emptyName :: Name
1162emptyName =
1163    Name {  nameFamily              = Nothing
1164          , nameGiven               = Nothing
1165          , nameDroppingParticle    = Nothing
1166          , nameNonDroppingParticle = Nothing
1167          , nameSuffix              = Nothing
1168          , nameLiteral             = Nothing
1169          , nameCommaSuffix         = False
1170          , nameStaticOrdering      = False
1171          }
1172
1173toName :: Options -> [Inline] -> Bib Name
1174toName _ [Str "others"] =
1175  return emptyName{ nameLiteral = Just "others" }
1176toName _ [Span ("",[],[]) ils] = -- corporate author
1177  return emptyName{ nameLiteral = Just $ stringify ils }
1178 -- extended BibLaTeX name format - see #266
1179toName _ ils@(Str ys:_) | T.any (== '=') ys = do
1180  let commaParts = splitWhen (== Str ",")
1181                   . splitStrWhen (\c -> c == ',' || c == '=' || c == '\160')
1182                   $ ils
1183  let addPart ag (Str "given" : Str "=" : xs) =
1184        ag{ nameGiven = case nameGiven ag of
1185                          Nothing -> Just $ stringify xs
1186                          Just t  -> Just $ t <> " " <> stringify xs }
1187      addPart ag (Str "family" : Str "=" : xs) =
1188        ag{ nameFamily = Just $ stringify xs }
1189      addPart ag (Str "prefix" : Str "=" : xs) =
1190        ag{ nameDroppingParticle =  Just $ stringify xs }
1191      addPart ag (Str "useprefix" : Str "=" : Str "true" : _) =
1192        ag{ nameNonDroppingParticle = nameDroppingParticle ag
1193          , nameDroppingParticle    = Nothing }
1194      addPart ag (Str "suffix" : Str "=" : xs) =
1195        ag{ nameSuffix = Just $ stringify xs }
1196      addPart ag (Space : xs) = addPart ag xs
1197      addPart ag _ = ag
1198  return $ foldl' addPart emptyName commaParts
1199-- First von Last
1200-- von Last, First
1201-- von Last, Jr ,First
1202-- NOTE: biblatex and bibtex differ on:
1203-- Drummond de Andrade, Carlos
1204-- bibtex takes "Drummond de" as the von;
1205-- biblatex takes the whole as a last name.
1206-- See https://github.com/plk/biblatex/issues/236
1207-- Here we implement the more sensible biblatex behavior.
1208toName opts ils = do
1209  let useprefix = optionSet "useprefix" opts
1210  let usecomma  = optionSet "juniorcomma" opts
1211  let bibtex    = optionSet "bibtex" opts
1212  let words' = wordsBy (\x -> x == Space || x == Str "\160")
1213  let commaParts = map words' $ splitWhen (== Str ",")
1214                              $ splitStrWhen
1215                                   (\c -> c == ',' || c == '\160') ils
1216  let (first, vonlast, jr) =
1217          case commaParts of
1218               --- First is the longest sequence of white-space separated
1219               -- words starting with an uppercase and that is not the
1220               -- whole string. von is the longest sequence of whitespace
1221               -- separated words whose last word starts with lower case
1222               -- and that is not the whole string.
1223               [fvl]      -> let (caps', rest') = span isCapitalized fvl
1224                             in  if null rest' && not (null caps')
1225                                 then (init caps', [last caps'], [])
1226                                 else (caps', rest', [])
1227               [vl,f]     -> (f, vl, [])
1228               (vl:j:f:_) -> (f, vl, j )
1229               []         -> ([], [], [])
1230
1231  let (von, lastname) =
1232         if bibtex
1233            then case span isCapitalized $ reverse vonlast of
1234                        ([],w:ws) -> (reverse ws, [w])
1235                        (vs, ws)    -> (reverse ws, reverse vs)
1236            else case break isCapitalized vonlast of
1237                        (vs@(_:_), []) -> (init vs, [last vs])
1238                        (vs, ws)       -> (vs, ws)
1239  let prefix = T.unwords $ map stringify von
1240  let family = T.unwords $ map stringify lastname
1241  let suffix = T.unwords $ map stringify jr
1242  let given = T.unwords $ map stringify first
1243  return
1244    Name {  nameFamily              = if T.null family
1245                                         then Nothing
1246                                         else Just family
1247          , nameGiven               = if T.null given
1248                                         then Nothing
1249                                         else Just given
1250          , nameDroppingParticle    = if useprefix || T.null prefix
1251                                         then Nothing
1252                                         else Just prefix
1253          , nameNonDroppingParticle = if useprefix && not (T.null prefix)
1254                                         then Just prefix
1255                                         else Nothing
1256          , nameSuffix              = if T.null suffix
1257                                         then Nothing
1258                                         else Just suffix
1259          , nameLiteral             = Nothing
1260          , nameCommaSuffix         = usecomma
1261          , nameStaticOrdering      = False
1262          }
1263
1264splitStrWhen :: (Char -> Bool) -> [Inline] -> [Inline]
1265splitStrWhen _ [] = []
1266splitStrWhen p (Str xs : ys) = map Str (go xs) ++ splitStrWhen p ys
1267  where go s =
1268          let (w,z) = T.break p s
1269           in if T.null z
1270                 then if T.null w
1271                         then []
1272                         else [w]
1273                 else if T.null w
1274                         then (T.take 1 z : go (T.drop 1 z))
1275                         else (w : T.take 1 z : go (T.drop 1 z))
1276splitStrWhen p (x : ys) = x : splitStrWhen p ys
1277
1278ordinalize :: Locale -> Text -> Text
1279ordinalize locale n =
1280  let terms = localeTerms locale
1281      pad0 t = case T.length t of
1282                 0 -> "00"
1283                 1 -> "0" <> t
1284                 _ -> t
1285   in case Map.lookup ("ordinal-" <> pad0 n) terms <|>
1286           Map.lookup "ordinal" terms of
1287        Nothing    -> n
1288        Just []    -> n
1289        Just (t:_) -> n <> snd t
1290
1291isCapitalized :: [Inline] -> Bool
1292isCapitalized (Str (T.uncons -> Just (c,cs)) : rest)
1293  | isUpper c = True
1294  | isDigit c = isCapitalized (Str cs : rest)
1295  | otherwise = False
1296isCapitalized (_:rest) = isCapitalized rest
1297isCapitalized [] = True
1298
1299optionSet :: Text -> Options -> Bool
1300optionSet key opts = case lookup key opts of
1301                      Just "true" -> True
1302                      Just s      -> s == mempty
1303                      _           -> False
1304
1305getTypeAndGenre :: Bib (Text, Maybe Text)
1306getTypeAndGenre = do
1307  lang <- gets localeLang
1308  et <- asks entryType
1309  reftype' <- resolveKey' lang <$> getRawField "type"
1310         <|> return mempty
1311  st <- getRawField "entrysubtype" <|> return mempty
1312  isEvent <- (True <$ (getRawField "eventdate"
1313                     <|> getRawField "eventtitle"
1314                     <|> getRawField "venue")) <|> return False
1315  let reftype =
1316        case et of
1317           "article"
1318             | st == "magazine"  -> "article-magazine"
1319             | st == "newspaper" -> "article-newspaper"
1320             | otherwise         -> "article-journal"
1321           "book"                -> "book"
1322           "booklet"             -> "pamphlet"
1323           "bookinbook"          -> "chapter"
1324           "collection"          -> "book"
1325           "dataset"             -> "dataset"
1326           "electronic"          -> "webpage"
1327           "inbook"              -> "chapter"
1328           "incollection"        -> "chapter"
1329           "inreference"         -> "entry-encyclopedia"
1330           "inproceedings"       -> "paper-conference"
1331           "manual"              -> "book"
1332           "mastersthesis"       -> "thesis"
1333           "misc"                -> ""
1334           "mvbook"              -> "book"
1335           "mvcollection"        -> "book"
1336           "mvproceedings"       -> "book"
1337           "mvreference"         -> "book"
1338           "online"              -> "webpage"
1339           "patent"              -> "patent"
1340           "periodical"
1341             | st == "magazine"  -> "article-magazine"
1342             | st == "newspaper" -> "article-newspaper"
1343             | otherwise         -> "article-journal"
1344           "phdthesis"           -> "thesis"
1345           "proceedings"         -> "book"
1346           "reference"           -> "book"
1347           "report"              -> "report"
1348           "software"            -> "book"    -- no "software" type in CSL
1349           "suppbook"            -> "chapter"
1350           "suppcollection"      -> "chapter"
1351           "suppperiodical"
1352             | st == "magazine"  -> "article-magazine"
1353             | st == "newspaper" -> "article-newspaper"
1354             | otherwise         -> "article-journal"
1355           "techreport"          -> "report"
1356           "thesis"              -> "thesis"
1357           "unpublished"         -> if isEvent then "speech" else "manuscript"
1358           "www"                 -> "webpage"
1359           -- biblatex, "unsupported"
1360           "artwork"             -> "graphic"
1361           "audio"               -> "song"    -- for audio *recordings*
1362           "commentary"          -> "book"
1363           "image"               -> "graphic"   -- or "figure" ?
1364           "jurisdiction"        -> "legal_case"
1365           "legislation"         -> "legislation"  -- or "bill" ?
1366           "legal"               -> "treaty"
1367           "letter"              -> "personal_communication"
1368           "movie"               -> "motion_picture"
1369           "music"               -> "song"        -- for musical *recordings*
1370           "performance"         -> "speech"
1371           "review"              -> "review"      -- or "review-book" ?
1372           "standard"            -> "legislation"
1373           "video"               -> "motion_picture"
1374           -- biblatex-apa:
1375           "data"                -> "dataset"
1376           "letters"             -> "personal_communication"
1377           "newsarticle"         -> "article-newspaper"
1378           _                     -> ""
1379
1380  let refgenre =
1381        case et of
1382          "mastersthesis"  -> if T.null reftype'
1383                                 then Just $ resolveKey' lang "mathesis"
1384                                 else Just reftype'
1385          "phdthesis"      -> if T.null reftype'
1386                                 then Just $ resolveKey' lang "phdthesis"
1387                                 else Just reftype'
1388          _                -> if T.null reftype'
1389                                 then Nothing
1390                                 else Just reftype'
1391  return (reftype, refgenre)
1392
1393
1394-- transformKey source target key
1395-- derived from Appendix C of bibtex manual
1396transformKey :: Text -> Text -> Text -> [Text]
1397transformKey _ _ "ids"            = []
1398transformKey _ _ "crossref"       = []
1399transformKey _ _ "xref"           = []
1400transformKey _ _ "entryset"       = []
1401transformKey _ _ "entrysubtype"   = []
1402transformKey _ _ "execute"        = []
1403transformKey _ _ "label"          = []
1404transformKey _ _ "options"        = []
1405transformKey _ _ "presort"        = []
1406transformKey _ _ "related"        = []
1407transformKey _ _ "relatedoptions" = []
1408transformKey _ _ "relatedstring"  = []
1409transformKey _ _ "relatedtype"    = []
1410transformKey _ _ "shorthand"      = []
1411transformKey _ _ "shorthandintro" = []
1412transformKey _ _ "sortkey"        = []
1413transformKey x y "author"
1414  | x `elem` ["mvbook", "book"] &&
1415    y `elem` ["inbook", "bookinbook", "suppbook"] = ["bookauthor", "author"]
1416-- note: this next clause is not in the biblatex manual, but it makes
1417-- sense in the context of CSL conversion:
1418transformKey x y "author"
1419  | x == "mvbook" && y == "book" = ["bookauthor", "author"]
1420transformKey "mvbook" y z
1421  | y `elem` ["book", "inbook", "bookinbook", "suppbook"] = standardTrans z
1422transformKey x y z
1423  | x `elem` ["mvcollection", "mvreference"] &&
1424    y `elem` ["collection", "reference", "incollection", "inreference",
1425               "suppcollection"] = standardTrans z
1426transformKey "mvproceedings" y z
1427  | y `elem` ["proceedings", "inproceedings"] = standardTrans z
1428transformKey "book" y z
1429  | y `elem` ["inbook", "bookinbook", "suppbook"] = bookTrans z
1430transformKey x y z
1431  | x `elem` ["collection", "reference"] &&
1432    y `elem` ["incollection", "inreference", "suppcollection"] = bookTrans z
1433transformKey "proceedings" "inproceedings" z = bookTrans z
1434transformKey "periodical" y z
1435  | y `elem` ["article", "suppperiodical"] =
1436  case z of
1437       "title"          -> ["journaltitle"]
1438       "subtitle"       -> ["journalsubtitle"]
1439       "shorttitle"     -> []
1440       "sorttitle"      -> []
1441       "indextitle"     -> []
1442       "indexsorttitle" -> []
1443       _                -> [z]
1444transformKey _ _ x                = [x]
1445
1446standardTrans :: Text -> [Text]
1447standardTrans z =
1448  case z of
1449       "title"          -> ["maintitle"]
1450       "subtitle"       -> ["mainsubtitle"]
1451       "titleaddon"     -> ["maintitleaddon"]
1452       "shorttitle"     -> []
1453       "sorttitle"      -> []
1454       "indextitle"     -> []
1455       "indexsorttitle" -> []
1456       _                -> [z]
1457
1458bookTrans :: Text -> [Text]
1459bookTrans z =
1460  case z of
1461       "title"          -> ["booktitle"]
1462       "subtitle"       -> ["booksubtitle"]
1463       "titleaddon"     -> ["booktitleaddon"]
1464       "shorttitle"     -> []
1465       "sorttitle"      -> []
1466       "indextitle"     -> []
1467       "indexsorttitle" -> []
1468       _                -> [z]
1469
1470resolveKey :: Lang -> Inlines -> Inlines
1471resolveKey lang ils = Walk.walk go ils
1472  where go (Str s) = Str $ resolveKey' lang s
1473        go x       = x
1474
1475resolveKey' :: Lang -> Text -> Text
1476resolveKey' lang@(Lang l _) k =
1477  case Map.lookup l biblatexStringMap >>= Map.lookup (T.toLower k) of
1478    Nothing     -> k
1479    Just (x, _) -> either (const k) stringify $ parseLaTeX lang x
1480
1481convertEnDash :: Inline -> Inline
1482convertEnDash (Str s) = Str (T.map (\c -> if c == '–' then '-' else c) s)
1483convertEnDash x       = x
1484