1{-# LANGUAGE CPP                 #-}
2{-# LANGUAGE FlexibleContexts    #-}
3{-# LANGUAGE PatternGuards       #-}
4{-# LANGUAGE OverloadedStrings   #-}
5{-# LANGUAGE ScopedTypeVariables #-}
6{- |
7   Module      : Text.Pandoc.Writers.EPUB
8   Copyright   : Copyright (C) 2010-2021 John MacFarlane
9   License     : GNU GPL, version 2 or above
10
11   Maintainer  : John MacFarlane <jgm@berkeley.edu>
12   Stability   : alpha
13   Portability : portable
14
15Conversion of 'Pandoc' documents to EPUB.
16-}
17module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where
18import Codec.Archive.Zip (Entry, addEntryToArchive, eRelativePath, emptyArchive,
19                          fromArchive, fromEntry, toEntry)
20import Control.Applicative ( (<|>) )
21import Control.Monad (mplus, unless, when, zipWithM)
22import Control.Monad.Except (catchError, throwError)
23import Control.Monad.State.Strict (StateT, evalState, evalStateT, get,
24                                   gets, lift, modify)
25import qualified Data.ByteString.Lazy as B
26import qualified Data.ByteString.Lazy.Char8 as B8
27import Data.Char (isAlphaNum, isAscii, isDigit, toLower)
28import Data.List (isInfixOf, isPrefixOf)
29import qualified Data.Map as M
30import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust)
31import qualified Data.Set as Set
32import qualified Data.Text as TS
33import qualified Data.Text.Lazy as TL
34import Network.HTTP (urlEncode)
35import System.FilePath (takeExtension, takeFileName, makeRelative)
36import Text.HTML.TagSoup (Tag (TagOpen), fromAttrib, parseTags)
37import Text.Pandoc.Builder (fromList, setMeta)
38import Text.Pandoc.Class.PandocMonad (PandocMonad, report)
39import qualified Text.Pandoc.Class.PandocPure as P
40import qualified Text.Pandoc.Class.PandocMonad as P
41import Data.Time
42import Text.Pandoc.Definition
43import Text.Pandoc.Error
44import Text.Pandoc.ImageSize
45import Text.Pandoc.Logging
46import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType)
47import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..),
48                            ObfuscationMethod (NoObfuscation), WrapOption (..),
49                            WriterOptions (..))
50import Text.Pandoc.Shared (makeSections, normalizeDate, renderTags',
51                           safeRead, stringify, trim, uniqueIdent, tshow)
52import qualified Text.Pandoc.UTF8 as UTF8
53import Text.Pandoc.UUID (getRandomUUID)
54import Text.Pandoc.Walk (query, walk, walkM)
55import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB)
56import Text.Printf (printf)
57import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..),
58                       add_attrs, lookupAttr, node, onlyElems, parseXML,
59                       ppElement, showElement, strContent, unode, unqual)
60import Text.Pandoc.XML (escapeStringForXML)
61import Text.DocTemplates (FromContext(lookupContext), Context(..),
62                          ToContext(toVal), Val(..))
63
64-- A Chapter includes a list of blocks.
65newtype Chapter = Chapter [Block]
66  deriving (Show)
67
68data EPUBState = EPUBState {
69        stMediaPaths  :: [(FilePath, (FilePath, Maybe Entry))]
70      , stMediaNextId :: Int
71      , stEpubSubdir  :: String
72      }
73
74type E m = StateT EPUBState m
75
76data EPUBMetadata = EPUBMetadata{
77    epubIdentifier    :: [Identifier]
78  , epubTitle         :: [Title]
79  , epubDate          :: [Date]
80  , epubLanguage      :: String
81  , epubCreator       :: [Creator]
82  , epubContributor   :: [Creator]
83  , epubSubject       :: [String]
84  , epubDescription   :: Maybe String
85  , epubType          :: Maybe String
86  , epubFormat        :: Maybe String
87  , epubPublisher     :: Maybe String
88  , epubSource        :: Maybe String
89  , epubRelation      :: Maybe String
90  , epubCoverage      :: Maybe String
91  , epubRights        :: Maybe String
92  , epubCoverImage    :: Maybe String
93  , epubStylesheets   :: [FilePath]
94  , epubPageDirection :: Maybe ProgressionDirection
95  , epubIbooksFields  :: [(String, String)]
96  , epubCalibreFields :: [(String, String)]
97  } deriving Show
98
99data Date = Date{
100    dateText  :: String
101  , dateEvent :: Maybe String
102  } deriving Show
103
104data Creator = Creator{
105    creatorText   :: String
106  , creatorRole   :: Maybe String
107  , creatorFileAs :: Maybe String
108  } deriving Show
109
110data Identifier = Identifier{
111    identifierText   :: String
112  , identifierScheme :: Maybe String
113  } deriving Show
114
115data Title = Title{
116    titleText   :: String
117  , titleFileAs :: Maybe String
118  , titleType   :: Maybe String
119  } deriving Show
120
121data ProgressionDirection = LTR | RTL deriving Show
122
123dcName :: String -> QName
124dcName n = QName n Nothing (Just "dc")
125
126dcNode :: Node t => String -> t -> Element
127dcNode = node . dcName
128
129opfName :: String -> QName
130opfName n = QName n Nothing (Just "opf")
131
132toId :: FilePath -> String
133toId = map (\x -> if isAlphaNum x || x == '-' || x == '_'
134                     then x
135                     else '_') . takeFileName
136
137removeNote :: Inline -> Inline
138removeNote (Note _) = Str ""
139removeNote x        = x
140
141toVal' :: String -> Val TS.Text
142toVal' = toVal . TS.pack
143
144mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry
145mkEntry path content = do
146  epubSubdir <- gets stEpubSubdir
147  let addEpubSubdir :: Entry -> Entry
148      addEpubSubdir e = e{ eRelativePath =
149          (if null epubSubdir
150              then ""
151              else epubSubdir ++ "/") ++ eRelativePath e }
152  epochtime <- floor <$> lift P.getPOSIXTime
153  return $
154       (if path == "mimetype" || "META-INF" `isPrefixOf` path
155           then id
156           else addEpubSubdir) $ toEntry path epochtime content
157
158getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata
159getEPUBMetadata opts meta = do
160  let md = metadataFromMeta opts meta
161  let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts
162  let md' = foldr addMetadataFromXML md elts
163  let addIdentifier m =
164       if null (epubIdentifier m)
165          then do
166            randomId <- getRandomUUID
167            return $ m{ epubIdentifier = [Identifier (show randomId) Nothing] }
168          else return m
169  let addLanguage m =
170       if null (epubLanguage m)
171          then case lookupContext "lang" (writerVariables opts) of
172                     Just x  -> return m{ epubLanguage = TS.unpack x }
173                     Nothing -> do
174                       mLang <- lift $ P.lookupEnv "LANG"
175                       let localeLang =
176                             case mLang of
177                               Just lang ->
178                                 TS.map (\c -> if c == '_' then '-' else c) $
179                                 TS.takeWhile (/='.') lang
180                               Nothing -> "en-US"
181                       return m{ epubLanguage = TS.unpack localeLang }
182          else return m
183  let fixDate m =
184       if null (epubDate m)
185          then do
186            currentTime <- lift P.getCurrentTime
187            return $ m{ epubDate = [ Date{
188                             dateText = showDateTimeISO8601 currentTime
189                           , dateEvent = Nothing } ] }
190          else return m
191  let addAuthor m =
192       if any (\c -> creatorRole c == Just "aut") $ epubCreator m
193          then return m
194          else do
195            let authors' = map stringify $ docAuthors meta
196            let toAuthor name = Creator{ creatorText = TS.unpack name
197                                       , creatorRole = Just "aut"
198                                       , creatorFileAs = Nothing }
199            return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m }
200  addIdentifier md' >>= fixDate >>= addAuthor >>= addLanguage
201
202addMetadataFromXML :: Element -> EPUBMetadata -> EPUBMetadata
203addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md
204  | name == "identifier" = md{ epubIdentifier =
205             Identifier{ identifierText = strContent e
206                       , identifierScheme = lookupAttr (opfName "scheme") attrs
207                       } : epubIdentifier md }
208  | name == "title" = md{ epubTitle =
209            Title{ titleText = strContent e
210                 , titleFileAs = getAttr "file-as"
211                 , titleType = getAttr "type"
212                 } : epubTitle md }
213  | name == "date" = md{ epubDate =
214             Date{ dateText = fromMaybe "" $ normalizeDate' $ strContent e
215                 , dateEvent = getAttr "event"
216                 } : epubDate md }
217  | name == "language" = md{ epubLanguage = strContent e }
218  | name == "creator" = md{ epubCreator =
219              Creator{ creatorText = strContent e
220                     , creatorRole = getAttr "role"
221                     , creatorFileAs = getAttr "file-as"
222                     } : epubCreator md }
223  | name == "contributor" = md{ epubContributor =
224              Creator  { creatorText = strContent e
225                       , creatorRole = getAttr "role"
226                       , creatorFileAs = getAttr "file-as"
227                       } : epubContributor md }
228  | name == "subject" = md{ epubSubject = strContent e : epubSubject md }
229  | name == "description" = md { epubDescription = Just $ strContent e }
230  | name == "type" = md { epubType = Just $ strContent e }
231  | name == "format" = md { epubFormat = Just $ strContent e }
232  | name == "type" = md { epubType = Just $ strContent e }
233  | name == "publisher" = md { epubPublisher = Just $ strContent e }
234  | name == "source" = md { epubSource = Just $ strContent e }
235  | name == "relation" = md { epubRelation = Just $ strContent e }
236  | name == "coverage" = md { epubCoverage = Just $ strContent e }
237  | name == "rights" = md { epubRights = Just $ strContent e }
238  | otherwise = md
239  where getAttr n = lookupAttr (opfName n) attrs
240addMetadataFromXML e@(Element (QName "meta" _ _) attrs _ _) md =
241  case getAttr "property" of
242       Just s | "ibooks:" `isPrefixOf` s ->
243                md{ epubIbooksFields = (drop 7 s, strContent e) :
244                       epubIbooksFields md }
245       _ -> case getAttr "name" of
246                 Just s | "calibre:" `isPrefixOf` s ->
247                   md{ epubCalibreFields =
248                         (drop 8 s, fromMaybe "" $ getAttr "content") :
249                          epubCalibreFields md }
250                 _ -> md
251  where getAttr n = lookupAttr (unqual n) attrs
252addMetadataFromXML _ md = md
253
254metaValueToString :: MetaValue -> String
255metaValueToString (MetaString s)    = TS.unpack s
256metaValueToString (MetaInlines ils) = TS.unpack $ stringify ils
257metaValueToString (MetaBlocks bs)   = TS.unpack $ stringify bs
258metaValueToString (MetaBool True)   = "true"
259metaValueToString (MetaBool False)  = "false"
260metaValueToString _                 = ""
261
262metaValueToPaths :: MetaValue -> [FilePath]
263metaValueToPaths (MetaList xs) = map metaValueToString xs
264metaValueToPaths x             = [metaValueToString x]
265
266getList :: TS.Text -> Meta -> (MetaValue -> a) -> [a]
267getList s meta handleMetaValue =
268  case lookupMeta s meta of
269       Just (MetaList xs) -> map handleMetaValue xs
270       Just mv            -> [handleMetaValue mv]
271       Nothing            -> []
272
273getIdentifier :: Meta -> [Identifier]
274getIdentifier meta = getList "identifier" meta handleMetaValue
275  where handleMetaValue (MetaMap m) =
276           Identifier{ identifierText = maybe "" metaValueToString
277                                        $ M.lookup "text" m
278                     , identifierScheme = metaValueToString <$>
279                                          M.lookup "scheme" m }
280        handleMetaValue mv = Identifier (metaValueToString mv) Nothing
281
282getTitle :: Meta -> [Title]
283getTitle meta = getList "title" meta handleMetaValue
284  where handleMetaValue (MetaMap m) =
285           Title{ titleText = maybe "" metaValueToString $ M.lookup "text" m
286                , titleFileAs = metaValueToString <$> M.lookup "file-as" m
287                , titleType = metaValueToString <$> M.lookup "type" m }
288        handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing
289
290getCreator :: TS.Text -> Meta -> [Creator]
291getCreator s meta = getList s meta handleMetaValue
292  where handleMetaValue (MetaMap m) =
293           Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m
294                  , creatorFileAs = metaValueToString <$> M.lookup "file-as" m
295                  , creatorRole = metaValueToString <$> M.lookup "role" m }
296        handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing
297
298getDate :: TS.Text -> Meta -> [Date]
299getDate s meta = getList s meta handleMetaValue
300  where handleMetaValue (MetaMap m) =
301           Date{ dateText = fromMaybe "" $
302                   M.lookup "text" m >>= normalizeDate' . metaValueToString
303               , dateEvent = metaValueToString <$> M.lookup "event" m }
304        handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv
305                                  , dateEvent = Nothing }
306
307simpleList :: TS.Text -> Meta -> [String]
308simpleList s meta =
309  case lookupMeta s meta of
310       Just (MetaList xs) -> map metaValueToString xs
311       Just x             -> [metaValueToString x]
312       Nothing            -> []
313
314metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata
315metadataFromMeta opts meta = EPUBMetadata{
316      epubIdentifier         = identifiers
317    , epubTitle              = titles
318    , epubDate               = date
319    , epubLanguage           = language
320    , epubCreator            = creators
321    , epubContributor        = contributors
322    , epubSubject            = subjects
323    , epubDescription        = description
324    , epubType               = epubtype
325    , epubFormat             = format
326    , epubPublisher          = publisher
327    , epubSource             = source
328    , epubRelation           = relation
329    , epubCoverage           = coverage
330    , epubRights             = rights
331    , epubCoverImage         = coverImage
332    , epubStylesheets        = stylesheets
333    , epubPageDirection      = pageDirection
334    , epubIbooksFields       = ibooksFields
335    , epubCalibreFields      = calibreFields
336    }
337  where identifiers = getIdentifier meta
338        titles = getTitle meta
339        date = getDate "date" meta
340        language = maybe "" metaValueToString $
341           lookupMeta "language" meta `mplus` lookupMeta "lang" meta
342        creators = getCreator "creator" meta
343        contributors = getCreator "contributor" meta
344        subjects = simpleList "subject" meta
345        description = metaValueToString <$> lookupMeta "description" meta
346        epubtype = metaValueToString <$> lookupMeta "type" meta
347        format = metaValueToString <$> lookupMeta "format" meta
348        publisher = metaValueToString <$> lookupMeta "publisher" meta
349        source = metaValueToString <$> lookupMeta "source" meta
350        relation = metaValueToString <$> lookupMeta "relation" meta
351        coverage = metaValueToString <$> lookupMeta "coverage" meta
352        rights = metaValueToString <$> lookupMeta "rights" meta
353        coverImage =
354            (TS.unpack <$> lookupContext "epub-cover-image"
355                              (writerVariables opts))
356            `mplus` (metaValueToString <$> lookupMeta "cover-image" meta)
357        mCss = lookupMeta "css" meta <|> lookupMeta "stylesheet" meta
358        stylesheets = maybe [] metaValueToPaths mCss ++
359                      case lookupContext "css" (writerVariables opts) of
360                         Just xs -> map TS.unpack xs
361                         Nothing ->
362                           case lookupContext "css" (writerVariables opts) of
363                             Just x  -> [TS.unpack x]
364                             Nothing -> []
365        pageDirection = case map toLower . metaValueToString <$>
366                             lookupMeta "page-progression-direction" meta of
367                              Just "ltr" -> Just LTR
368                              Just "rtl" -> Just RTL
369                              _          -> Nothing
370        ibooksFields = case lookupMeta "ibooks" meta of
371                            Just (MetaMap mp)
372                               -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp
373                            _  -> []
374        calibreFields = case lookupMeta "calibre" meta of
375                            Just (MetaMap mp)
376                               -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp
377                            _  -> []
378
379-- | Produce an EPUB2 file from a Pandoc document.
380writeEPUB2 :: PandocMonad m
381          => WriterOptions  -- ^ Writer options
382          -> Pandoc         -- ^ Document to convert
383          -> m B.ByteString
384writeEPUB2 = writeEPUB EPUB2
385
386-- | Produce an EPUB3 file from a Pandoc document.
387writeEPUB3 :: PandocMonad m
388          => WriterOptions  -- ^ Writer options
389          -> Pandoc         -- ^ Document to convert
390          -> m B.ByteString
391writeEPUB3 = writeEPUB EPUB3
392
393-- | Produce an EPUB file from a Pandoc document.
394writeEPUB :: PandocMonad m
395          => EPUBVersion
396          -> WriterOptions  -- ^ Writer options
397          -> Pandoc         -- ^ Document to convert
398          -> m B.ByteString
399writeEPUB epubVersion opts doc = do
400  let epubSubdir = writerEpubSubdirectory opts
401  -- sanity check on epubSubdir
402  unless (TS.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $
403    throwError $ PandocEpubSubdirectoryError epubSubdir
404  let initState = EPUBState { stMediaPaths = [], stMediaNextId = 0, stEpubSubdir = TS.unpack epubSubdir }
405  evalStateT (pandocToEPUB epubVersion opts doc) initState
406
407pandocToEPUB :: PandocMonad m
408             => EPUBVersion
409             -> WriterOptions
410             -> Pandoc
411             -> E m B.ByteString
412pandocToEPUB version opts doc = do
413  -- handle pictures
414  Pandoc meta blocks <- walkM (transformInline opts) doc >>=
415                        walkM transformBlock
416  picEntries <- mapMaybe (snd . snd) <$> gets stMediaPaths
417
418  epubSubdir <- gets stEpubSubdir
419  let epub3 = version == EPUB3
420  let writeHtml o = fmap (UTF8.fromTextLazy . TL.fromStrict) .
421                      writeHtmlStringForEPUB version o
422  metadata <- getEPUBMetadata opts meta
423
424  let plainTitle = case docTitle' meta of
425                        [] -> case epubTitle metadata of
426                                   []    -> "UNTITLED"
427                                   (x:_) -> titleText x
428                        x  -> TS.unpack $ stringify x
429
430  -- stylesheet
431  stylesheets <- case epubStylesheets metadata of
432                      [] -> (\x -> [B.fromChunks [x]]) <$>
433                             P.readDataFile "epub.css"
434                      fs -> mapM P.readFileLazy fs
435  stylesheetEntries <- zipWithM
436        (\bs n -> mkEntry ("styles/stylesheet" ++ show n ++ ".css") bs)
437        stylesheets [(1 :: Int)..]
438
439  let vars = Context $
440               M.delete "css" .
441               M.insert "epub3"
442                 (toVal' $ if epub3 then "true" else "false") .
443               M.insert "lang" (toVal' $ epubLanguage metadata)
444             $ unContext $ writerVariables opts
445
446  let cssvars useprefix = Context $ M.insert "css"
447                           (ListVal $ map
448                             (\e -> toVal' $
449                                (if useprefix then "../" else "") <>
450                                makeRelative epubSubdir (eRelativePath e))
451                             stylesheetEntries)
452                             mempty
453
454  let opts' = opts{ writerEmailObfuscation = NoObfuscation
455                  , writerSectionDivs = True
456                  , writerVariables = vars
457                  , writerHTMLMathMethod =
458                       if epub3
459                          then MathML
460                          else writerHTMLMathMethod opts
461                  , writerWrapText = WrapAuto }
462
463  -- cover page
464  (cpgEntry, cpicEntry) <-
465                case epubCoverImage metadata of
466                     Nothing   -> return ([],[])
467                     Just img  -> do
468                       let fp = takeFileName img
469                       mediaPaths <- gets (map (fst . snd) . stMediaPaths)
470                       coverImageName <-  -- see #4206
471                            if ("media/" <> fp) `elem` mediaPaths
472                               then getMediaNextNewName (takeExtension fp)
473                               else return fp
474                       imgContent <- lift $ P.readFileLazy img
475                       (coverImageWidth, coverImageHeight) <-
476                             case imageSize opts' (B.toStrict imgContent) of
477                               Right sz  -> return $ sizeInPixels sz
478                               Left err' -> (0, 0) <$ report
479                                 (CouldNotDetermineImageSize (TS.pack img) err')
480                       cpContent <- lift $ writeHtml
481                            opts'{ writerVariables =
482                                   Context (M.fromList [
483                                    ("coverpage", toVal' "true"),
484                                    ("pagetitle", toVal $
485                                      escapeStringForXML $ TS.pack plainTitle),
486                                    ("cover-image", toVal' coverImageName),
487                                    ("cover-image-width", toVal' $
488                                       show coverImageWidth),
489                                    ("cover-image-height", toVal' $
490                                       show coverImageHeight)]) <>
491                                     cssvars True <> vars }
492                            (Pandoc meta [])
493                       coverEntry <- mkEntry "text/cover.xhtml" cpContent
494                       coverImageEntry <- mkEntry ("media/" ++ coverImageName)
495                                             imgContent
496                       return ( [ coverEntry ]
497                              , [ coverImageEntry ] )
498
499  -- title page
500  tpContent <- lift $ writeHtml opts'{
501                                  writerVariables =
502                                      Context (M.fromList [
503                                        ("titlepage", toVal' "true"),
504                                        ("body-type",  toVal' "frontmatter"),
505                                        ("pagetitle", toVal $
506                                            escapeStringForXML $ TS.pack plainTitle)])
507                                      <> cssvars True <> vars }
508                               (Pandoc meta [])
509  tpEntry <- mkEntry "text/title_page.xhtml" tpContent
510
511  -- handle fonts
512  let matchingGlob f = do
513        xs <- lift $ P.glob f
514        when (null xs) $
515          report $ CouldNotFetchResource (TS.pack f) "glob did not match any font files"
516        return xs
517  let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<<
518                        lift (P.readFileLazy f)
519  fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts')
520  fontEntries <- mapM mkFontEntry fontFiles
521
522  -- set page progression direction attribution
523  let progressionDirection = case epubPageDirection metadata of
524                                  Just LTR | epub3 ->
525                                    [("page-progression-direction", "ltr")]
526                                  Just RTL | epub3 ->
527                                    [("page-progression-direction", "rtl")]
528                                  _  -> []
529
530  -- body pages
531
532  let chapterHeaderLevel = writerEpubChapterLevel opts
533
534  let isChapterHeader (Div _ (Header n _ _:_)) = n <= chapterHeaderLevel
535      isChapterHeader _ = False
536
537  let secsToChapters :: [Block] -> [Chapter]
538      secsToChapters [] = []
539      secsToChapters (d@(Div attr (h@(Header lvl _ _) : bs)) : rest)
540        | chapterHeaderLevel == lvl =
541           Chapter [d] : secsToChapters rest
542        | chapterHeaderLevel > lvl =
543           Chapter [Div attr (h:xs)] :
544           secsToChapters ys ++ secsToChapters rest
545             where (xs, ys) = break isChapterHeader bs
546      secsToChapters bs =
547          (if null xs then id else (Chapter xs :)) $ secsToChapters ys
548            where (xs, ys) = break isChapterHeader bs
549
550  -- add level 1 header to beginning if none there
551  let secs = makeSections True Nothing
552              $ addIdentifiers opts
553              $ case blocks of
554                  (Div _
555                    (Header{}:_) : _) -> blocks
556                  (Header 1 _ _ : _)  -> blocks
557                  _                   -> Header 1 ("",["unnumbered"],[])
558                                             (docTitle' meta) : blocks
559
560  let chapters' = secsToChapters secs
561
562  let extractLinkURL' :: Int -> Inline -> [(TS.Text, TS.Text)]
563      extractLinkURL' num (Span (ident, _, _) _)
564        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
565      extractLinkURL' num (Link (ident, _, _) _ _)
566        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
567      extractLinkURL' num (Image (ident, _, _) _ _)
568        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
569      extractLinkURL' num (RawInline fmt raw)
570        | isHtmlFormat fmt
571        = foldr (\tag ->
572                   case tag of
573                     TagOpen{} ->
574                       case fromAttrib "id" tag of
575                         "" -> id
576                         x  -> ((x, TS.pack (showChapter num) <> "#" <> x):)
577                     _ -> id)
578            [] (parseTags raw)
579      extractLinkURL' _ _ = []
580
581  let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)]
582      extractLinkURL num (Div (ident, _, _) _)
583        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
584      extractLinkURL num (Header _ (ident, _, _) _)
585        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
586      extractLinkURL num (Table (ident,_,_) _ _ _ _ _)
587        | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)]
588      extractLinkURL num (RawBlock fmt raw)
589        | isHtmlFormat fmt
590        = foldr (\tag ->
591                   case tag of
592                     TagOpen{} ->
593                       case fromAttrib "id" tag of
594                         "" -> id
595                         x  -> ((x, TS.pack (showChapter num) <> "#" <> x):)
596                     _ -> id)
597            [] (parseTags raw)
598      extractLinkURL num b = query (extractLinkURL' num) b
599
600  let reftable = concat $ zipWith (\(Chapter bs) num ->
601                                    query (extractLinkURL num) bs)
602                          chapters' [1..]
603
604  let fixInternalReferences :: Inline -> Inline
605      fixInternalReferences (Link attr lab (src, tit))
606        | Just ('#', xs) <- TS.uncons src = case lookup xs reftable of
607             Just ys -> Link attr lab (ys, tit)
608             Nothing -> Link attr lab (src, tit)
609      fixInternalReferences x = x
610
611  -- internal reference IDs change when we chunk the file,
612  -- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'.
613  -- this fixes that:
614  let chapters = map (\(Chapter bs) ->
615                         Chapter $ walk fixInternalReferences bs)
616                 chapters'
617
618  let chapToEntry num (Chapter bs) =
619        mkEntry ("text/" ++ showChapter num) =<<
620        writeHtml opts'{ writerVariables =
621                            Context (M.fromList
622                                     [("body-type", toVal' bodyType),
623                                      ("pagetitle", toVal' $
624                                           showChapter num)])
625                            <> cssvars True <> vars } pdoc
626         where (pdoc, bodyType) =
627                 case bs of
628                     (Div (_,"section":_,kvs)
629                       (Header _ _ xs : _) : _) ->
630                       -- remove notes or we get doubled footnotes
631                       (Pandoc (setMeta "title"
632                           (walk removeNote $ fromList xs) nullMeta) bs,
633                        case lookup "epub:type" kvs of
634                             Nothing -> "bodymatter"
635                             Just x
636                               | x `elem` frontMatterTypes -> "frontmatter"
637                               | x `elem` backMatterTypes  -> "backmatter"
638                               | otherwise                 -> "bodymatter")
639                     _                   -> (Pandoc nullMeta bs, "bodymatter")
640               frontMatterTypes = ["prologue", "abstract", "acknowledgments",
641                                   "copyright-page", "dedication",
642                                   "credits", "keywords", "imprint",
643                                   "contributors", "other-credits",
644                                   "errata", "revision-history",
645                                   "titlepage", "halftitlepage", "seriespage",
646                                   "foreword", "preface",
647                                   "seriespage", "titlepage"]
648               backMatterTypes = ["appendix", "colophon", "bibliography",
649                                  "index"]
650
651  chapterEntries <- zipWithM chapToEntry [1..] chapters
652
653  -- incredibly inefficient (TODO):
654  let containsMathML ent = epub3 &&
655                           "<math" `isInfixOf`
656        B8.unpack (fromEntry ent)
657  let containsSVG ent    = epub3 &&
658                           "<svg" `isInfixOf`
659        B8.unpack (fromEntry ent)
660  let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent]
661
662  -- contents.opf
663  let chapterNode ent = unode "item" !
664                           ([("id", toId $ makeRelative epubSubdir
665                                         $ eRelativePath ent),
666                             ("href", makeRelative epubSubdir
667                                      $ eRelativePath ent),
668                             ("media-type", "application/xhtml+xml")]
669                            ++ case props ent of
670                                    [] -> []
671                                    xs -> [("properties", unwords xs)])
672                        $ ()
673
674  let chapterRefNode ent = unode "itemref" !
675                             [("idref", toId $ makeRelative epubSubdir
676                                             $ eRelativePath ent)] $ ()
677  let pictureNode ent = unode "item" !
678                           [("id", toId $ makeRelative epubSubdir
679                                        $ eRelativePath ent),
680                            ("href", makeRelative epubSubdir
681                                     $ eRelativePath ent),
682                            ("media-type",
683                               maybe "application/octet-stream" TS.unpack
684                               $ mediaTypeOf $ eRelativePath ent)] $ ()
685  let fontNode ent = unode "item" !
686                           [("id", toId $ makeRelative epubSubdir
687                                        $ eRelativePath ent),
688                            ("href", makeRelative epubSubdir
689                                     $ eRelativePath ent),
690                            ("media-type", maybe "" TS.unpack $
691                                  getMimeType $ eRelativePath ent)] $ ()
692
693  let tocTitle = maybe plainTitle
694                   metaValueToString $ lookupMeta "toc-title" meta
695  uuid <- case epubIdentifier metadata of
696            (x:_) -> return $ identifierText x  -- use first identifier as UUID
697            []    -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null"  -- shouldn't happen
698  currentTime <- lift P.getCurrentTime
699  let contentsData = UTF8.fromStringLazy $ ppTopElement $
700        unode "package" !
701          ([("version", case version of
702                             EPUB2 -> "2.0"
703                             EPUB3 -> "3.0")
704           ,("xmlns","http://www.idpf.org/2007/opf")
705           ,("unique-identifier","epub-id-1")
706           ] ++
707           [("prefix","ibooks: http://vocabulary.itunes.apple.com/rdf/ibooks/vocabulary-extensions-1.0/") | version == EPUB3]) $
708          [ metadataElement version metadata currentTime
709          , unode "manifest" $
710             [ unode "item" ! [("id","ncx"), ("href","toc.ncx")
711                              ,("media-type","application/x-dtbncx+xml")] $ ()
712             , unode "item" ! ([("id","nav")
713                               ,("href","nav.xhtml")
714                               ,("media-type","application/xhtml+xml")] ++
715                               [("properties","nav") | epub3 ]) $ ()
716             ] ++
717             [ unode "item" ! [("id","stylesheet" ++ show n), ("href",fp)
718                              ,("media-type","text/css")] $ () |
719                             (n :: Int, fp) <- zip [1..] (map
720                               (makeRelative epubSubdir . eRelativePath)
721                               stylesheetEntries) ] ++
722             map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++
723             (case cpicEntry of
724                    []    -> []
725                    (x:_) -> [add_attrs
726                              [Attr (unqual "properties") "cover-image" | epub3]
727                              (pictureNode x)]) ++
728             map pictureNode picEntries ++
729             map fontNode fontEntries
730          , unode "spine" ! (
731             ("toc","ncx") : progressionDirection) $
732              case epubCoverImage metadata of
733                    Nothing -> []
734                    Just _ -> [ unode "itemref" !
735                                [("idref", "cover_xhtml")] $ () ]
736              ++ ((unode "itemref" ! [("idref", "title_page_xhtml")
737                                     ,("linear",
738                                         case lookupMeta "title" meta of
739                                               Just _  -> "yes"
740                                               Nothing -> "no")] $ ()) :
741                  [unode "itemref" ! [("idref", "nav")] $ ()
742                         | writerTableOfContents opts ] ++
743                  map chapterRefNode chapterEntries)
744          , unode "guide" $
745             (unode "reference" !
746                 [("type","toc"),("title", tocTitle),
747                  ("href","nav.xhtml")] $ ()
748             ) :
749             [ unode "reference" !
750                   [("type","cover")
751                   ,("title","Cover")
752                   ,("href","text/cover.xhtml")] $ ()
753               | isJust (epubCoverImage metadata)
754             ]
755          ]
756  contentsEntry <- mkEntry "content.opf" contentsData
757
758  -- toc.ncx
759  let tocLevel = writerTOCDepth opts
760
761  let navPointNode :: PandocMonad m
762                   => (Int -> [Inline] -> TS.Text -> [Element] -> Element)
763                   -> Block -> StateT Int m [Element]
764      navPointNode formatter (Div (ident,_,_)
765                                (Header lvl (_,_,kvs) ils : children)) =
766        if lvl > tocLevel
767           then return []
768           else do
769             n <- get
770             modify (+1)
771             let num = fromMaybe "" $ lookup "number" kvs
772             let tit = if writerNumberSections opts && not (TS.null num)
773                          then Span ("", ["section-header-number"], [])
774                                [Str num] : Space : ils
775                          else ils
776             src <- case lookup ident reftable of
777                      Just x  -> return x
778                      Nothing -> throwError $ PandocSomeError $
779                                    ident <> " not found in reftable"
780             subs <- concat <$> mapM (navPointNode formatter) children
781             return [formatter n tit src subs]
782      navPointNode formatter (Div _ bs) =
783        concat <$> mapM (navPointNode formatter) bs
784      navPointNode _ _ = return []
785
786  let navMapFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element
787      navMapFormatter n tit src subs = unode "navPoint" !
788               [("id", "navPoint-" ++ show n)] $
789                  [ unode "navLabel" $ unode "text" $ TS.unpack $ stringify tit
790                  , unode "content" ! [("src", "text/" <> TS.unpack src)] $ ()
791                  ] ++ subs
792
793  let tpNode = unode "navPoint" !  [("id", "navPoint-0")] $
794                  [ unode "navLabel" $ unode "text" (TS.unpack $ stringify $ docTitle' meta)
795                  , unode "content" ! [("src", "text/title_page.xhtml")]
796                  $ () ]
797
798  navMap <- lift $ evalStateT
799             (concat <$> mapM (navPointNode navMapFormatter) secs) 1
800  let tocData = UTF8.fromStringLazy $ ppTopElement $
801        unode "ncx" ! [("version","2005-1")
802                       ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
803          [ unode "head" $
804             [ unode "meta" ! [("name","dtb:uid")
805                              ,("content", uuid)] $ ()
806             , unode "meta" ! [("name","dtb:depth")
807                              ,("content", "1")] $ ()
808             , unode "meta" ! [("name","dtb:totalPageCount")
809                              ,("content", "0")] $ ()
810             , unode "meta" ! [("name","dtb:maxPageNumber")
811                              ,("content", "0")] $ ()
812             ] ++ case epubCoverImage metadata of
813                        Nothing  -> []
814                        Just img -> [unode "meta" ! [("name","cover"),
815                                            ("content", toId img)] $ ()]
816          , unode "docTitle" $ unode "text" plainTitle
817          , unode "navMap" $
818              tpNode : navMap
819          ]
820  tocEntry <- mkEntry "toc.ncx" tocData
821
822  let navXhtmlFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element
823      navXhtmlFormatter n tit src subs = unode "li" !
824                                       [("id", "toc-li-" ++ show n)] $
825                                            (unode "a" !
826                                                [("href", "text/" <> TS.unpack src)]
827                                             $ titElements)
828                                            : case subs of
829                                                 []    -> []
830                                                 (_:_) -> [unode "ol" ! [("class","toc")] $ subs]
831          where titElements = parseXML titRendered
832                titRendered = case P.runPure
833                               (writeHtmlStringForEPUB version
834                                 opts{ writerTemplate = Nothing
835                                     , writerVariables =
836                                         Context (M.fromList
837                                           [("pagetitle", toVal $
838                                             escapeStringForXML $ TS.pack plainTitle)])
839                                       <> writerVariables opts}
840                                 (Pandoc nullMeta
841                                   [Plain $ walk clean tit])) of
842                                Left _  -> stringify tit
843                                Right x -> x
844                -- can't have <a> elements inside generated links...
845                clean (Link _ ils _) = Span ("", [], []) ils
846                clean (Note _)       = Str ""
847                clean x              = x
848
849  let navtag = if epub3 then "nav" else "div"
850  tocBlocks <- lift $ evalStateT
851                 (concat <$> mapM (navPointNode navXhtmlFormatter) secs) 1
852  let navBlocks = [RawBlock (Format "html")
853                  $ TS.pack $ showElement $ -- prettyprinting introduces bad spaces
854                   unode navtag ! ([("epub:type","toc") | epub3] ++
855                                   [("id","toc")]) $
856                    [ unode "h1" ! [("id","toc-title")] $ tocTitle
857                    , unode "ol" ! [("class","toc")] $ tocBlocks ]]
858  let landmarkItems = if epub3
859                         then unode "li"
860                                [ unode "a" ! [("href",
861                                                  "text/title_page.xhtml")
862                                               ,("epub:type", "titlepage")] $
863                                  ("Title Page" :: String) ] :
864                              [ unode "li"
865                                [ unode "a" ! [("href", "text/cover.xhtml")
866                                              ,("epub:type", "cover")] $
867                                  ("Cover" :: String)] |
868                                  isJust (epubCoverImage metadata)
869                              ] ++
870                              [ unode "li"
871                                [ unode "a" ! [("href", "#toc")
872                                              ,("epub:type", "toc")] $
873                                    ("Table of Contents" :: String)
874                                ] | writerTableOfContents opts
875                              ]
876                         else []
877  let landmarks = [RawBlock (Format "html") $ TS.pack $ ppElement $
878                    unode "nav" ! [("epub:type","landmarks")
879                                  ,("id","landmarks")
880                                  ,("hidden","hidden")] $
881                    [ unode "ol" landmarkItems ]
882                  | not (null landmarkItems)]
883  navData <- lift $ writeHtml opts'{ writerVariables =
884                     Context (M.fromList [("navpage", toVal' "true")
885                                         ,("body-type",  toVal' "frontmatter")
886                                         ])
887                     <> cssvars False <> vars }
888            (Pandoc (setMeta "title"
889                     (walk removeNote $ fromList $ docTitle' meta) nullMeta)
890               (navBlocks ++ landmarks))
891  navEntry <- mkEntry "nav.xhtml" navData
892
893  -- mimetype
894  mimetypeEntry <- mkEntry "mimetype" $
895                        UTF8.fromStringLazy "application/epub+zip"
896
897  -- container.xml
898  let containerData = UTF8.fromStringLazy $ ppTopElement $
899       unode "container" ! [("version","1.0")
900              ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
901         unode "rootfiles" $
902           unode "rootfile" ! [("full-path",
903                    (if null epubSubdir
904                        then ""
905                        else epubSubdir ++ "/") ++ "content.opf")
906               ,("media-type","application/oebps-package+xml")] $ ()
907  containerEntry <- mkEntry "META-INF/container.xml" containerData
908
909  -- com.apple.ibooks.display-options.xml
910  let apple = UTF8.fromStringLazy $ ppTopElement $
911        unode "display_options" $
912          unode "platform" ! [("name","*")] $
913            unode "option" ! [("name","specified-fonts")] $ ("true" :: String)
914  appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
915
916  -- construct archive
917  let archive = foldr addEntryToArchive emptyArchive $
918                 [mimetypeEntry, containerEntry, appleEntry,
919                  contentsEntry, tocEntry, navEntry, tpEntry] ++
920                  stylesheetEntries ++ picEntries ++ cpicEntry ++
921                  cpgEntry ++ chapterEntries ++ fontEntries
922  return $ fromArchive archive
923
924metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element
925metadataElement version md currentTime =
926  unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
927                     ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes
928  where mdNodes = identifierNodes ++ titleNodes ++ dateNodes
929                  ++ languageNodes ++ ibooksNodes ++ calibreNodes
930                  ++ creatorNodes ++ contributorNodes ++ subjectNodes
931                  ++ descriptionNodes ++ typeNodes ++ formatNodes
932                  ++ publisherNodes ++ sourceNodes ++ relationNodes
933                  ++ coverageNodes ++ rightsNodes ++ coverImageNodes
934                  ++ modifiedNodes
935        withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x))
936                         ([1..] :: [Int]))
937        identifierNodes = withIds "epub-id" toIdentifierNode $
938                          epubIdentifier md
939        titleNodes = withIds "epub-title" toTitleNode $ epubTitle md
940        dateNodes = if version == EPUB2
941                       then withIds "epub-date" toDateNode $ epubDate md
942                       else -- epub3 allows only one dc:date
943                            -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcdate
944                            case epubDate md of
945                                 [] -> []
946                                 (x:_) -> [dcNode "date" ! [("id","epub-date")]
947                                            $ dateText x]
948        ibooksNodes = map ibooksNode (epubIbooksFields md)
949        ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v
950        calibreNodes = map calibreNode (epubCalibreFields md)
951        calibreNode (k, v) = unode "meta" ! [("name", "calibre:" ++ k),
952                                             ("content", v)] $ ()
953        languageNodes = [dcTag "language" $ epubLanguage md]
954        creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $
955                       epubCreator md
956        contributorNodes = withIds "epub-contributor"
957                           (toCreatorNode "contributor") $ epubContributor md
958        subjectNodes = map (dcTag "subject") $ epubSubject md
959        descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md
960        typeNodes = maybe [] (dcTag' "type") $ epubType md
961        formatNodes = maybe [] (dcTag' "format") $ epubFormat md
962        publisherNodes = maybe [] (dcTag' "publisher") $ epubPublisher md
963        sourceNodes = maybe [] (dcTag' "source") $ epubSource md
964        relationNodes = maybe [] (dcTag' "relation") $ epubRelation md
965        coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md
966        rightsNodes = maybe [] (dcTag' "rights") $ epubRights md
967        coverImageNodes = maybe []
968            (\img -> [unode "meta" !  [("name","cover"),
969                                       ("content",toId img)] $ ()])
970            $ epubCoverImage md
971        modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $
972               showDateTimeISO8601 currentTime | version == EPUB3 ]
973        dcTag n s = unode ("dc:" ++ n) s
974        dcTag' n s = [dcTag n s]
975        toIdentifierNode id' (Identifier txt scheme)
976          | version == EPUB2 = [dcNode "identifier" !
977              (("id",id') : maybe [] (\x -> [("opf:scheme", x)]) scheme) $
978              txt]
979          | otherwise = (dcNode "identifier" ! [("id",id')] $ txt) :
980              maybe [] ((\x -> [unode "meta" !
981                                [ ("refines",'#':id')
982                                , ("property","identifier-type")
983                                , ("scheme","onix:codelist5")
984                                ]
985                                $ x
986                               ])
987                        . schemeToOnix)
988                    scheme
989        toCreatorNode s id' creator
990          | version == EPUB2 = [dcNode s !
991             (("id",id') :
992              maybe [] (\x -> [("opf:file-as",x)]) (creatorFileAs creator) ++
993              maybe [] (\x -> [("opf:role",x)])
994               (creatorRole creator >>= toRelator)) $ creatorText creator]
995          | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++
996              maybe [] (\x -> [unode "meta" !
997                   [("refines",'#':id'),("property","file-as")] $ x])
998                   (creatorFileAs creator) ++
999              maybe [] (\x -> [unode "meta" !
1000                   [("refines",'#':id'),("property","role"),
1001                     ("scheme","marc:relators")] $ x])
1002                   (creatorRole creator >>= toRelator)
1003        toTitleNode id' title
1004          | version == EPUB2 = [dcNode "title" !
1005             (("id",id') :
1006              -- note: EPUB2 doesn't accept opf:title-type
1007              maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title)) $
1008              titleText title]
1009          | otherwise = [dcNode "title" ! [("id",id')] $ titleText title]
1010              ++
1011              maybe [] (\x -> [unode "meta" !
1012                   [("refines",'#':id'),("property","file-as")] $ x])
1013                   (titleFileAs title) ++
1014              maybe [] (\x -> [unode "meta" !
1015                   [("refines",'#':id'),("property","title-type")] $ x])
1016                   (titleType title)
1017        toDateNode id' date = [dcNode "date" !
1018             (("id",id') :
1019                maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $
1020                 dateText date]
1021        schemeToOnix :: String -> String
1022        schemeToOnix "ISBN-10"              = "02"
1023        schemeToOnix "GTIN-13"              = "03"
1024        schemeToOnix "UPC"                  = "04"
1025        schemeToOnix "ISMN-10"              = "05"
1026        schemeToOnix "DOI"                  = "06"
1027        schemeToOnix "LCCN"                 = "13"
1028        schemeToOnix "GTIN-14"              = "14"
1029        schemeToOnix "ISBN-13"              = "15"
1030        schemeToOnix "Legal deposit number" = "17"
1031        schemeToOnix "URN"                  = "22"
1032        schemeToOnix "OCLC"                 = "23"
1033        schemeToOnix "ISMN-13"              = "25"
1034        schemeToOnix "ISBN-A"               = "26"
1035        schemeToOnix "JP"                   = "27"
1036        schemeToOnix "OLCC"                 = "28"
1037        schemeToOnix _                      = "01"
1038
1039showDateTimeISO8601 :: UTCTime -> String
1040showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
1041
1042transformTag :: PandocMonad m
1043             => Tag TS.Text
1044             -> E m (Tag TS.Text)
1045transformTag tag@(TagOpen name attr)
1046  | name `elem` ["video", "source", "img", "audio"] &&
1047    isNothing (lookup "data-external" attr) = do
1048  let src = fromAttrib "src" tag
1049  let poster = fromAttrib "poster" tag
1050  newsrc <- modifyMediaRef $ TS.unpack src
1051  newposter <- modifyMediaRef $ TS.unpack poster
1052  let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++
1053              [("src", "../" <> newsrc) | not (TS.null newsrc)] ++
1054              [("poster", "../" <> newposter) | not (TS.null newposter)]
1055  return $ TagOpen name attr'
1056transformTag tag = return tag
1057
1058modifyMediaRef :: PandocMonad m
1059               => FilePath
1060               -> E m TS.Text
1061modifyMediaRef "" = return ""
1062modifyMediaRef oldsrc = do
1063  media <- gets stMediaPaths
1064  case lookup oldsrc media of
1065         Just (n,_) -> return $ TS.pack n
1066         Nothing    -> catchError
1067           (do (img, mbMime) <- P.fetchItem $ TS.pack oldsrc
1068               let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) TS.unpack
1069                         (("." <>) <$> (mbMime >>= extensionFromMimeType))
1070               newName <- getMediaNextNewName ext
1071               let newPath = "media/" ++ newName
1072               entry <- mkEntry newPath (B.fromChunks . (:[]) $ img)
1073               modify $ \st -> st{ stMediaPaths =
1074                            (oldsrc, (newPath, Just entry)):media}
1075               return $ TS.pack newPath)
1076           (\e -> do
1077                report $ CouldNotFetchResource (TS.pack oldsrc) (tshow e)
1078                return $ TS.pack oldsrc)
1079
1080getMediaNextNewName :: PandocMonad m => String -> E m String
1081getMediaNextNewName ext = do
1082  nextId <- gets stMediaNextId
1083  modify $ \st -> st { stMediaNextId = nextId + 1 }
1084  return $ "file" ++ show nextId ++ ext
1085
1086isHtmlFormat :: Format -> Bool
1087isHtmlFormat (Format "html") = True
1088isHtmlFormat (Format "html4") = True
1089isHtmlFormat (Format "html5") = True
1090isHtmlFormat _ = False
1091
1092transformBlock  :: PandocMonad m
1093                => Block
1094                -> E m Block
1095transformBlock (RawBlock fmt raw)
1096  | isHtmlFormat fmt = do
1097  let tags = parseTags raw
1098  tags' <- mapM transformTag tags
1099  return $ RawBlock fmt (renderTags' tags')
1100transformBlock b = return b
1101
1102transformInline  :: PandocMonad m
1103                 => WriterOptions
1104                 -> Inline
1105                 -> E m Inline
1106transformInline _opts (Image attr lab (src,tit)) = do
1107    newsrc <- modifyMediaRef $ TS.unpack src
1108    return $ Image attr lab ("../" <> newsrc, tit)
1109transformInline opts x@(Math t m)
1110  | WebTeX url <- writerHTMLMathMethod opts = do
1111    newsrc <- modifyMediaRef (TS.unpack url <> urlEncode (TS.unpack m))
1112    let mathclass = if t == DisplayMath then "display" else "inline"
1113    return $ Span ("",["math",mathclass],[])
1114                [Image nullAttr [x] ("../" <> newsrc, "")]
1115transformInline _opts (RawInline fmt raw)
1116  | isHtmlFormat fmt = do
1117  let tags = parseTags raw
1118  tags' <- mapM transformTag tags
1119  return $ RawInline fmt (renderTags' tags')
1120transformInline _ x = return x
1121
1122(!) :: (t -> Element) -> [(String, String)] -> t -> Element
1123(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
1124
1125-- | Version of 'ppTopElement' that specifies UTF-8 encoding.
1126ppTopElement :: Element -> String
1127ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement
1128  -- unEntity removes numeric  entities introduced by ppElement
1129  -- (kindlegen seems to choke on these).
1130  where unEntity [] = ""
1131        unEntity ('&':'#':xs) =
1132                   let (ds,ys) = break (==';') xs
1133                       rest = drop 1 ys
1134                   in  case safeRead (TS.pack $ "'\\" <> ds <> "'") of
1135                          Just x  -> x : unEntity rest
1136                          Nothing -> '&':'#':unEntity xs
1137        unEntity (x:xs) = x : unEntity xs
1138
1139mediaTypeOf :: FilePath -> Maybe MimeType
1140mediaTypeOf x =
1141  let mediaPrefixes = ["image", "video", "audio"] in
1142  case getMimeType x of
1143    Just y | any (`TS.isPrefixOf` y) mediaPrefixes -> Just y
1144    _      -> Nothing
1145
1146-- Returns filename for chapter number.
1147showChapter :: Int -> String
1148showChapter = printf "ch%03d.xhtml"
1149
1150-- Add identifiers to any headers without them.
1151addIdentifiers :: WriterOptions -> [Block] -> [Block]
1152addIdentifiers opts bs = evalState (mapM go bs) Set.empty
1153 where go (Header n (ident,classes,kvs) ils) = do
1154         ids <- get
1155         let ident' = if TS.null ident
1156                         then uniqueIdent (writerExtensions opts) ils ids
1157                         else ident
1158         modify $ Set.insert ident'
1159         return $ Header n (ident',classes,kvs) ils
1160       go x = return x
1161
1162-- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM
1163normalizeDate' :: String -> Maybe String
1164normalizeDate' = fmap TS.unpack . go . trim . TS.pack
1165  where
1166    go xs
1167      | TS.length xs == 4            -- YYY
1168      , TS.all isDigit xs = Just xs
1169      | (y, s) <- TS.splitAt 4 xs    -- YYY-MM
1170      , Just ('-', m) <- TS.uncons s
1171      , TS.length m == 2
1172      , TS.all isDigit y && TS.all isDigit m = Just xs
1173      | otherwise = normalizeDate xs
1174
1175toRelator :: String -> Maybe String
1176toRelator x
1177  | x `elem` relators = Just x
1178  | otherwise         = lookup (map toLower x) relatorMap
1179
1180relators :: [String]
1181relators = map snd relatorMap
1182
1183relatorMap :: [(String, String)]
1184relatorMap =
1185           [("abridger", "abr")
1186           ,("actor", "act")
1187           ,("adapter", "adp")
1188           ,("addressee", "rcp")
1189           ,("analyst", "anl")
1190           ,("animator", "anm")
1191           ,("annotator", "ann")
1192           ,("appellant", "apl")
1193           ,("appellee", "ape")
1194           ,("applicant", "app")
1195           ,("architect", "arc")
1196           ,("arranger", "arr")
1197           ,("art copyist", "acp")
1198           ,("art director", "adi")
1199           ,("artist", "art")
1200           ,("artistic director", "ard")
1201           ,("assignee", "asg")
1202           ,("associated name", "asn")
1203           ,("attributed name", "att")
1204           ,("auctioneer", "auc")
1205           ,("author", "aut")
1206           ,("author in quotations or text abstracts", "aqt")
1207           ,("author of afterword, colophon, etc.", "aft")
1208           ,("author of dialog", "aud")
1209           ,("author of introduction, etc.", "aui")
1210           ,("autographer", "ato")
1211           ,("bibliographic antecedent", "ant")
1212           ,("binder", "bnd")
1213           ,("binding designer", "bdd")
1214           ,("blurb writer", "blw")
1215           ,("book designer", "bkd")
1216           ,("book producer", "bkp")
1217           ,("bookjacket designer", "bjd")
1218           ,("bookplate designer", "bpd")
1219           ,("bookseller", "bsl")
1220           ,("braille embosser", "brl")
1221           ,("broadcaster", "brd")
1222           ,("calligrapher", "cll")
1223           ,("cartographer", "ctg")
1224           ,("caster", "cas")
1225           ,("censor", "cns")
1226           ,("choreographer", "chr")
1227           ,("cinematographer", "cng")
1228           ,("client", "cli")
1229           ,("collection registrar", "cor")
1230           ,("collector", "col")
1231           ,("collotyper", "clt")
1232           ,("colorist", "clr")
1233           ,("commentator", "cmm")
1234           ,("commentator for written text", "cwt")
1235           ,("compiler", "com")
1236           ,("complainant", "cpl")
1237           ,("complainant-appellant", "cpt")
1238           ,("complainant-appellee", "cpe")
1239           ,("composer", "cmp")
1240           ,("compositor", "cmt")
1241           ,("conceptor", "ccp")
1242           ,("conductor", "cnd")
1243           ,("conservator", "con")
1244           ,("consultant", "csl")
1245           ,("consultant to a project", "csp")
1246           ,("contestant", "cos")
1247           ,("contestant-appellant", "cot")
1248           ,("contestant-appellee", "coe")
1249           ,("contestee", "cts")
1250           ,("contestee-appellant", "ctt")
1251           ,("contestee-appellee", "cte")
1252           ,("contractor", "ctr")
1253           ,("contributor", "ctb")
1254           ,("copyright claimant", "cpc")
1255           ,("copyright holder", "cph")
1256           ,("corrector", "crr")
1257           ,("correspondent", "crp")
1258           ,("costume designer", "cst")
1259           ,("court governed", "cou")
1260           ,("court reporter", "crt")
1261           ,("cover designer", "cov")
1262           ,("creator", "cre")
1263           ,("curator", "cur")
1264           ,("dancer", "dnc")
1265           ,("data contributor", "dtc")
1266           ,("data manager", "dtm")
1267           ,("dedicatee", "dte")
1268           ,("dedicator", "dto")
1269           ,("defendant", "dfd")
1270           ,("defendant-appellant", "dft")
1271           ,("defendant-appellee", "dfe")
1272           ,("degree granting institution", "dgg")
1273           ,("delineator", "dln")
1274           ,("depicted", "dpc")
1275           ,("depositor", "dpt")
1276           ,("designer", "dsr")
1277           ,("director", "drt")
1278           ,("dissertant", "dis")
1279           ,("distribution place", "dbp")
1280           ,("distributor", "dst")
1281           ,("donor", "dnr")
1282           ,("draftsman", "drm")
1283           ,("dubious author", "dub")
1284           ,("editor", "edt")
1285           ,("editor of compilation", "edc")
1286           ,("editor of moving image work", "edm")
1287           ,("electrician", "elg")
1288           ,("electrotyper", "elt")
1289           ,("enacting jurisdiction", "enj")
1290           ,("engineer", "eng")
1291           ,("engraver", "egr")
1292           ,("etcher", "etr")
1293           ,("event place", "evp")
1294           ,("expert", "exp")
1295           ,("facsimilist", "fac")
1296           ,("field director", "fld")
1297           ,("film director", "fmd")
1298           ,("film distributor", "fds")
1299           ,("film editor", "flm")
1300           ,("film producer", "fmp")
1301           ,("filmmaker", "fmk")
1302           ,("first party", "fpy")
1303           ,("forger", "frg")
1304           ,("former owner", "fmo")
1305           ,("funder", "fnd")
1306           ,("geographic information specialist", "gis")
1307           ,("honoree", "hnr")
1308           ,("host", "hst")
1309           ,("host institution", "his")
1310           ,("illuminator", "ilu")
1311           ,("illustrator", "ill")
1312           ,("inscriber", "ins")
1313           ,("instrumentalist", "itr")
1314           ,("interviewee", "ive")
1315           ,("interviewer", "ivr")
1316           ,("inventor", "inv")
1317           ,("issuing body", "isb")
1318           ,("judge", "jud")
1319           ,("jurisdiction governed", "jug")
1320           ,("laboratory", "lbr")
1321           ,("laboratory director", "ldr")
1322           ,("landscape architect", "lsa")
1323           ,("lead", "led")
1324           ,("lender", "len")
1325           ,("libelant", "lil")
1326           ,("libelant-appellant", "lit")
1327           ,("libelant-appellee", "lie")
1328           ,("libelee", "lel")
1329           ,("libelee-appellant", "let")
1330           ,("libelee-appellee", "lee")
1331           ,("librettist", "lbt")
1332           ,("licensee", "lse")
1333           ,("licensor", "lso")
1334           ,("lighting designer", "lgd")
1335           ,("lithographer", "ltg")
1336           ,("lyricist", "lyr")
1337           ,("manufacture place", "mfp")
1338           ,("manufacturer", "mfr")
1339           ,("marbler", "mrb")
1340           ,("markup editor", "mrk")
1341           ,("metadata contact", "mdc")
1342           ,("metal-engraver", "mte")
1343           ,("moderator", "mod")
1344           ,("monitor", "mon")
1345           ,("music copyist", "mcp")
1346           ,("musical director", "msd")
1347           ,("musician", "mus")
1348           ,("narrator", "nrt")
1349           ,("onscreen presenter", "osp")
1350           ,("opponent", "opn")
1351           ,("organizer of meeting", "orm")
1352           ,("originator", "org")
1353           ,("other", "oth")
1354           ,("owner", "own")
1355           ,("panelist", "pan")
1356           ,("papermaker", "ppm")
1357           ,("patent applicant", "pta")
1358           ,("patent holder", "pth")
1359           ,("patron", "pat")
1360           ,("performer", "prf")
1361           ,("permitting agency", "pma")
1362           ,("photographer", "pht")
1363           ,("plaintiff", "ptf")
1364           ,("plaintiff-appellant", "ptt")
1365           ,("plaintiff-appellee", "pte")
1366           ,("platemaker", "plt")
1367           ,("praeses", "pra")
1368           ,("presenter", "pre")
1369           ,("printer", "prt")
1370           ,("printer of plates", "pop")
1371           ,("printmaker", "prm")
1372           ,("process contact", "prc")
1373           ,("producer", "pro")
1374           ,("production company", "prn")
1375           ,("production designer", "prs")
1376           ,("production manager", "pmn")
1377           ,("production personnel", "prd")
1378           ,("production place", "prp")
1379           ,("programmer", "prg")
1380           ,("project director", "pdr")
1381           ,("proofreader", "pfr")
1382           ,("provider", "prv")
1383           ,("publication place", "pup")
1384           ,("publisher", "pbl")
1385           ,("publishing director", "pbd")
1386           ,("puppeteer", "ppt")
1387           ,("radio director", "rdd")
1388           ,("radio producer", "rpc")
1389           ,("recording engineer", "rce")
1390           ,("recordist", "rcd")
1391           ,("redaktor", "red")
1392           ,("renderer", "ren")
1393           ,("reporter", "rpt")
1394           ,("repository", "rps")
1395           ,("research team head", "rth")
1396           ,("research team member", "rtm")
1397           ,("researcher", "res")
1398           ,("respondent", "rsp")
1399           ,("respondent-appellant", "rst")
1400           ,("respondent-appellee", "rse")
1401           ,("responsible party", "rpy")
1402           ,("restager", "rsg")
1403           ,("restorationist", "rsr")
1404           ,("reviewer", "rev")
1405           ,("rubricator", "rbr")
1406           ,("scenarist", "sce")
1407           ,("scientific advisor", "sad")
1408           ,("screenwriter", "aus")
1409           ,("scribe", "scr")
1410           ,("sculptor", "scl")
1411           ,("second party", "spy")
1412           ,("secretary", "sec")
1413           ,("seller", "sll")
1414           ,("set designer", "std")
1415           ,("setting", "stg")
1416           ,("signer", "sgn")
1417           ,("singer", "sng")
1418           ,("sound designer", "sds")
1419           ,("speaker", "spk")
1420           ,("sponsor", "spn")
1421           ,("stage director", "sgd")
1422           ,("stage manager", "stm")
1423           ,("standards body", "stn")
1424           ,("stereotyper", "str")
1425           ,("storyteller", "stl")
1426           ,("supporting host", "sht")
1427           ,("surveyor", "srv")
1428           ,("teacher", "tch")
1429           ,("technical director", "tcd")
1430           ,("television director", "tld")
1431           ,("television producer", "tlp")
1432           ,("thesis advisor", "ths")
1433           ,("transcriber", "trc")
1434           ,("translator", "trl")
1435           ,("type designer", "tyd")
1436           ,("typographer", "tyg")
1437           ,("university place", "uvp")
1438           ,("videographer", "vdg")
1439           ,("witness", "wit")
1440           ,("wood engraver", "wde")
1441           ,("woodcutter", "wdc")
1442           ,("writer of accompanying material", "wam")
1443           ,("writer of added commentary", "wac")
1444           ,("writer of added lyrics", "wal")
1445           ,("writer of added text", "wat")
1446           ]
1447
1448docTitle' :: Meta -> [Inline]
1449docTitle' meta = maybe [] go $ lookupMeta "title" meta
1450  where go (MetaString s) = [Str s]
1451        go (MetaInlines xs) = xs
1452        go (MetaBlocks [Para xs]) = xs
1453        go (MetaBlocks [Plain xs]) = xs
1454        go (MetaMap m) =
1455              case M.lookup "type" m of
1456                   Just x | stringify x == "main" ->
1457                              maybe [] go $ M.lookup "text" m
1458                   _ -> []
1459        go (MetaList xs) = concatMap go xs
1460        go _ = []
1461