1{-# LANGUAGE CPP #-} 2{-# LANGUAGE FlexibleContexts #-} 3{-# LANGUAGE PatternGuards #-} 4{-# LANGUAGE OverloadedStrings #-} 5{-# LANGUAGE ScopedTypeVariables #-} 6{- | 7 Module : Text.Pandoc.Writers.EPUB 8 Copyright : Copyright (C) 2010-2021 John MacFarlane 9 License : GNU GPL, version 2 or above 10 11 Maintainer : John MacFarlane <jgm@berkeley.edu> 12 Stability : alpha 13 Portability : portable 14 15Conversion of 'Pandoc' documents to EPUB. 16-} 17module Text.Pandoc.Writers.EPUB ( writeEPUB2, writeEPUB3 ) where 18import Codec.Archive.Zip (Entry, addEntryToArchive, eRelativePath, emptyArchive, 19 fromArchive, fromEntry, toEntry) 20import Control.Applicative ( (<|>) ) 21import Control.Monad (mplus, unless, when, zipWithM) 22import Control.Monad.Except (catchError, throwError) 23import Control.Monad.State.Strict (StateT, evalState, evalStateT, get, 24 gets, lift, modify) 25import qualified Data.ByteString.Lazy as B 26import qualified Data.ByteString.Lazy.Char8 as B8 27import Data.Char (isAlphaNum, isAscii, isDigit, toLower) 28import Data.List (isInfixOf, isPrefixOf) 29import qualified Data.Map as M 30import Data.Maybe (fromMaybe, isNothing, mapMaybe, isJust) 31import qualified Data.Set as Set 32import qualified Data.Text as TS 33import qualified Data.Text.Lazy as TL 34import Network.HTTP (urlEncode) 35import System.FilePath (takeExtension, takeFileName, makeRelative) 36import Text.HTML.TagSoup (Tag (TagOpen), fromAttrib, parseTags) 37import Text.Pandoc.Builder (fromList, setMeta) 38import Text.Pandoc.Class.PandocMonad (PandocMonad, report) 39import qualified Text.Pandoc.Class.PandocPure as P 40import qualified Text.Pandoc.Class.PandocMonad as P 41import Data.Time 42import Text.Pandoc.Definition 43import Text.Pandoc.Error 44import Text.Pandoc.ImageSize 45import Text.Pandoc.Logging 46import Text.Pandoc.MIME (MimeType, extensionFromMimeType, getMimeType) 47import Text.Pandoc.Options (EPUBVersion (..), HTMLMathMethod (..), 48 ObfuscationMethod (NoObfuscation), WrapOption (..), 49 WriterOptions (..)) 50import Text.Pandoc.Shared (makeSections, normalizeDate, renderTags', 51 safeRead, stringify, trim, uniqueIdent, tshow) 52import qualified Text.Pandoc.UTF8 as UTF8 53import Text.Pandoc.UUID (getRandomUUID) 54import Text.Pandoc.Walk (query, walk, walkM) 55import Text.Pandoc.Writers.HTML (writeHtmlStringForEPUB) 56import Text.Printf (printf) 57import Text.XML.Light (Attr (..), Element (..), Node (..), QName (..), 58 add_attrs, lookupAttr, node, onlyElems, parseXML, 59 ppElement, showElement, strContent, unode, unqual) 60import Text.Pandoc.XML (escapeStringForXML) 61import Text.DocTemplates (FromContext(lookupContext), Context(..), 62 ToContext(toVal), Val(..)) 63 64-- A Chapter includes a list of blocks. 65newtype Chapter = Chapter [Block] 66 deriving (Show) 67 68data EPUBState = EPUBState { 69 stMediaPaths :: [(FilePath, (FilePath, Maybe Entry))] 70 , stMediaNextId :: Int 71 , stEpubSubdir :: String 72 } 73 74type E m = StateT EPUBState m 75 76data EPUBMetadata = EPUBMetadata{ 77 epubIdentifier :: [Identifier] 78 , epubTitle :: [Title] 79 , epubDate :: [Date] 80 , epubLanguage :: String 81 , epubCreator :: [Creator] 82 , epubContributor :: [Creator] 83 , epubSubject :: [String] 84 , epubDescription :: Maybe String 85 , epubType :: Maybe String 86 , epubFormat :: Maybe String 87 , epubPublisher :: Maybe String 88 , epubSource :: Maybe String 89 , epubRelation :: Maybe String 90 , epubCoverage :: Maybe String 91 , epubRights :: Maybe String 92 , epubCoverImage :: Maybe String 93 , epubStylesheets :: [FilePath] 94 , epubPageDirection :: Maybe ProgressionDirection 95 , epubIbooksFields :: [(String, String)] 96 , epubCalibreFields :: [(String, String)] 97 } deriving Show 98 99data Date = Date{ 100 dateText :: String 101 , dateEvent :: Maybe String 102 } deriving Show 103 104data Creator = Creator{ 105 creatorText :: String 106 , creatorRole :: Maybe String 107 , creatorFileAs :: Maybe String 108 } deriving Show 109 110data Identifier = Identifier{ 111 identifierText :: String 112 , identifierScheme :: Maybe String 113 } deriving Show 114 115data Title = Title{ 116 titleText :: String 117 , titleFileAs :: Maybe String 118 , titleType :: Maybe String 119 } deriving Show 120 121data ProgressionDirection = LTR | RTL deriving Show 122 123dcName :: String -> QName 124dcName n = QName n Nothing (Just "dc") 125 126dcNode :: Node t => String -> t -> Element 127dcNode = node . dcName 128 129opfName :: String -> QName 130opfName n = QName n Nothing (Just "opf") 131 132toId :: FilePath -> String 133toId = map (\x -> if isAlphaNum x || x == '-' || x == '_' 134 then x 135 else '_') . takeFileName 136 137removeNote :: Inline -> Inline 138removeNote (Note _) = Str "" 139removeNote x = x 140 141toVal' :: String -> Val TS.Text 142toVal' = toVal . TS.pack 143 144mkEntry :: PandocMonad m => FilePath -> B.ByteString -> E m Entry 145mkEntry path content = do 146 epubSubdir <- gets stEpubSubdir 147 let addEpubSubdir :: Entry -> Entry 148 addEpubSubdir e = e{ eRelativePath = 149 (if null epubSubdir 150 then "" 151 else epubSubdir ++ "/") ++ eRelativePath e } 152 epochtime <- floor <$> lift P.getPOSIXTime 153 return $ 154 (if path == "mimetype" || "META-INF" `isPrefixOf` path 155 then id 156 else addEpubSubdir) $ toEntry path epochtime content 157 158getEPUBMetadata :: PandocMonad m => WriterOptions -> Meta -> E m EPUBMetadata 159getEPUBMetadata opts meta = do 160 let md = metadataFromMeta opts meta 161 let elts = maybe [] (onlyElems . parseXML) $ writerEpubMetadata opts 162 let md' = foldr addMetadataFromXML md elts 163 let addIdentifier m = 164 if null (epubIdentifier m) 165 then do 166 randomId <- getRandomUUID 167 return $ m{ epubIdentifier = [Identifier (show randomId) Nothing] } 168 else return m 169 let addLanguage m = 170 if null (epubLanguage m) 171 then case lookupContext "lang" (writerVariables opts) of 172 Just x -> return m{ epubLanguage = TS.unpack x } 173 Nothing -> do 174 mLang <- lift $ P.lookupEnv "LANG" 175 let localeLang = 176 case mLang of 177 Just lang -> 178 TS.map (\c -> if c == '_' then '-' else c) $ 179 TS.takeWhile (/='.') lang 180 Nothing -> "en-US" 181 return m{ epubLanguage = TS.unpack localeLang } 182 else return m 183 let fixDate m = 184 if null (epubDate m) 185 then do 186 currentTime <- lift P.getCurrentTime 187 return $ m{ epubDate = [ Date{ 188 dateText = showDateTimeISO8601 currentTime 189 , dateEvent = Nothing } ] } 190 else return m 191 let addAuthor m = 192 if any (\c -> creatorRole c == Just "aut") $ epubCreator m 193 then return m 194 else do 195 let authors' = map stringify $ docAuthors meta 196 let toAuthor name = Creator{ creatorText = TS.unpack name 197 , creatorRole = Just "aut" 198 , creatorFileAs = Nothing } 199 return $ m{ epubCreator = map toAuthor authors' ++ epubCreator m } 200 addIdentifier md' >>= fixDate >>= addAuthor >>= addLanguage 201 202addMetadataFromXML :: Element -> EPUBMetadata -> EPUBMetadata 203addMetadataFromXML e@(Element (QName name _ (Just "dc")) attrs _ _) md 204 | name == "identifier" = md{ epubIdentifier = 205 Identifier{ identifierText = strContent e 206 , identifierScheme = lookupAttr (opfName "scheme") attrs 207 } : epubIdentifier md } 208 | name == "title" = md{ epubTitle = 209 Title{ titleText = strContent e 210 , titleFileAs = getAttr "file-as" 211 , titleType = getAttr "type" 212 } : epubTitle md } 213 | name == "date" = md{ epubDate = 214 Date{ dateText = fromMaybe "" $ normalizeDate' $ strContent e 215 , dateEvent = getAttr "event" 216 } : epubDate md } 217 | name == "language" = md{ epubLanguage = strContent e } 218 | name == "creator" = md{ epubCreator = 219 Creator{ creatorText = strContent e 220 , creatorRole = getAttr "role" 221 , creatorFileAs = getAttr "file-as" 222 } : epubCreator md } 223 | name == "contributor" = md{ epubContributor = 224 Creator { creatorText = strContent e 225 , creatorRole = getAttr "role" 226 , creatorFileAs = getAttr "file-as" 227 } : epubContributor md } 228 | name == "subject" = md{ epubSubject = strContent e : epubSubject md } 229 | name == "description" = md { epubDescription = Just $ strContent e } 230 | name == "type" = md { epubType = Just $ strContent e } 231 | name == "format" = md { epubFormat = Just $ strContent e } 232 | name == "type" = md { epubType = Just $ strContent e } 233 | name == "publisher" = md { epubPublisher = Just $ strContent e } 234 | name == "source" = md { epubSource = Just $ strContent e } 235 | name == "relation" = md { epubRelation = Just $ strContent e } 236 | name == "coverage" = md { epubCoverage = Just $ strContent e } 237 | name == "rights" = md { epubRights = Just $ strContent e } 238 | otherwise = md 239 where getAttr n = lookupAttr (opfName n) attrs 240addMetadataFromXML e@(Element (QName "meta" _ _) attrs _ _) md = 241 case getAttr "property" of 242 Just s | "ibooks:" `isPrefixOf` s -> 243 md{ epubIbooksFields = (drop 7 s, strContent e) : 244 epubIbooksFields md } 245 _ -> case getAttr "name" of 246 Just s | "calibre:" `isPrefixOf` s -> 247 md{ epubCalibreFields = 248 (drop 8 s, fromMaybe "" $ getAttr "content") : 249 epubCalibreFields md } 250 _ -> md 251 where getAttr n = lookupAttr (unqual n) attrs 252addMetadataFromXML _ md = md 253 254metaValueToString :: MetaValue -> String 255metaValueToString (MetaString s) = TS.unpack s 256metaValueToString (MetaInlines ils) = TS.unpack $ stringify ils 257metaValueToString (MetaBlocks bs) = TS.unpack $ stringify bs 258metaValueToString (MetaBool True) = "true" 259metaValueToString (MetaBool False) = "false" 260metaValueToString _ = "" 261 262metaValueToPaths :: MetaValue -> [FilePath] 263metaValueToPaths (MetaList xs) = map metaValueToString xs 264metaValueToPaths x = [metaValueToString x] 265 266getList :: TS.Text -> Meta -> (MetaValue -> a) -> [a] 267getList s meta handleMetaValue = 268 case lookupMeta s meta of 269 Just (MetaList xs) -> map handleMetaValue xs 270 Just mv -> [handleMetaValue mv] 271 Nothing -> [] 272 273getIdentifier :: Meta -> [Identifier] 274getIdentifier meta = getList "identifier" meta handleMetaValue 275 where handleMetaValue (MetaMap m) = 276 Identifier{ identifierText = maybe "" metaValueToString 277 $ M.lookup "text" m 278 , identifierScheme = metaValueToString <$> 279 M.lookup "scheme" m } 280 handleMetaValue mv = Identifier (metaValueToString mv) Nothing 281 282getTitle :: Meta -> [Title] 283getTitle meta = getList "title" meta handleMetaValue 284 where handleMetaValue (MetaMap m) = 285 Title{ titleText = maybe "" metaValueToString $ M.lookup "text" m 286 , titleFileAs = metaValueToString <$> M.lookup "file-as" m 287 , titleType = metaValueToString <$> M.lookup "type" m } 288 handleMetaValue mv = Title (metaValueToString mv) Nothing Nothing 289 290getCreator :: TS.Text -> Meta -> [Creator] 291getCreator s meta = getList s meta handleMetaValue 292 where handleMetaValue (MetaMap m) = 293 Creator{ creatorText = maybe "" metaValueToString $ M.lookup "text" m 294 , creatorFileAs = metaValueToString <$> M.lookup "file-as" m 295 , creatorRole = metaValueToString <$> M.lookup "role" m } 296 handleMetaValue mv = Creator (metaValueToString mv) Nothing Nothing 297 298getDate :: TS.Text -> Meta -> [Date] 299getDate s meta = getList s meta handleMetaValue 300 where handleMetaValue (MetaMap m) = 301 Date{ dateText = fromMaybe "" $ 302 M.lookup "text" m >>= normalizeDate' . metaValueToString 303 , dateEvent = metaValueToString <$> M.lookup "event" m } 304 handleMetaValue mv = Date { dateText = fromMaybe "" $ normalizeDate' $ metaValueToString mv 305 , dateEvent = Nothing } 306 307simpleList :: TS.Text -> Meta -> [String] 308simpleList s meta = 309 case lookupMeta s meta of 310 Just (MetaList xs) -> map metaValueToString xs 311 Just x -> [metaValueToString x] 312 Nothing -> [] 313 314metadataFromMeta :: WriterOptions -> Meta -> EPUBMetadata 315metadataFromMeta opts meta = EPUBMetadata{ 316 epubIdentifier = identifiers 317 , epubTitle = titles 318 , epubDate = date 319 , epubLanguage = language 320 , epubCreator = creators 321 , epubContributor = contributors 322 , epubSubject = subjects 323 , epubDescription = description 324 , epubType = epubtype 325 , epubFormat = format 326 , epubPublisher = publisher 327 , epubSource = source 328 , epubRelation = relation 329 , epubCoverage = coverage 330 , epubRights = rights 331 , epubCoverImage = coverImage 332 , epubStylesheets = stylesheets 333 , epubPageDirection = pageDirection 334 , epubIbooksFields = ibooksFields 335 , epubCalibreFields = calibreFields 336 } 337 where identifiers = getIdentifier meta 338 titles = getTitle meta 339 date = getDate "date" meta 340 language = maybe "" metaValueToString $ 341 lookupMeta "language" meta `mplus` lookupMeta "lang" meta 342 creators = getCreator "creator" meta 343 contributors = getCreator "contributor" meta 344 subjects = simpleList "subject" meta 345 description = metaValueToString <$> lookupMeta "description" meta 346 epubtype = metaValueToString <$> lookupMeta "type" meta 347 format = metaValueToString <$> lookupMeta "format" meta 348 publisher = metaValueToString <$> lookupMeta "publisher" meta 349 source = metaValueToString <$> lookupMeta "source" meta 350 relation = metaValueToString <$> lookupMeta "relation" meta 351 coverage = metaValueToString <$> lookupMeta "coverage" meta 352 rights = metaValueToString <$> lookupMeta "rights" meta 353 coverImage = 354 (TS.unpack <$> lookupContext "epub-cover-image" 355 (writerVariables opts)) 356 `mplus` (metaValueToString <$> lookupMeta "cover-image" meta) 357 mCss = lookupMeta "css" meta <|> lookupMeta "stylesheet" meta 358 stylesheets = maybe [] metaValueToPaths mCss ++ 359 case lookupContext "css" (writerVariables opts) of 360 Just xs -> map TS.unpack xs 361 Nothing -> 362 case lookupContext "css" (writerVariables opts) of 363 Just x -> [TS.unpack x] 364 Nothing -> [] 365 pageDirection = case map toLower . metaValueToString <$> 366 lookupMeta "page-progression-direction" meta of 367 Just "ltr" -> Just LTR 368 Just "rtl" -> Just RTL 369 _ -> Nothing 370 ibooksFields = case lookupMeta "ibooks" meta of 371 Just (MetaMap mp) 372 -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp 373 _ -> [] 374 calibreFields = case lookupMeta "calibre" meta of 375 Just (MetaMap mp) 376 -> M.toList $ M.mapKeys TS.unpack $ M.map metaValueToString mp 377 _ -> [] 378 379-- | Produce an EPUB2 file from a Pandoc document. 380writeEPUB2 :: PandocMonad m 381 => WriterOptions -- ^ Writer options 382 -> Pandoc -- ^ Document to convert 383 -> m B.ByteString 384writeEPUB2 = writeEPUB EPUB2 385 386-- | Produce an EPUB3 file from a Pandoc document. 387writeEPUB3 :: PandocMonad m 388 => WriterOptions -- ^ Writer options 389 -> Pandoc -- ^ Document to convert 390 -> m B.ByteString 391writeEPUB3 = writeEPUB EPUB3 392 393-- | Produce an EPUB file from a Pandoc document. 394writeEPUB :: PandocMonad m 395 => EPUBVersion 396 -> WriterOptions -- ^ Writer options 397 -> Pandoc -- ^ Document to convert 398 -> m B.ByteString 399writeEPUB epubVersion opts doc = do 400 let epubSubdir = writerEpubSubdirectory opts 401 -- sanity check on epubSubdir 402 unless (TS.all (\c -> isAscii c && isAlphaNum c) epubSubdir) $ 403 throwError $ PandocEpubSubdirectoryError epubSubdir 404 let initState = EPUBState { stMediaPaths = [], stMediaNextId = 0, stEpubSubdir = TS.unpack epubSubdir } 405 evalStateT (pandocToEPUB epubVersion opts doc) initState 406 407pandocToEPUB :: PandocMonad m 408 => EPUBVersion 409 -> WriterOptions 410 -> Pandoc 411 -> E m B.ByteString 412pandocToEPUB version opts doc = do 413 -- handle pictures 414 Pandoc meta blocks <- walkM (transformInline opts) doc >>= 415 walkM transformBlock 416 picEntries <- mapMaybe (snd . snd) <$> gets stMediaPaths 417 418 epubSubdir <- gets stEpubSubdir 419 let epub3 = version == EPUB3 420 let writeHtml o = fmap (UTF8.fromTextLazy . TL.fromStrict) . 421 writeHtmlStringForEPUB version o 422 metadata <- getEPUBMetadata opts meta 423 424 let plainTitle = case docTitle' meta of 425 [] -> case epubTitle metadata of 426 [] -> "UNTITLED" 427 (x:_) -> titleText x 428 x -> TS.unpack $ stringify x 429 430 -- stylesheet 431 stylesheets <- case epubStylesheets metadata of 432 [] -> (\x -> [B.fromChunks [x]]) <$> 433 P.readDataFile "epub.css" 434 fs -> mapM P.readFileLazy fs 435 stylesheetEntries <- zipWithM 436 (\bs n -> mkEntry ("styles/stylesheet" ++ show n ++ ".css") bs) 437 stylesheets [(1 :: Int)..] 438 439 let vars = Context $ 440 M.delete "css" . 441 M.insert "epub3" 442 (toVal' $ if epub3 then "true" else "false") . 443 M.insert "lang" (toVal' $ epubLanguage metadata) 444 $ unContext $ writerVariables opts 445 446 let cssvars useprefix = Context $ M.insert "css" 447 (ListVal $ map 448 (\e -> toVal' $ 449 (if useprefix then "../" else "") <> 450 makeRelative epubSubdir (eRelativePath e)) 451 stylesheetEntries) 452 mempty 453 454 let opts' = opts{ writerEmailObfuscation = NoObfuscation 455 , writerSectionDivs = True 456 , writerVariables = vars 457 , writerHTMLMathMethod = 458 if epub3 459 then MathML 460 else writerHTMLMathMethod opts 461 , writerWrapText = WrapAuto } 462 463 -- cover page 464 (cpgEntry, cpicEntry) <- 465 case epubCoverImage metadata of 466 Nothing -> return ([],[]) 467 Just img -> do 468 let fp = takeFileName img 469 mediaPaths <- gets (map (fst . snd) . stMediaPaths) 470 coverImageName <- -- see #4206 471 if ("media/" <> fp) `elem` mediaPaths 472 then getMediaNextNewName (takeExtension fp) 473 else return fp 474 imgContent <- lift $ P.readFileLazy img 475 (coverImageWidth, coverImageHeight) <- 476 case imageSize opts' (B.toStrict imgContent) of 477 Right sz -> return $ sizeInPixels sz 478 Left err' -> (0, 0) <$ report 479 (CouldNotDetermineImageSize (TS.pack img) err') 480 cpContent <- lift $ writeHtml 481 opts'{ writerVariables = 482 Context (M.fromList [ 483 ("coverpage", toVal' "true"), 484 ("pagetitle", toVal $ 485 escapeStringForXML $ TS.pack plainTitle), 486 ("cover-image", toVal' coverImageName), 487 ("cover-image-width", toVal' $ 488 show coverImageWidth), 489 ("cover-image-height", toVal' $ 490 show coverImageHeight)]) <> 491 cssvars True <> vars } 492 (Pandoc meta []) 493 coverEntry <- mkEntry "text/cover.xhtml" cpContent 494 coverImageEntry <- mkEntry ("media/" ++ coverImageName) 495 imgContent 496 return ( [ coverEntry ] 497 , [ coverImageEntry ] ) 498 499 -- title page 500 tpContent <- lift $ writeHtml opts'{ 501 writerVariables = 502 Context (M.fromList [ 503 ("titlepage", toVal' "true"), 504 ("body-type", toVal' "frontmatter"), 505 ("pagetitle", toVal $ 506 escapeStringForXML $ TS.pack plainTitle)]) 507 <> cssvars True <> vars } 508 (Pandoc meta []) 509 tpEntry <- mkEntry "text/title_page.xhtml" tpContent 510 511 -- handle fonts 512 let matchingGlob f = do 513 xs <- lift $ P.glob f 514 when (null xs) $ 515 report $ CouldNotFetchResource (TS.pack f) "glob did not match any font files" 516 return xs 517 let mkFontEntry f = mkEntry ("fonts/" ++ takeFileName f) =<< 518 lift (P.readFileLazy f) 519 fontFiles <- concat <$> mapM matchingGlob (writerEpubFonts opts') 520 fontEntries <- mapM mkFontEntry fontFiles 521 522 -- set page progression direction attribution 523 let progressionDirection = case epubPageDirection metadata of 524 Just LTR | epub3 -> 525 [("page-progression-direction", "ltr")] 526 Just RTL | epub3 -> 527 [("page-progression-direction", "rtl")] 528 _ -> [] 529 530 -- body pages 531 532 let chapterHeaderLevel = writerEpubChapterLevel opts 533 534 let isChapterHeader (Div _ (Header n _ _:_)) = n <= chapterHeaderLevel 535 isChapterHeader _ = False 536 537 let secsToChapters :: [Block] -> [Chapter] 538 secsToChapters [] = [] 539 secsToChapters (d@(Div attr (h@(Header lvl _ _) : bs)) : rest) 540 | chapterHeaderLevel == lvl = 541 Chapter [d] : secsToChapters rest 542 | chapterHeaderLevel > lvl = 543 Chapter [Div attr (h:xs)] : 544 secsToChapters ys ++ secsToChapters rest 545 where (xs, ys) = break isChapterHeader bs 546 secsToChapters bs = 547 (if null xs then id else (Chapter xs :)) $ secsToChapters ys 548 where (xs, ys) = break isChapterHeader bs 549 550 -- add level 1 header to beginning if none there 551 let secs = makeSections True Nothing 552 $ addIdentifiers opts 553 $ case blocks of 554 (Div _ 555 (Header{}:_) : _) -> blocks 556 (Header 1 _ _ : _) -> blocks 557 _ -> Header 1 ("",["unnumbered"],[]) 558 (docTitle' meta) : blocks 559 560 let chapters' = secsToChapters secs 561 562 let extractLinkURL' :: Int -> Inline -> [(TS.Text, TS.Text)] 563 extractLinkURL' num (Span (ident, _, _) _) 564 | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] 565 extractLinkURL' num (Link (ident, _, _) _ _) 566 | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] 567 extractLinkURL' num (Image (ident, _, _) _ _) 568 | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] 569 extractLinkURL' num (RawInline fmt raw) 570 | isHtmlFormat fmt 571 = foldr (\tag -> 572 case tag of 573 TagOpen{} -> 574 case fromAttrib "id" tag of 575 "" -> id 576 x -> ((x, TS.pack (showChapter num) <> "#" <> x):) 577 _ -> id) 578 [] (parseTags raw) 579 extractLinkURL' _ _ = [] 580 581 let extractLinkURL :: Int -> Block -> [(TS.Text, TS.Text)] 582 extractLinkURL num (Div (ident, _, _) _) 583 | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] 584 extractLinkURL num (Header _ (ident, _, _) _) 585 | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] 586 extractLinkURL num (Table (ident,_,_) _ _ _ _ _) 587 | not (TS.null ident) = [(ident, TS.pack (showChapter num) <> "#" <> ident)] 588 extractLinkURL num (RawBlock fmt raw) 589 | isHtmlFormat fmt 590 = foldr (\tag -> 591 case tag of 592 TagOpen{} -> 593 case fromAttrib "id" tag of 594 "" -> id 595 x -> ((x, TS.pack (showChapter num) <> "#" <> x):) 596 _ -> id) 597 [] (parseTags raw) 598 extractLinkURL num b = query (extractLinkURL' num) b 599 600 let reftable = concat $ zipWith (\(Chapter bs) num -> 601 query (extractLinkURL num) bs) 602 chapters' [1..] 603 604 let fixInternalReferences :: Inline -> Inline 605 fixInternalReferences (Link attr lab (src, tit)) 606 | Just ('#', xs) <- TS.uncons src = case lookup xs reftable of 607 Just ys -> Link attr lab (ys, tit) 608 Nothing -> Link attr lab (src, tit) 609 fixInternalReferences x = x 610 611 -- internal reference IDs change when we chunk the file, 612 -- so that '#my-header-1' might turn into 'chap004.xhtml#my-header'. 613 -- this fixes that: 614 let chapters = map (\(Chapter bs) -> 615 Chapter $ walk fixInternalReferences bs) 616 chapters' 617 618 let chapToEntry num (Chapter bs) = 619 mkEntry ("text/" ++ showChapter num) =<< 620 writeHtml opts'{ writerVariables = 621 Context (M.fromList 622 [("body-type", toVal' bodyType), 623 ("pagetitle", toVal' $ 624 showChapter num)]) 625 <> cssvars True <> vars } pdoc 626 where (pdoc, bodyType) = 627 case bs of 628 (Div (_,"section":_,kvs) 629 (Header _ _ xs : _) : _) -> 630 -- remove notes or we get doubled footnotes 631 (Pandoc (setMeta "title" 632 (walk removeNote $ fromList xs) nullMeta) bs, 633 case lookup "epub:type" kvs of 634 Nothing -> "bodymatter" 635 Just x 636 | x `elem` frontMatterTypes -> "frontmatter" 637 | x `elem` backMatterTypes -> "backmatter" 638 | otherwise -> "bodymatter") 639 _ -> (Pandoc nullMeta bs, "bodymatter") 640 frontMatterTypes = ["prologue", "abstract", "acknowledgments", 641 "copyright-page", "dedication", 642 "credits", "keywords", "imprint", 643 "contributors", "other-credits", 644 "errata", "revision-history", 645 "titlepage", "halftitlepage", "seriespage", 646 "foreword", "preface", 647 "seriespage", "titlepage"] 648 backMatterTypes = ["appendix", "colophon", "bibliography", 649 "index"] 650 651 chapterEntries <- zipWithM chapToEntry [1..] chapters 652 653 -- incredibly inefficient (TODO): 654 let containsMathML ent = epub3 && 655 "<math" `isInfixOf` 656 B8.unpack (fromEntry ent) 657 let containsSVG ent = epub3 && 658 "<svg" `isInfixOf` 659 B8.unpack (fromEntry ent) 660 let props ent = ["mathml" | containsMathML ent] ++ ["svg" | containsSVG ent] 661 662 -- contents.opf 663 let chapterNode ent = unode "item" ! 664 ([("id", toId $ makeRelative epubSubdir 665 $ eRelativePath ent), 666 ("href", makeRelative epubSubdir 667 $ eRelativePath ent), 668 ("media-type", "application/xhtml+xml")] 669 ++ case props ent of 670 [] -> [] 671 xs -> [("properties", unwords xs)]) 672 $ () 673 674 let chapterRefNode ent = unode "itemref" ! 675 [("idref", toId $ makeRelative epubSubdir 676 $ eRelativePath ent)] $ () 677 let pictureNode ent = unode "item" ! 678 [("id", toId $ makeRelative epubSubdir 679 $ eRelativePath ent), 680 ("href", makeRelative epubSubdir 681 $ eRelativePath ent), 682 ("media-type", 683 maybe "application/octet-stream" TS.unpack 684 $ mediaTypeOf $ eRelativePath ent)] $ () 685 let fontNode ent = unode "item" ! 686 [("id", toId $ makeRelative epubSubdir 687 $ eRelativePath ent), 688 ("href", makeRelative epubSubdir 689 $ eRelativePath ent), 690 ("media-type", maybe "" TS.unpack $ 691 getMimeType $ eRelativePath ent)] $ () 692 693 let tocTitle = maybe plainTitle 694 metaValueToString $ lookupMeta "toc-title" meta 695 uuid <- case epubIdentifier metadata of 696 (x:_) -> return $ identifierText x -- use first identifier as UUID 697 [] -> throwError $ PandocShouldNeverHappenError "epubIdentifier is null" -- shouldn't happen 698 currentTime <- lift P.getCurrentTime 699 let contentsData = UTF8.fromStringLazy $ ppTopElement $ 700 unode "package" ! 701 ([("version", case version of 702 EPUB2 -> "2.0" 703 EPUB3 -> "3.0") 704 ,("xmlns","http://www.idpf.org/2007/opf") 705 ,("unique-identifier","epub-id-1") 706 ] ++ 707 [("prefix","ibooks: http://vocabulary.itunes.apple.com/rdf/ibooks/vocabulary-extensions-1.0/") | version == EPUB3]) $ 708 [ metadataElement version metadata currentTime 709 , unode "manifest" $ 710 [ unode "item" ! [("id","ncx"), ("href","toc.ncx") 711 ,("media-type","application/x-dtbncx+xml")] $ () 712 , unode "item" ! ([("id","nav") 713 ,("href","nav.xhtml") 714 ,("media-type","application/xhtml+xml")] ++ 715 [("properties","nav") | epub3 ]) $ () 716 ] ++ 717 [ unode "item" ! [("id","stylesheet" ++ show n), ("href",fp) 718 ,("media-type","text/css")] $ () | 719 (n :: Int, fp) <- zip [1..] (map 720 (makeRelative epubSubdir . eRelativePath) 721 stylesheetEntries) ] ++ 722 map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++ 723 (case cpicEntry of 724 [] -> [] 725 (x:_) -> [add_attrs 726 [Attr (unqual "properties") "cover-image" | epub3] 727 (pictureNode x)]) ++ 728 map pictureNode picEntries ++ 729 map fontNode fontEntries 730 , unode "spine" ! ( 731 ("toc","ncx") : progressionDirection) $ 732 case epubCoverImage metadata of 733 Nothing -> [] 734 Just _ -> [ unode "itemref" ! 735 [("idref", "cover_xhtml")] $ () ] 736 ++ ((unode "itemref" ! [("idref", "title_page_xhtml") 737 ,("linear", 738 case lookupMeta "title" meta of 739 Just _ -> "yes" 740 Nothing -> "no")] $ ()) : 741 [unode "itemref" ! [("idref", "nav")] $ () 742 | writerTableOfContents opts ] ++ 743 map chapterRefNode chapterEntries) 744 , unode "guide" $ 745 (unode "reference" ! 746 [("type","toc"),("title", tocTitle), 747 ("href","nav.xhtml")] $ () 748 ) : 749 [ unode "reference" ! 750 [("type","cover") 751 ,("title","Cover") 752 ,("href","text/cover.xhtml")] $ () 753 | isJust (epubCoverImage metadata) 754 ] 755 ] 756 contentsEntry <- mkEntry "content.opf" contentsData 757 758 -- toc.ncx 759 let tocLevel = writerTOCDepth opts 760 761 let navPointNode :: PandocMonad m 762 => (Int -> [Inline] -> TS.Text -> [Element] -> Element) 763 -> Block -> StateT Int m [Element] 764 navPointNode formatter (Div (ident,_,_) 765 (Header lvl (_,_,kvs) ils : children)) = 766 if lvl > tocLevel 767 then return [] 768 else do 769 n <- get 770 modify (+1) 771 let num = fromMaybe "" $ lookup "number" kvs 772 let tit = if writerNumberSections opts && not (TS.null num) 773 then Span ("", ["section-header-number"], []) 774 [Str num] : Space : ils 775 else ils 776 src <- case lookup ident reftable of 777 Just x -> return x 778 Nothing -> throwError $ PandocSomeError $ 779 ident <> " not found in reftable" 780 subs <- concat <$> mapM (navPointNode formatter) children 781 return [formatter n tit src subs] 782 navPointNode formatter (Div _ bs) = 783 concat <$> mapM (navPointNode formatter) bs 784 navPointNode _ _ = return [] 785 786 let navMapFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element 787 navMapFormatter n tit src subs = unode "navPoint" ! 788 [("id", "navPoint-" ++ show n)] $ 789 [ unode "navLabel" $ unode "text" $ TS.unpack $ stringify tit 790 , unode "content" ! [("src", "text/" <> TS.unpack src)] $ () 791 ] ++ subs 792 793 let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $ 794 [ unode "navLabel" $ unode "text" (TS.unpack $ stringify $ docTitle' meta) 795 , unode "content" ! [("src", "text/title_page.xhtml")] 796 $ () ] 797 798 navMap <- lift $ evalStateT 799 (concat <$> mapM (navPointNode navMapFormatter) secs) 1 800 let tocData = UTF8.fromStringLazy $ ppTopElement $ 801 unode "ncx" ! [("version","2005-1") 802 ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ 803 [ unode "head" $ 804 [ unode "meta" ! [("name","dtb:uid") 805 ,("content", uuid)] $ () 806 , unode "meta" ! [("name","dtb:depth") 807 ,("content", "1")] $ () 808 , unode "meta" ! [("name","dtb:totalPageCount") 809 ,("content", "0")] $ () 810 , unode "meta" ! [("name","dtb:maxPageNumber") 811 ,("content", "0")] $ () 812 ] ++ case epubCoverImage metadata of 813 Nothing -> [] 814 Just img -> [unode "meta" ! [("name","cover"), 815 ("content", toId img)] $ ()] 816 , unode "docTitle" $ unode "text" plainTitle 817 , unode "navMap" $ 818 tpNode : navMap 819 ] 820 tocEntry <- mkEntry "toc.ncx" tocData 821 822 let navXhtmlFormatter :: Int -> [Inline] -> TS.Text -> [Element] -> Element 823 navXhtmlFormatter n tit src subs = unode "li" ! 824 [("id", "toc-li-" ++ show n)] $ 825 (unode "a" ! 826 [("href", "text/" <> TS.unpack src)] 827 $ titElements) 828 : case subs of 829 [] -> [] 830 (_:_) -> [unode "ol" ! [("class","toc")] $ subs] 831 where titElements = parseXML titRendered 832 titRendered = case P.runPure 833 (writeHtmlStringForEPUB version 834 opts{ writerTemplate = Nothing 835 , writerVariables = 836 Context (M.fromList 837 [("pagetitle", toVal $ 838 escapeStringForXML $ TS.pack plainTitle)]) 839 <> writerVariables opts} 840 (Pandoc nullMeta 841 [Plain $ walk clean tit])) of 842 Left _ -> stringify tit 843 Right x -> x 844 -- can't have <a> elements inside generated links... 845 clean (Link _ ils _) = Span ("", [], []) ils 846 clean (Note _) = Str "" 847 clean x = x 848 849 let navtag = if epub3 then "nav" else "div" 850 tocBlocks <- lift $ evalStateT 851 (concat <$> mapM (navPointNode navXhtmlFormatter) secs) 1 852 let navBlocks = [RawBlock (Format "html") 853 $ TS.pack $ showElement $ -- prettyprinting introduces bad spaces 854 unode navtag ! ([("epub:type","toc") | epub3] ++ 855 [("id","toc")]) $ 856 [ unode "h1" ! [("id","toc-title")] $ tocTitle 857 , unode "ol" ! [("class","toc")] $ tocBlocks ]] 858 let landmarkItems = if epub3 859 then unode "li" 860 [ unode "a" ! [("href", 861 "text/title_page.xhtml") 862 ,("epub:type", "titlepage")] $ 863 ("Title Page" :: String) ] : 864 [ unode "li" 865 [ unode "a" ! [("href", "text/cover.xhtml") 866 ,("epub:type", "cover")] $ 867 ("Cover" :: String)] | 868 isJust (epubCoverImage metadata) 869 ] ++ 870 [ unode "li" 871 [ unode "a" ! [("href", "#toc") 872 ,("epub:type", "toc")] $ 873 ("Table of Contents" :: String) 874 ] | writerTableOfContents opts 875 ] 876 else [] 877 let landmarks = [RawBlock (Format "html") $ TS.pack $ ppElement $ 878 unode "nav" ! [("epub:type","landmarks") 879 ,("id","landmarks") 880 ,("hidden","hidden")] $ 881 [ unode "ol" landmarkItems ] 882 | not (null landmarkItems)] 883 navData <- lift $ writeHtml opts'{ writerVariables = 884 Context (M.fromList [("navpage", toVal' "true") 885 ,("body-type", toVal' "frontmatter") 886 ]) 887 <> cssvars False <> vars } 888 (Pandoc (setMeta "title" 889 (walk removeNote $ fromList $ docTitle' meta) nullMeta) 890 (navBlocks ++ landmarks)) 891 navEntry <- mkEntry "nav.xhtml" navData 892 893 -- mimetype 894 mimetypeEntry <- mkEntry "mimetype" $ 895 UTF8.fromStringLazy "application/epub+zip" 896 897 -- container.xml 898 let containerData = UTF8.fromStringLazy $ ppTopElement $ 899 unode "container" ! [("version","1.0") 900 ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ 901 unode "rootfiles" $ 902 unode "rootfile" ! [("full-path", 903 (if null epubSubdir 904 then "" 905 else epubSubdir ++ "/") ++ "content.opf") 906 ,("media-type","application/oebps-package+xml")] $ () 907 containerEntry <- mkEntry "META-INF/container.xml" containerData 908 909 -- com.apple.ibooks.display-options.xml 910 let apple = UTF8.fromStringLazy $ ppTopElement $ 911 unode "display_options" $ 912 unode "platform" ! [("name","*")] $ 913 unode "option" ! [("name","specified-fonts")] $ ("true" :: String) 914 appleEntry <- mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple 915 916 -- construct archive 917 let archive = foldr addEntryToArchive emptyArchive $ 918 [mimetypeEntry, containerEntry, appleEntry, 919 contentsEntry, tocEntry, navEntry, tpEntry] ++ 920 stylesheetEntries ++ picEntries ++ cpicEntry ++ 921 cpgEntry ++ chapterEntries ++ fontEntries 922 return $ fromArchive archive 923 924metadataElement :: EPUBVersion -> EPUBMetadata -> UTCTime -> Element 925metadataElement version md currentTime = 926 unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") 927 ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ mdNodes 928 where mdNodes = identifierNodes ++ titleNodes ++ dateNodes 929 ++ languageNodes ++ ibooksNodes ++ calibreNodes 930 ++ creatorNodes ++ contributorNodes ++ subjectNodes 931 ++ descriptionNodes ++ typeNodes ++ formatNodes 932 ++ publisherNodes ++ sourceNodes ++ relationNodes 933 ++ coverageNodes ++ rightsNodes ++ coverImageNodes 934 ++ modifiedNodes 935 withIds base f = concat . zipWith f (map (\x -> base ++ ('-' : show x)) 936 ([1..] :: [Int])) 937 identifierNodes = withIds "epub-id" toIdentifierNode $ 938 epubIdentifier md 939 titleNodes = withIds "epub-title" toTitleNode $ epubTitle md 940 dateNodes = if version == EPUB2 941 then withIds "epub-date" toDateNode $ epubDate md 942 else -- epub3 allows only one dc:date 943 -- http://www.idpf.org/epub/30/spec/epub30-publications.html#sec-opf-dcdate 944 case epubDate md of 945 [] -> [] 946 (x:_) -> [dcNode "date" ! [("id","epub-date")] 947 $ dateText x] 948 ibooksNodes = map ibooksNode (epubIbooksFields md) 949 ibooksNode (k, v) = unode "meta" ! [("property", "ibooks:" ++ k)] $ v 950 calibreNodes = map calibreNode (epubCalibreFields md) 951 calibreNode (k, v) = unode "meta" ! [("name", "calibre:" ++ k), 952 ("content", v)] $ () 953 languageNodes = [dcTag "language" $ epubLanguage md] 954 creatorNodes = withIds "epub-creator" (toCreatorNode "creator") $ 955 epubCreator md 956 contributorNodes = withIds "epub-contributor" 957 (toCreatorNode "contributor") $ epubContributor md 958 subjectNodes = map (dcTag "subject") $ epubSubject md 959 descriptionNodes = maybe [] (dcTag' "description") $ epubDescription md 960 typeNodes = maybe [] (dcTag' "type") $ epubType md 961 formatNodes = maybe [] (dcTag' "format") $ epubFormat md 962 publisherNodes = maybe [] (dcTag' "publisher") $ epubPublisher md 963 sourceNodes = maybe [] (dcTag' "source") $ epubSource md 964 relationNodes = maybe [] (dcTag' "relation") $ epubRelation md 965 coverageNodes = maybe [] (dcTag' "coverage") $ epubCoverage md 966 rightsNodes = maybe [] (dcTag' "rights") $ epubRights md 967 coverImageNodes = maybe [] 968 (\img -> [unode "meta" ! [("name","cover"), 969 ("content",toId img)] $ ()]) 970 $ epubCoverImage md 971 modifiedNodes = [ unode "meta" ! [("property", "dcterms:modified")] $ 972 showDateTimeISO8601 currentTime | version == EPUB3 ] 973 dcTag n s = unode ("dc:" ++ n) s 974 dcTag' n s = [dcTag n s] 975 toIdentifierNode id' (Identifier txt scheme) 976 | version == EPUB2 = [dcNode "identifier" ! 977 (("id",id') : maybe [] (\x -> [("opf:scheme", x)]) scheme) $ 978 txt] 979 | otherwise = (dcNode "identifier" ! [("id",id')] $ txt) : 980 maybe [] ((\x -> [unode "meta" ! 981 [ ("refines",'#':id') 982 , ("property","identifier-type") 983 , ("scheme","onix:codelist5") 984 ] 985 $ x 986 ]) 987 . schemeToOnix) 988 scheme 989 toCreatorNode s id' creator 990 | version == EPUB2 = [dcNode s ! 991 (("id",id') : 992 maybe [] (\x -> [("opf:file-as",x)]) (creatorFileAs creator) ++ 993 maybe [] (\x -> [("opf:role",x)]) 994 (creatorRole creator >>= toRelator)) $ creatorText creator] 995 | otherwise = [dcNode s ! [("id",id')] $ creatorText creator] ++ 996 maybe [] (\x -> [unode "meta" ! 997 [("refines",'#':id'),("property","file-as")] $ x]) 998 (creatorFileAs creator) ++ 999 maybe [] (\x -> [unode "meta" ! 1000 [("refines",'#':id'),("property","role"), 1001 ("scheme","marc:relators")] $ x]) 1002 (creatorRole creator >>= toRelator) 1003 toTitleNode id' title 1004 | version == EPUB2 = [dcNode "title" ! 1005 (("id",id') : 1006 -- note: EPUB2 doesn't accept opf:title-type 1007 maybe [] (\x -> [("opf:file-as",x)]) (titleFileAs title)) $ 1008 titleText title] 1009 | otherwise = [dcNode "title" ! [("id",id')] $ titleText title] 1010 ++ 1011 maybe [] (\x -> [unode "meta" ! 1012 [("refines",'#':id'),("property","file-as")] $ x]) 1013 (titleFileAs title) ++ 1014 maybe [] (\x -> [unode "meta" ! 1015 [("refines",'#':id'),("property","title-type")] $ x]) 1016 (titleType title) 1017 toDateNode id' date = [dcNode "date" ! 1018 (("id",id') : 1019 maybe [] (\x -> [("opf:event",x)]) (dateEvent date)) $ 1020 dateText date] 1021 schemeToOnix :: String -> String 1022 schemeToOnix "ISBN-10" = "02" 1023 schemeToOnix "GTIN-13" = "03" 1024 schemeToOnix "UPC" = "04" 1025 schemeToOnix "ISMN-10" = "05" 1026 schemeToOnix "DOI" = "06" 1027 schemeToOnix "LCCN" = "13" 1028 schemeToOnix "GTIN-14" = "14" 1029 schemeToOnix "ISBN-13" = "15" 1030 schemeToOnix "Legal deposit number" = "17" 1031 schemeToOnix "URN" = "22" 1032 schemeToOnix "OCLC" = "23" 1033 schemeToOnix "ISMN-13" = "25" 1034 schemeToOnix "ISBN-A" = "26" 1035 schemeToOnix "JP" = "27" 1036 schemeToOnix "OLCC" = "28" 1037 schemeToOnix _ = "01" 1038 1039showDateTimeISO8601 :: UTCTime -> String 1040showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ" 1041 1042transformTag :: PandocMonad m 1043 => Tag TS.Text 1044 -> E m (Tag TS.Text) 1045transformTag tag@(TagOpen name attr) 1046 | name `elem` ["video", "source", "img", "audio"] && 1047 isNothing (lookup "data-external" attr) = do 1048 let src = fromAttrib "src" tag 1049 let poster = fromAttrib "poster" tag 1050 newsrc <- modifyMediaRef $ TS.unpack src 1051 newposter <- modifyMediaRef $ TS.unpack poster 1052 let attr' = filter (\(x,_) -> x /= "src" && x /= "poster") attr ++ 1053 [("src", "../" <> newsrc) | not (TS.null newsrc)] ++ 1054 [("poster", "../" <> newposter) | not (TS.null newposter)] 1055 return $ TagOpen name attr' 1056transformTag tag = return tag 1057 1058modifyMediaRef :: PandocMonad m 1059 => FilePath 1060 -> E m TS.Text 1061modifyMediaRef "" = return "" 1062modifyMediaRef oldsrc = do 1063 media <- gets stMediaPaths 1064 case lookup oldsrc media of 1065 Just (n,_) -> return $ TS.pack n 1066 Nothing -> catchError 1067 (do (img, mbMime) <- P.fetchItem $ TS.pack oldsrc 1068 let ext = maybe (takeExtension (takeWhile (/='?') oldsrc)) TS.unpack 1069 (("." <>) <$> (mbMime >>= extensionFromMimeType)) 1070 newName <- getMediaNextNewName ext 1071 let newPath = "media/" ++ newName 1072 entry <- mkEntry newPath (B.fromChunks . (:[]) $ img) 1073 modify $ \st -> st{ stMediaPaths = 1074 (oldsrc, (newPath, Just entry)):media} 1075 return $ TS.pack newPath) 1076 (\e -> do 1077 report $ CouldNotFetchResource (TS.pack oldsrc) (tshow e) 1078 return $ TS.pack oldsrc) 1079 1080getMediaNextNewName :: PandocMonad m => String -> E m String 1081getMediaNextNewName ext = do 1082 nextId <- gets stMediaNextId 1083 modify $ \st -> st { stMediaNextId = nextId + 1 } 1084 return $ "file" ++ show nextId ++ ext 1085 1086isHtmlFormat :: Format -> Bool 1087isHtmlFormat (Format "html") = True 1088isHtmlFormat (Format "html4") = True 1089isHtmlFormat (Format "html5") = True 1090isHtmlFormat _ = False 1091 1092transformBlock :: PandocMonad m 1093 => Block 1094 -> E m Block 1095transformBlock (RawBlock fmt raw) 1096 | isHtmlFormat fmt = do 1097 let tags = parseTags raw 1098 tags' <- mapM transformTag tags 1099 return $ RawBlock fmt (renderTags' tags') 1100transformBlock b = return b 1101 1102transformInline :: PandocMonad m 1103 => WriterOptions 1104 -> Inline 1105 -> E m Inline 1106transformInline _opts (Image attr lab (src,tit)) = do 1107 newsrc <- modifyMediaRef $ TS.unpack src 1108 return $ Image attr lab ("../" <> newsrc, tit) 1109transformInline opts x@(Math t m) 1110 | WebTeX url <- writerHTMLMathMethod opts = do 1111 newsrc <- modifyMediaRef (TS.unpack url <> urlEncode (TS.unpack m)) 1112 let mathclass = if t == DisplayMath then "display" else "inline" 1113 return $ Span ("",["math",mathclass],[]) 1114 [Image nullAttr [x] ("../" <> newsrc, "")] 1115transformInline _opts (RawInline fmt raw) 1116 | isHtmlFormat fmt = do 1117 let tags = parseTags raw 1118 tags' <- mapM transformTag tags 1119 return $ RawInline fmt (renderTags' tags') 1120transformInline _ x = return x 1121 1122(!) :: (t -> Element) -> [(String, String)] -> t -> Element 1123(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) 1124 1125-- | Version of 'ppTopElement' that specifies UTF-8 encoding. 1126ppTopElement :: Element -> String 1127ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement 1128 -- unEntity removes numeric entities introduced by ppElement 1129 -- (kindlegen seems to choke on these). 1130 where unEntity [] = "" 1131 unEntity ('&':'#':xs) = 1132 let (ds,ys) = break (==';') xs 1133 rest = drop 1 ys 1134 in case safeRead (TS.pack $ "'\\" <> ds <> "'") of 1135 Just x -> x : unEntity rest 1136 Nothing -> '&':'#':unEntity xs 1137 unEntity (x:xs) = x : unEntity xs 1138 1139mediaTypeOf :: FilePath -> Maybe MimeType 1140mediaTypeOf x = 1141 let mediaPrefixes = ["image", "video", "audio"] in 1142 case getMimeType x of 1143 Just y | any (`TS.isPrefixOf` y) mediaPrefixes -> Just y 1144 _ -> Nothing 1145 1146-- Returns filename for chapter number. 1147showChapter :: Int -> String 1148showChapter = printf "ch%03d.xhtml" 1149 1150-- Add identifiers to any headers without them. 1151addIdentifiers :: WriterOptions -> [Block] -> [Block] 1152addIdentifiers opts bs = evalState (mapM go bs) Set.empty 1153 where go (Header n (ident,classes,kvs) ils) = do 1154 ids <- get 1155 let ident' = if TS.null ident 1156 then uniqueIdent (writerExtensions opts) ils ids 1157 else ident 1158 modify $ Set.insert ident' 1159 return $ Header n (ident',classes,kvs) ils 1160 go x = return x 1161 1162-- Variant of normalizeDate that allows partial dates: YYYY, YYYY-MM 1163normalizeDate' :: String -> Maybe String 1164normalizeDate' = fmap TS.unpack . go . trim . TS.pack 1165 where 1166 go xs 1167 | TS.length xs == 4 -- YYY 1168 , TS.all isDigit xs = Just xs 1169 | (y, s) <- TS.splitAt 4 xs -- YYY-MM 1170 , Just ('-', m) <- TS.uncons s 1171 , TS.length m == 2 1172 , TS.all isDigit y && TS.all isDigit m = Just xs 1173 | otherwise = normalizeDate xs 1174 1175toRelator :: String -> Maybe String 1176toRelator x 1177 | x `elem` relators = Just x 1178 | otherwise = lookup (map toLower x) relatorMap 1179 1180relators :: [String] 1181relators = map snd relatorMap 1182 1183relatorMap :: [(String, String)] 1184relatorMap = 1185 [("abridger", "abr") 1186 ,("actor", "act") 1187 ,("adapter", "adp") 1188 ,("addressee", "rcp") 1189 ,("analyst", "anl") 1190 ,("animator", "anm") 1191 ,("annotator", "ann") 1192 ,("appellant", "apl") 1193 ,("appellee", "ape") 1194 ,("applicant", "app") 1195 ,("architect", "arc") 1196 ,("arranger", "arr") 1197 ,("art copyist", "acp") 1198 ,("art director", "adi") 1199 ,("artist", "art") 1200 ,("artistic director", "ard") 1201 ,("assignee", "asg") 1202 ,("associated name", "asn") 1203 ,("attributed name", "att") 1204 ,("auctioneer", "auc") 1205 ,("author", "aut") 1206 ,("author in quotations or text abstracts", "aqt") 1207 ,("author of afterword, colophon, etc.", "aft") 1208 ,("author of dialog", "aud") 1209 ,("author of introduction, etc.", "aui") 1210 ,("autographer", "ato") 1211 ,("bibliographic antecedent", "ant") 1212 ,("binder", "bnd") 1213 ,("binding designer", "bdd") 1214 ,("blurb writer", "blw") 1215 ,("book designer", "bkd") 1216 ,("book producer", "bkp") 1217 ,("bookjacket designer", "bjd") 1218 ,("bookplate designer", "bpd") 1219 ,("bookseller", "bsl") 1220 ,("braille embosser", "brl") 1221 ,("broadcaster", "brd") 1222 ,("calligrapher", "cll") 1223 ,("cartographer", "ctg") 1224 ,("caster", "cas") 1225 ,("censor", "cns") 1226 ,("choreographer", "chr") 1227 ,("cinematographer", "cng") 1228 ,("client", "cli") 1229 ,("collection registrar", "cor") 1230 ,("collector", "col") 1231 ,("collotyper", "clt") 1232 ,("colorist", "clr") 1233 ,("commentator", "cmm") 1234 ,("commentator for written text", "cwt") 1235 ,("compiler", "com") 1236 ,("complainant", "cpl") 1237 ,("complainant-appellant", "cpt") 1238 ,("complainant-appellee", "cpe") 1239 ,("composer", "cmp") 1240 ,("compositor", "cmt") 1241 ,("conceptor", "ccp") 1242 ,("conductor", "cnd") 1243 ,("conservator", "con") 1244 ,("consultant", "csl") 1245 ,("consultant to a project", "csp") 1246 ,("contestant", "cos") 1247 ,("contestant-appellant", "cot") 1248 ,("contestant-appellee", "coe") 1249 ,("contestee", "cts") 1250 ,("contestee-appellant", "ctt") 1251 ,("contestee-appellee", "cte") 1252 ,("contractor", "ctr") 1253 ,("contributor", "ctb") 1254 ,("copyright claimant", "cpc") 1255 ,("copyright holder", "cph") 1256 ,("corrector", "crr") 1257 ,("correspondent", "crp") 1258 ,("costume designer", "cst") 1259 ,("court governed", "cou") 1260 ,("court reporter", "crt") 1261 ,("cover designer", "cov") 1262 ,("creator", "cre") 1263 ,("curator", "cur") 1264 ,("dancer", "dnc") 1265 ,("data contributor", "dtc") 1266 ,("data manager", "dtm") 1267 ,("dedicatee", "dte") 1268 ,("dedicator", "dto") 1269 ,("defendant", "dfd") 1270 ,("defendant-appellant", "dft") 1271 ,("defendant-appellee", "dfe") 1272 ,("degree granting institution", "dgg") 1273 ,("delineator", "dln") 1274 ,("depicted", "dpc") 1275 ,("depositor", "dpt") 1276 ,("designer", "dsr") 1277 ,("director", "drt") 1278 ,("dissertant", "dis") 1279 ,("distribution place", "dbp") 1280 ,("distributor", "dst") 1281 ,("donor", "dnr") 1282 ,("draftsman", "drm") 1283 ,("dubious author", "dub") 1284 ,("editor", "edt") 1285 ,("editor of compilation", "edc") 1286 ,("editor of moving image work", "edm") 1287 ,("electrician", "elg") 1288 ,("electrotyper", "elt") 1289 ,("enacting jurisdiction", "enj") 1290 ,("engineer", "eng") 1291 ,("engraver", "egr") 1292 ,("etcher", "etr") 1293 ,("event place", "evp") 1294 ,("expert", "exp") 1295 ,("facsimilist", "fac") 1296 ,("field director", "fld") 1297 ,("film director", "fmd") 1298 ,("film distributor", "fds") 1299 ,("film editor", "flm") 1300 ,("film producer", "fmp") 1301 ,("filmmaker", "fmk") 1302 ,("first party", "fpy") 1303 ,("forger", "frg") 1304 ,("former owner", "fmo") 1305 ,("funder", "fnd") 1306 ,("geographic information specialist", "gis") 1307 ,("honoree", "hnr") 1308 ,("host", "hst") 1309 ,("host institution", "his") 1310 ,("illuminator", "ilu") 1311 ,("illustrator", "ill") 1312 ,("inscriber", "ins") 1313 ,("instrumentalist", "itr") 1314 ,("interviewee", "ive") 1315 ,("interviewer", "ivr") 1316 ,("inventor", "inv") 1317 ,("issuing body", "isb") 1318 ,("judge", "jud") 1319 ,("jurisdiction governed", "jug") 1320 ,("laboratory", "lbr") 1321 ,("laboratory director", "ldr") 1322 ,("landscape architect", "lsa") 1323 ,("lead", "led") 1324 ,("lender", "len") 1325 ,("libelant", "lil") 1326 ,("libelant-appellant", "lit") 1327 ,("libelant-appellee", "lie") 1328 ,("libelee", "lel") 1329 ,("libelee-appellant", "let") 1330 ,("libelee-appellee", "lee") 1331 ,("librettist", "lbt") 1332 ,("licensee", "lse") 1333 ,("licensor", "lso") 1334 ,("lighting designer", "lgd") 1335 ,("lithographer", "ltg") 1336 ,("lyricist", "lyr") 1337 ,("manufacture place", "mfp") 1338 ,("manufacturer", "mfr") 1339 ,("marbler", "mrb") 1340 ,("markup editor", "mrk") 1341 ,("metadata contact", "mdc") 1342 ,("metal-engraver", "mte") 1343 ,("moderator", "mod") 1344 ,("monitor", "mon") 1345 ,("music copyist", "mcp") 1346 ,("musical director", "msd") 1347 ,("musician", "mus") 1348 ,("narrator", "nrt") 1349 ,("onscreen presenter", "osp") 1350 ,("opponent", "opn") 1351 ,("organizer of meeting", "orm") 1352 ,("originator", "org") 1353 ,("other", "oth") 1354 ,("owner", "own") 1355 ,("panelist", "pan") 1356 ,("papermaker", "ppm") 1357 ,("patent applicant", "pta") 1358 ,("patent holder", "pth") 1359 ,("patron", "pat") 1360 ,("performer", "prf") 1361 ,("permitting agency", "pma") 1362 ,("photographer", "pht") 1363 ,("plaintiff", "ptf") 1364 ,("plaintiff-appellant", "ptt") 1365 ,("plaintiff-appellee", "pte") 1366 ,("platemaker", "plt") 1367 ,("praeses", "pra") 1368 ,("presenter", "pre") 1369 ,("printer", "prt") 1370 ,("printer of plates", "pop") 1371 ,("printmaker", "prm") 1372 ,("process contact", "prc") 1373 ,("producer", "pro") 1374 ,("production company", "prn") 1375 ,("production designer", "prs") 1376 ,("production manager", "pmn") 1377 ,("production personnel", "prd") 1378 ,("production place", "prp") 1379 ,("programmer", "prg") 1380 ,("project director", "pdr") 1381 ,("proofreader", "pfr") 1382 ,("provider", "prv") 1383 ,("publication place", "pup") 1384 ,("publisher", "pbl") 1385 ,("publishing director", "pbd") 1386 ,("puppeteer", "ppt") 1387 ,("radio director", "rdd") 1388 ,("radio producer", "rpc") 1389 ,("recording engineer", "rce") 1390 ,("recordist", "rcd") 1391 ,("redaktor", "red") 1392 ,("renderer", "ren") 1393 ,("reporter", "rpt") 1394 ,("repository", "rps") 1395 ,("research team head", "rth") 1396 ,("research team member", "rtm") 1397 ,("researcher", "res") 1398 ,("respondent", "rsp") 1399 ,("respondent-appellant", "rst") 1400 ,("respondent-appellee", "rse") 1401 ,("responsible party", "rpy") 1402 ,("restager", "rsg") 1403 ,("restorationist", "rsr") 1404 ,("reviewer", "rev") 1405 ,("rubricator", "rbr") 1406 ,("scenarist", "sce") 1407 ,("scientific advisor", "sad") 1408 ,("screenwriter", "aus") 1409 ,("scribe", "scr") 1410 ,("sculptor", "scl") 1411 ,("second party", "spy") 1412 ,("secretary", "sec") 1413 ,("seller", "sll") 1414 ,("set designer", "std") 1415 ,("setting", "stg") 1416 ,("signer", "sgn") 1417 ,("singer", "sng") 1418 ,("sound designer", "sds") 1419 ,("speaker", "spk") 1420 ,("sponsor", "spn") 1421 ,("stage director", "sgd") 1422 ,("stage manager", "stm") 1423 ,("standards body", "stn") 1424 ,("stereotyper", "str") 1425 ,("storyteller", "stl") 1426 ,("supporting host", "sht") 1427 ,("surveyor", "srv") 1428 ,("teacher", "tch") 1429 ,("technical director", "tcd") 1430 ,("television director", "tld") 1431 ,("television producer", "tlp") 1432 ,("thesis advisor", "ths") 1433 ,("transcriber", "trc") 1434 ,("translator", "trl") 1435 ,("type designer", "tyd") 1436 ,("typographer", "tyg") 1437 ,("university place", "uvp") 1438 ,("videographer", "vdg") 1439 ,("witness", "wit") 1440 ,("wood engraver", "wde") 1441 ,("woodcutter", "wdc") 1442 ,("writer of accompanying material", "wam") 1443 ,("writer of added commentary", "wac") 1444 ,("writer of added lyrics", "wal") 1445 ,("writer of added text", "wat") 1446 ] 1447 1448docTitle' :: Meta -> [Inline] 1449docTitle' meta = maybe [] go $ lookupMeta "title" meta 1450 where go (MetaString s) = [Str s] 1451 go (MetaInlines xs) = xs 1452 go (MetaBlocks [Para xs]) = xs 1453 go (MetaBlocks [Plain xs]) = xs 1454 go (MetaMap m) = 1455 case M.lookup "type" m of 1456 Just x | stringify x == "main" -> 1457 maybe [] go $ M.lookup "text" m 1458 _ -> [] 1459 go (MetaList xs) = concatMap go xs 1460 go _ = [] 1461