{-# LANGUAGE OverloadedStrings #-}
{- |
   Module      : Text.Pandoc.Readers.Odt
   Copyright   : Copyright (C) 2015 Martin Linnemann
   License     : GNU GPL, version 2 or above

   Maintainer  : Martin Linnemann <theCodingMarlin@googlemail.com>
   Stability   : alpha
   Portability : portable

Entry point to the odt reader.
-}

module Text.Pandoc.Readers.Odt ( readOdt ) where

import Codec.Archive.Zip
import Text.Pandoc.XML.Light

import qualified Data.ByteString.Lazy as B

import System.FilePath

import Control.Monad.Except (throwError)

import qualified Data.Text as T

import Text.Pandoc.Class.PandocMonad (PandocMonad)
import qualified Text.Pandoc.Class.PandocMonad as P
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.MediaBag
import Text.Pandoc.Options
import qualified Text.Pandoc.UTF8 as UTF8

import Text.Pandoc.Readers.Odt.ContentReader
import Text.Pandoc.Readers.Odt.StyleReader

import Text.Pandoc.Readers.Odt.Generic.Fallible
import Text.Pandoc.Readers.Odt.Generic.XMLConverter
import Text.Pandoc.Shared (filteredFilesFromArchive)

-- | Read an ODT document from its raw (zipped) bytes.
--
-- On success the extracted media is stored with 'P.setMediaBag' and the
-- converted document is returned; on failure the 'PandocError' is rethrown
-- with 'throwError'.
readOdt :: PandocMonad m
        => ReaderOptions
        -> B.ByteString
        -> m Pandoc
readOdt opts bytes = either throwError publish (readOdt' opts bytes)
  where
    -- Register the media first, then hand back the document.
    publish (doc, mediaBag) = P.setMediaBag mediaBag >> return doc

-- | Pure variant of 'readOdt'. The reader options are accepted but
-- currently ignored; all work is delegated to 'bytesToOdt'.
readOdt' :: ReaderOptions
         -> B.ByteString
         -> Either PandocError (Pandoc, MediaBag)
readOdt' _ = bytesToOdt

-- | Interpret the bytes as a zip archive and convert it with
-- 'archiveToOdt'. A failure to unzip is reported as a 'PandocParseError'.
bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag)
bytesToOdt = either unzipFailed archiveToOdt . toArchiveOrFail
  where
    unzipFailed reason =
      Left . PandocParseError $ "Could not unzip ODT: " <> T.pack reason

-- | Convert an already opened ODT archive.
--
-- The steps run in order and the first failing one determines the error:
-- locate @content.xml@, locate @styles.xml@, parse both as XML, read the
-- styles, then convert the document body.
archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
archiveToOdt archive = do
  contentEntry <- requireEntry "Could not find content.xml"
                    (findEntryByPath "content.xml" archive)
  stylesEntry  <- requireEntry "Could not find styles.xml"
                    (findEntryByPath "styles.xml" archive)
  contentElem  <- entryToXmlElem contentEntry
  stylesElem   <- entryToXmlElem stylesEntry
  styles       <- orParseError "Could not read styles" $
                    chooseMax (readStylesAt stylesElem)
                              (readStylesAt contentElem)
  let media      = filteredFilesFromArchive archive isOdtMedia
      startState = readerState styles media
  orParseError "Could not convert opendocument" $
    runConverter' read_body startState contentElem
  where
    -- Turn a missing value into a parse error carrying the given message.
    requireEntry :: T.Text -> Maybe a -> Either PandocError a
    requireEntry msg = maybe (Left (PandocParseError msg)) Right

    -- Replace whatever failure value was produced by a parse error with
    -- the given message; the original failure value is discarded.
    orParseError :: T.Text -> Either e a -> Either PandocError a
    orParseError msg = either (const (Left (PandocParseError msg))) Right

    -- Selects the archive members handed to the content reader as media:
    -- files whose directory part is exactly "Pictures/", and any
    -- "content.xml" that is not at the archive root (presumably embedded
    -- objects -- confirm against the content reader).
    isOdtMedia :: FilePath -> Bool
    isOdtMedia path = inPictures || nestedContent
      where
        (folder, file) = splitFileName path
        inPictures     = folder == "Pictures/"
        nestedContent  = folder /= "./" && file == "content.xml"

-- | Parse an archive entry as UTF-8 encoded XML.
--
-- A parse failure becomes a 'PandocXMLError' tagged with the entry's
-- path inside the archive.
entryToXmlElem :: Entry -> Either PandocError Element
entryToXmlElem entry = either (Left . PandocXMLError entryName) Right parsed
  where
    entryName = T.pack (eRelativePath entry)
    parsed    = parseXMLElement (UTF8.toTextLazy (fromEntry entry))