1{-# LANGUAGE OverloadedStrings #-}
2{- |
   Module      : Text.Pandoc.Readers.Odt
4   Copyright   : Copyright (C) 2015 Martin Linnemann
5   License     : GNU GPL, version 2 or above
6
7   Maintainer  : Martin Linnemann <theCodingMarlin@googlemail.com>
8   Stability   : alpha
9   Portability : portable
10
11Entry point to the odt reader.
12-}
13
14module Text.Pandoc.Readers.Odt ( readOdt ) where
15
16import Codec.Archive.Zip
17import Text.Pandoc.XML.Light
18
19import qualified Data.ByteString.Lazy as B
20
21import System.FilePath
22
23import Control.Monad.Except (throwError)
24
25import qualified Data.Text as T
26
27import Text.Pandoc.Class.PandocMonad (PandocMonad)
28import qualified Text.Pandoc.Class.PandocMonad as P
29import Text.Pandoc.Definition
30import Text.Pandoc.Error
31import Text.Pandoc.MediaBag
32import Text.Pandoc.Options
33import qualified Text.Pandoc.UTF8 as UTF8
34
35import Text.Pandoc.Readers.Odt.ContentReader
36import Text.Pandoc.Readers.Odt.StyleReader
37
38import Text.Pandoc.Readers.Odt.Generic.Fallible
39import Text.Pandoc.Readers.Odt.Generic.XMLConverter
40import Text.Pandoc.Shared (filteredFilesFromArchive)
41
-- | Read an ODT document from its raw bytes.
--
-- The bytes are converted by the pure helper 'readOdt''. On success the
-- resulting media bag is stored with 'P.setMediaBag' and the document is
-- returned; on failure the 'PandocError' is raised with 'throwError'.
readOdt :: PandocMonad m
        => ReaderOptions
        -> B.ByteString
        -> m Pandoc
readOdt opts bytes =
  either throwError installMedia (readOdt' opts bytes)
  where
    -- Register the media first, then hand back the document.
    installMedia (doc, mb) = P.setMediaBag mb >> return doc
51
-- | Pure core of 'readOdt'.
--
-- The reader options are currently ignored; the bytes are passed straight
-- to 'bytesToOdt'.
readOdt' :: ReaderOptions
         -> B.ByteString
         -> Either PandocError (Pandoc, MediaBag)
readOdt' _opts = bytesToOdt
59
-- | Unzip the raw bytes and convert the resulting archive with
-- 'archiveToOdt'.
--
-- If 'toArchiveOrFail' rejects the input, the result is a
-- 'PandocParseError' whose message is @"Could not unzip ODT: "@ followed by
-- the reason reported by the unzipper.
bytesToOdt :: B.ByteString -> Either PandocError (Pandoc, MediaBag)
bytesToOdt = either unzipFailure archiveToOdt . toArchiveOrFail
  where
    unzipFailure reason =
      Left (PandocParseError ("Could not unzip ODT: " <> T.pack reason))
66
-- | Convert an unzipped ODT archive into a document and its media bag.
--
-- The steps run in the 'Either' monad, so the first failure is the one
-- reported:
--
--   1. locate @content.xml@, then @styles.xml@, in the archive;
--   2. parse both entries as XML (content first);
--   3. read the styles from both elements and combine them with 'chooseMax';
--   4. collect the media files from the archive;
--   5. run the @read_body@ converter over the content element.
archiveToOdt :: Archive -> Either PandocError (Pandoc, MediaBag)
archiveToOdt archive = do
  contentEntry <- required "Could not find content.xml"
                    (findEntryByPath "content.xml" archive)
  stylesEntry  <- required "Could not find styles.xml"
                    (findEntryByPath "styles.xml" archive)
  contentElem  <- entryToXmlElem contentEntry
  stylesElem   <- entryToXmlElem stylesEntry
  styles       <- orParseError "Could not read styles" $
                    chooseMax (readStylesAt stylesElem) (readStylesAt contentElem)
  let media      = filteredFilesFromArchive archive isOdtMedia
      startState = readerState styles media
  orParseError "Could not convert opendocument" $
    runConverter' read_body startState contentElem
  where
    -- Turn a missing value into a parse error carrying the given message.
    required msg = maybe (Left (PandocParseError msg)) Right

    -- Replace whatever failure value was produced by a parse error carrying
    -- the given message; the original failure details are discarded.
    orParseError msg = either (const (Left (PandocParseError msg))) Right

    -- A path counts as media when it sits directly in @Pictures/@, or when
    -- it is a @content.xml@ that is not at the archive root.
    isOdtMedia :: FilePath -> Bool
    isOdtMedia fp = case splitFileName fp of
      (dir, name) ->
        dir == "Pictures/" || (dir /= "./" && name == "content.xml")
90
91
-- | Parse the contents of an archive entry as a single XML element.
--
-- The entry's bytes are converted to lazy text with 'UTF8.toTextLazy' and
-- handed to 'parseXMLElement'. A parse failure becomes a 'PandocXMLError'
-- that carries the entry's relative path together with the parser's message.
entryToXmlElem :: Entry -> Either PandocError Element
entryToXmlElem entry = either (Left . xmlError) Right parsed
  where
    parsed   = parseXMLElement (UTF8.toTextLazy (fromEntry entry))
    xmlError = PandocXMLError (T.pack (eRelativePath entry))
98