1{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-}
2{-# LANGUAGE ViewPatterns      #-}
3{-# OPTIONS_GHC -fno-warn-type-defaults #-}
4{- |
5Module      : Text.Pandoc.ImageSize
6Copyright   : Copyright (C) 2011-2021 John MacFarlane
7License     : GNU GPL, version 2 or above
8
9Maintainer  : John MacFarlane <jgm@berkeley.edu>
10Stability   : alpha
11Portability : portable
12
13Functions for determining the size of a PNG, JPEG, or GIF image.
14-}
15module Text.Pandoc.ImageSize ( ImageType(..)
16                             , imageType
17                             , imageSize
18                             , sizeInPixels
19                             , sizeInPoints
20                             , desiredSizeInPoints
21                             , Dimension(..)
22                             , Direction(..)
23                             , dimension
24                             , lengthToDim
25                             , scaleDimension
26                             , inInch
27                             , inPixel
28                             , inPoints
29                             , inEm
30                             , numUnit
31                             , showInInch
32                             , showInPixel
33                             , showFl
34                             ) where
35import Data.ByteString (ByteString)
36import qualified Data.ByteString.Char8 as B
37import qualified Data.ByteString.Lazy as BL
38import Data.Binary.Get
39import Data.Char (isDigit)
40import Control.Monad
41import Text.Pandoc.Shared (safeRead)
42import Data.Default (Default)
43import Numeric (showFFloat)
44import Text.Pandoc.Definition
45import Text.Pandoc.Options
46import qualified Text.Pandoc.UTF8 as UTF8
47import qualified Text.XML.Light as Xml
48import qualified Data.Text as T
49import qualified Data.Text.Encoding as TE
50import Control.Applicative
51import qualified Data.Attoparsec.ByteString.Char8 as A
52import qualified Codec.Picture.Metadata as Metadata
53import qualified Codec.Picture.Metadata.Exif as Exif
54import Codec.Picture (decodeImageWithMetadata)
55
56-- quick and dirty functions to get image sizes
57-- algorithms borrowed from wwwis.pl
58
59data ImageType = Png | Gif | Jpeg | Svg | Pdf | Eps | Emf deriving Show
60data Direction = Width | Height
61instance Show Direction where
62  show Width  = "width"
63  show Height = "height"
64
65data Dimension = Pixel Integer
66               | Centimeter Double
67               | Millimeter Double
68               | Inch Double
69               | Percent Double
70               | Em Double
71               deriving Eq
72
73instance Show Dimension where
74  show (Pixel a)      = show a              ++ "px"
75  show (Centimeter a) = T.unpack (showFl a) ++ "cm"
76  show (Millimeter a) = T.unpack (showFl a) ++ "mm"
77  show (Inch a)       = T.unpack (showFl a) ++ "in"
78  show (Percent a)    = show a              ++ "%"
79  show (Em a)         = T.unpack (showFl a) ++ "em"
80
81data ImageSize = ImageSize{
82                     pxX   :: Integer
83                   , pxY   :: Integer
84                   , dpiX  :: Integer
85                   , dpiY  :: Integer
86                   } deriving (Read, Show, Eq)
87instance Default ImageSize where
88  def = ImageSize 300 200 72 72
89
90showFl :: (RealFloat a) => a -> T.Text
91showFl a = removeExtra0s $ T.pack $ showFFloat (Just 5) a ""
92
93removeExtra0s :: T.Text -> T.Text
94removeExtra0s s = case T.dropWhileEnd (=='0') s of
95  (T.unsnoc -> Just (xs, '.')) -> xs
96  xs                           -> xs
97
98imageType :: ByteString -> Maybe ImageType
99imageType img = case B.take 4 img of
100                     "\x89\x50\x4e\x47" -> return Png
101                     "\x47\x49\x46\x38" -> return Gif
102                     "\xff\xd8\xff\xe0" -> return Jpeg  -- JFIF
103                     "\xff\xd8\xff\xe1" -> return Jpeg  -- Exif
104                     "%PDF"             -> return Pdf
105                     "<svg"             -> return Svg
106                     "<?xm"
107                       | findSvgTag img
108                                        -> return Svg
109                     "%!PS"
110                       |  B.take 4 (B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF"
111                                        -> return Eps
112                     "\x01\x00\x00\x00"
113                       | B.take 4 (B.drop 40 img) == " EMF"
114                                        -> return Emf
115                     _                  -> mzero
116
117findSvgTag :: ByteString -> Bool
118findSvgTag img = "<svg" `B.isInfixOf` img || "<SVG" `B.isInfixOf` img
119
120imageSize :: WriterOptions -> ByteString -> Either T.Text ImageSize
121imageSize opts img = checkDpi <$>
122  case imageType img of
123       Just Png  -> getSize img
124       Just Gif  -> getSize img
125       Just Jpeg -> getSize img
126       Just Svg  -> mbToEither "could not determine SVG size" $ svgSize opts img
127       Just Eps  -> mbToEither "could not determine EPS size" $ epsSize img
128       Just Pdf  -> mbToEither "could not determine PDF size" $ pdfSize img
129       Just Emf  -> mbToEither "could not determine EMF size" $ emfSize img
130       Nothing   -> Left "could not determine image type"
131  where mbToEither msg Nothing  = Left msg
132        mbToEither _   (Just x) = Right x
133        -- see #6880, some defective JPEGs may encode dpi 0, so default to 72
134        -- if that value is 0
135        checkDpi size =
136          size{ dpiX = if dpiX size == 0 then 72 else dpiX size
137              , dpiY = if dpiY size == 0 then 72 else dpiY size }
138
139
140sizeInPixels :: ImageSize -> (Integer, Integer)
141sizeInPixels s = (pxX s, pxY s)
142
143-- | Calculate (height, width) in points using the image file's dpi metadata,
144-- using 72 Points == 1 Inch.
145sizeInPoints :: ImageSize -> (Double, Double)
146sizeInPoints s = (pxXf * 72 / dpiXf, pxYf * 72 / dpiYf)
147  where
148    pxXf  = fromIntegral $ pxX s
149    pxYf  = fromIntegral $ pxY s
150    dpiXf = fromIntegral $ dpiX s
151    dpiYf = fromIntegral $ dpiY s
152
153-- | Calculate (height, width) in points, considering the desired dimensions in the
154-- attribute, while falling back on the image file's dpi metadata if no dimensions
155-- are specified in the attribute (or only dimensions in percentages).
156desiredSizeInPoints :: WriterOptions -> Attr -> ImageSize -> (Double, Double)
157desiredSizeInPoints opts attr s =
158  case (getDim Width, getDim Height) of
159    (Just w, Just h)   -> (w, h)
160    (Just w, Nothing)  -> (w, w / ratio)
161    (Nothing, Just h)  -> (h * ratio, h)
162    (Nothing, Nothing) -> sizeInPoints s
163  where
164    ratio = fromIntegral (pxX s) / fromIntegral (pxY s)
165    getDim dir = case dimension dir attr of
166                   Just (Percent _) -> Nothing
167                   Just dim         -> Just $ inPoints opts dim
168                   Nothing          -> Nothing
169
170inPoints :: WriterOptions -> Dimension -> Double
171inPoints opts dim = 72 * inInch opts dim
172
173inEm :: WriterOptions -> Dimension -> Double
174inEm opts dim = (64/11) * inInch opts dim
175
176inInch :: WriterOptions -> Dimension -> Double
177inInch opts dim =
178  case dim of
179    (Pixel a)      -> fromIntegral a / fromIntegral (writerDpi opts)
180    (Centimeter a) -> a * 0.3937007874
181    (Millimeter a) -> a * 0.03937007874
182    (Inch a)       -> a
183    (Percent _)    -> 0
184    (Em a)         -> a * (11/64)
185
186inPixel :: WriterOptions -> Dimension -> Integer
187inPixel opts dim =
188  case dim of
189    (Pixel a)      -> a
190    (Centimeter a) -> floor $ dpi * a * 0.3937007874 :: Integer
191    (Millimeter a) -> floor $ dpi * a * 0.03937007874 :: Integer
192    (Inch a)       -> floor $ dpi * a :: Integer
193    (Percent _)    -> 0
194    (Em a)         -> floor $ dpi * a * (11/64) :: Integer
195  where
196    dpi = fromIntegral $ writerDpi opts
197
198-- | Convert a Dimension to Text denoting its equivalent in inches, for example "2.00000".
199-- Note: Dimensions in percentages are converted to the empty string.
200showInInch :: WriterOptions -> Dimension -> T.Text
201showInInch _ (Percent _) = ""
202showInInch opts dim = showFl $ inInch opts dim
203
204-- | Convert a Dimension to Text denoting its equivalent in pixels, for example "600".
205-- Note: Dimensions in percentages are converted to the empty string.
206showInPixel :: WriterOptions -> Dimension -> T.Text
207showInPixel _ (Percent _) = ""
208showInPixel opts dim = T.pack $ show $ inPixel opts dim
209
210-- | Maybe split a string into a leading number and trailing unit, e.g. "3cm" to Just (3.0, "cm")
211numUnit :: T.Text -> Maybe (Double, T.Text)
212numUnit s =
213  let (nums, unit) = T.span (\c -> isDigit c || ('.'==c)) s
214  in (\n -> (n, unit)) <$> safeRead nums
215
216-- | Scale a dimension by a factor.
217scaleDimension :: Double -> Dimension -> Dimension
218scaleDimension factor dim =
219  case dim of
220        Pixel x      -> Pixel (round $ factor * fromIntegral x)
221        Centimeter x -> Centimeter (factor * x)
222        Millimeter x -> Millimeter (factor * x)
223        Inch x       -> Inch (factor * x)
224        Percent x    -> Percent (factor * x)
225        Em x         -> Em (factor * x)
226
227-- | Read a Dimension from an Attr attribute.
228-- `dimension Width attr` might return `Just (Pixel 3)` or for example `Just (Centimeter 2.0)`, etc.
229dimension :: Direction -> Attr -> Maybe Dimension
230dimension dir (_, _, kvs) =
231  case dir of
232    Width  -> extractDim "width"
233    Height -> extractDim "height"
234  where
235    extractDim key = lookup key kvs >>= lengthToDim
236
237lengthToDim :: T.Text -> Maybe Dimension
238lengthToDim s = numUnit s >>= uncurry toDim
239  where
240    toDim a "cm"   = Just $ Centimeter a
241    toDim a "mm"   = Just $ Millimeter a
242    toDim a "in"   = Just $ Inch a
243    toDim a "inch" = Just $ Inch a
244    toDim a "%"    = Just $ Percent a
245    toDim a "px"   = Just $ Pixel (floor a::Integer)
246    toDim a ""     = Just $ Pixel (floor a::Integer)
247    toDim a "pt"   = Just $ Inch (a / 72)
248    toDim a "pc"   = Just $ Inch (a / 6)
249    toDim a "em"   = Just $ Em a
250    toDim _ _      = Nothing
251
252epsSize :: ByteString -> Maybe ImageSize
253epsSize img = do
254  let ls = takeWhile ("%" `B.isPrefixOf`) $ B.lines img
255  let ls' = dropWhile (not . ("%%BoundingBox:" `B.isPrefixOf`)) ls
256  case ls' of
257       []    -> mzero
258       (x:_) -> case B.words x of
259                     [_, _, _, ux, uy] -> do
260                        ux' <- safeRead $ TE.decodeUtf8 ux
261                        uy' <- safeRead $ TE.decodeUtf8 uy
262                        return ImageSize{
263                            pxX  = ux'
264                          , pxY  = uy'
265                          , dpiX = 72
266                          , dpiY = 72 }
267                     _ -> mzero
268
269pdfSize :: ByteString -> Maybe ImageSize
270pdfSize img =
271  case A.parseOnly pPdfSize img of
272    Left _   -> Nothing
273    Right sz -> Just sz
274
275pPdfSize :: A.Parser ImageSize
276pPdfSize = do
277  A.skipWhile (/='/')
278  A.char8 '/'
279  (do A.string "MediaBox"
280      A.skipSpace
281      A.char8 '['
282      A.skipSpace
283      [x1,y1,x2,y2] <- A.count 4 $ do
284        A.skipSpace
285        raw <- A.many1 $ A.satisfy (\c -> isDigit c || c == '.')
286        case safeRead $ T.pack raw of
287          Just (r :: Double) -> return $ floor r
288          Nothing            -> mzero
289      A.skipSpace
290      A.char8 ']'
291      return $ ImageSize{
292              pxX  = x2 - x1
293            , pxY  = y2 - y1
294            , dpiX = 72
295            , dpiY = 72 }
296   ) <|> pPdfSize
297
298getSize :: ByteString -> Either T.Text ImageSize
299getSize img =
300  case decodeImageWithMetadata img of
301    Left e -> Left (T.pack e)
302    Right (_, meta) -> do
303      pxx <- maybe (Left "Could not determine width") Right $
304                   -- first look for exif image width, then width
305                   (Metadata.lookup
306                     (Metadata.Exif (Exif.TagUnknown 0xA002)) meta >>=
307                       exifDataToWord) <|>
308                   Metadata.lookup Metadata.Width meta
309      pxy <- maybe (Left "Could not determine height") Right $
310                  -- first look for exif image height, then height
311                  (Metadata.lookup
312                     (Metadata.Exif (Exif.TagUnknown 0xA003)) meta >>=
313                       exifDataToWord) <|>
314                  Metadata.lookup Metadata.Height meta
315      dpix <- maybe (Right 72) Right $ Metadata.lookup Metadata.DpiX meta
316      dpiy <- maybe (Right 72) Right $ Metadata.lookup Metadata.DpiY meta
317      return $ ImageSize
318                { pxX = fromIntegral pxx
319                , pxY = fromIntegral pxy
320                , dpiX = fromIntegral dpix
321                , dpiY = fromIntegral dpiy }
322 where
323  exifDataToWord (Exif.ExifLong x) = Just $ fromIntegral x
324  exifDataToWord (Exif.ExifShort x) = Just $ fromIntegral x
325  exifDataToWord _ = Nothing
326
327
328svgSize :: WriterOptions -> ByteString -> Maybe ImageSize
329svgSize opts img = do
330  doc <- Xml.parseXMLDoc $ UTF8.toString img
331  let viewboxSize = do
332        vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc
333        [_,_,w,h] <- mapM safeRead (T.words (T.pack vb))
334        return (w,h)
335  let dpi = fromIntegral $ writerDpi opts
336  let dirToInt dir = do
337        dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim . T.pack
338        return $ inPixel opts dim
339  w <- dirToInt "width" <|> (fst <$> viewboxSize)
340  h <- dirToInt "height" <|> (snd <$> viewboxSize)
341  return ImageSize {
342    pxX  = w
343  , pxY  = h
344  , dpiX = dpi
345  , dpiY = dpi
346  }
347
348emfSize :: ByteString -> Maybe ImageSize
349emfSize img =
350  let
351    parseheader = runGetOrFail $ do
352      skip 0x18             -- 0x00
353      frameL <- getWord32le -- 0x18  measured in 1/100 of a millimetre
354      frameT <- getWord32le -- 0x1C
355      frameR <- getWord32le -- 0x20
356      frameB <- getWord32le -- 0x24
357      skip 0x20             -- 0x28
358      deviceX <- getWord32le  -- 0x48 pixels of reference device
359      deviceY <- getWord32le  -- 0x4C
360      mmX <- getWord32le      -- 0x50 real mm of reference device (always 320*240?)
361      mmY <- getWord32le      -- 0x58
362      -- end of header
363      let
364        w = (deviceX * (frameR - frameL)) `quot` (mmX * 100)
365        h = (deviceY * (frameB - frameT)) `quot` (mmY * 100)
366        dpiW = (deviceX * 254) `quot` (mmX * 10)
367        dpiH = (deviceY * 254) `quot` (mmY * 10)
368      return $ ImageSize
369        { pxX = fromIntegral w
370        , pxY = fromIntegral h
371        , dpiX = fromIntegral dpiW
372        , dpiY = fromIntegral dpiH
373        }
374  in
375    case parseheader . BL.fromStrict $ img of
376      Left _ -> Nothing
377      Right (_, _, size) -> Just size
378