1{-# LANGUAGE OverloadedStrings, ScopedTypeVariables #-} 2{-# LANGUAGE ViewPatterns #-} 3{-# OPTIONS_GHC -fno-warn-type-defaults #-} 4{- | 5Module : Text.Pandoc.ImageSize 6Copyright : Copyright (C) 2011-2021 John MacFarlane 7License : GNU GPL, version 2 or above 8 9Maintainer : John MacFarlane <jgm@berkeley.edu> 10Stability : alpha 11Portability : portable 12 13Functions for determining the size of a PNG, JPEG, or GIF image. 14-} 15module Text.Pandoc.ImageSize ( ImageType(..) 16 , imageType 17 , imageSize 18 , sizeInPixels 19 , sizeInPoints 20 , desiredSizeInPoints 21 , Dimension(..) 22 , Direction(..) 23 , dimension 24 , lengthToDim 25 , scaleDimension 26 , inInch 27 , inPixel 28 , inPoints 29 , inEm 30 , numUnit 31 , showInInch 32 , showInPixel 33 , showFl 34 ) where 35import Data.ByteString (ByteString) 36import qualified Data.ByteString.Char8 as B 37import qualified Data.ByteString.Lazy as BL 38import Data.Binary.Get 39import Data.Char (isDigit) 40import Control.Monad 41import Text.Pandoc.Shared (safeRead) 42import Data.Default (Default) 43import Numeric (showFFloat) 44import Text.Pandoc.Definition 45import Text.Pandoc.Options 46import qualified Text.Pandoc.UTF8 as UTF8 47import qualified Text.XML.Light as Xml 48import qualified Data.Text as T 49import qualified Data.Text.Encoding as TE 50import Control.Applicative 51import qualified Data.Attoparsec.ByteString.Char8 as A 52import qualified Codec.Picture.Metadata as Metadata 53import qualified Codec.Picture.Metadata.Exif as Exif 54import Codec.Picture (decodeImageWithMetadata) 55 56-- quick and dirty functions to get image sizes 57-- algorithms borrowed from wwwis.pl 58 59data ImageType = Png | Gif | Jpeg | Svg | Pdf | Eps | Emf deriving Show 60data Direction = Width | Height 61instance Show Direction where 62 show Width = "width" 63 show Height = "height" 64 65data Dimension = Pixel Integer 66 | Centimeter Double 67 | Millimeter Double 68 | Inch Double 69 | Percent Double 70 | Em Double 71 deriving Eq 72 73instance Show Dimension where 74 show (Pixel a) = show a ++ "px" 75 show (Centimeter a) = T.unpack (showFl a) ++ "cm" 76 show (Millimeter a) = T.unpack (showFl a) ++ "mm" 77 show (Inch a) = T.unpack (showFl a) ++ "in" 78 show (Percent a) = show a ++ "%" 79 show (Em a) = T.unpack (showFl a) ++ "em" 80 81data ImageSize = ImageSize{ 82 pxX :: Integer 83 , pxY :: Integer 84 , dpiX :: Integer 85 , dpiY :: Integer 86 } deriving (Read, Show, Eq) 87instance Default ImageSize where 88 def = ImageSize 300 200 72 72 89 90showFl :: (RealFloat a) => a -> T.Text 91showFl a = removeExtra0s $ T.pack $ showFFloat (Just 5) a "" 92 93removeExtra0s :: T.Text -> T.Text 94removeExtra0s s = case T.dropWhileEnd (=='0') s of 95 (T.unsnoc -> Just (xs, '.')) -> xs 96 xs -> xs 97 98imageType :: ByteString -> Maybe ImageType 99imageType img = case B.take 4 img of 100 "\x89\x50\x4e\x47" -> return Png 101 "\x47\x49\x46\x38" -> return Gif 102 "\xff\xd8\xff\xe0" -> return Jpeg -- JFIF 103 "\xff\xd8\xff\xe1" -> return Jpeg -- Exif 104 "%PDF" -> return Pdf 105 "<svg" -> return Svg 106 "<?xm" 107 | findSvgTag img 108 -> return Svg 109 "%!PS" 110 | B.take 4 (B.drop 1 $ B.dropWhile (/=' ') img) == "EPSF" 111 -> return Eps 112 "\x01\x00\x00\x00" 113 | B.take 4 (B.drop 40 img) == " EMF" 114 -> return Emf 115 _ -> mzero 116 117findSvgTag :: ByteString -> Bool 118findSvgTag img = "<svg" `B.isInfixOf` img || "<SVG" `B.isInfixOf` img 119 120imageSize :: WriterOptions -> ByteString -> Either T.Text ImageSize 121imageSize opts img = checkDpi <$> 122 case imageType img of 123 Just Png -> getSize img 124 Just Gif -> getSize img 125 Just Jpeg -> getSize img 126 Just Svg -> mbToEither "could not determine SVG size" $ svgSize opts img 127 Just Eps -> mbToEither "could not determine EPS size" $ epsSize img 128 Just Pdf -> mbToEither "could not determine PDF size" $ pdfSize img 129 Just Emf -> mbToEither "could not determine EMF size" $ emfSize img 130 Nothing -> Left "could not determine image type" 131 where mbToEither msg Nothing = Left msg 132 mbToEither _ (Just x) = Right x 133 -- see #6880, some defective JPEGs may encode dpi 0, so default to 72 134 -- if that value is 0 135 checkDpi size = 136 size{ dpiX = if dpiX size == 0 then 72 else dpiX size 137 , dpiY = if dpiY size == 0 then 72 else dpiY size } 138 139 140sizeInPixels :: ImageSize -> (Integer, Integer) 141sizeInPixels s = (pxX s, pxY s) 142 143-- | Calculate (height, width) in points using the image file's dpi metadata, 144-- using 72 Points == 1 Inch. 145sizeInPoints :: ImageSize -> (Double, Double) 146sizeInPoints s = (pxXf * 72 / dpiXf, pxYf * 72 / dpiYf) 147 where 148 pxXf = fromIntegral $ pxX s 149 pxYf = fromIntegral $ pxY s 150 dpiXf = fromIntegral $ dpiX s 151 dpiYf = fromIntegral $ dpiY s 152 153-- | Calculate (height, width) in points, considering the desired dimensions in the 154-- attribute, while falling back on the image file's dpi metadata if no dimensions 155-- are specified in the attribute (or only dimensions in percentages). 156desiredSizeInPoints :: WriterOptions -> Attr -> ImageSize -> (Double, Double) 157desiredSizeInPoints opts attr s = 158 case (getDim Width, getDim Height) of 159 (Just w, Just h) -> (w, h) 160 (Just w, Nothing) -> (w, w / ratio) 161 (Nothing, Just h) -> (h * ratio, h) 162 (Nothing, Nothing) -> sizeInPoints s 163 where 164 ratio = fromIntegral (pxX s) / fromIntegral (pxY s) 165 getDim dir = case dimension dir attr of 166 Just (Percent _) -> Nothing 167 Just dim -> Just $ inPoints opts dim 168 Nothing -> Nothing 169 170inPoints :: WriterOptions -> Dimension -> Double 171inPoints opts dim = 72 * inInch opts dim 172 173inEm :: WriterOptions -> Dimension -> Double 174inEm opts dim = (64/11) * inInch opts dim 175 176inInch :: WriterOptions -> Dimension -> Double 177inInch opts dim = 178 case dim of 179 (Pixel a) -> fromIntegral a / fromIntegral (writerDpi opts) 180 (Centimeter a) -> a * 0.3937007874 181 (Millimeter a) -> a * 0.03937007874 182 (Inch a) -> a 183 (Percent _) -> 0 184 (Em a) -> a * (11/64) 185 186inPixel :: WriterOptions -> Dimension -> Integer 187inPixel opts dim = 188 case dim of 189 (Pixel a) -> a 190 (Centimeter a) -> floor $ dpi * a * 0.3937007874 :: Integer 191 (Millimeter a) -> floor $ dpi * a * 0.03937007874 :: Integer 192 (Inch a) -> floor $ dpi * a :: Integer 193 (Percent _) -> 0 194 (Em a) -> floor $ dpi * a * (11/64) :: Integer 195 where 196 dpi = fromIntegral $ writerDpi opts 197 198-- | Convert a Dimension to Text denoting its equivalent in inches, for example "2.00000". 199-- Note: Dimensions in percentages are converted to the empty string. 200showInInch :: WriterOptions -> Dimension -> T.Text 201showInInch _ (Percent _) = "" 202showInInch opts dim = showFl $ inInch opts dim 203 204-- | Convert a Dimension to Text denoting its equivalent in pixels, for example "600". 205-- Note: Dimensions in percentages are converted to the empty string. 206showInPixel :: WriterOptions -> Dimension -> T.Text 207showInPixel _ (Percent _) = "" 208showInPixel opts dim = T.pack $ show $ inPixel opts dim 209 210-- | Maybe split a string into a leading number and trailing unit, e.g. "3cm" to Just (3.0, "cm") 211numUnit :: T.Text -> Maybe (Double, T.Text) 212numUnit s = 213 let (nums, unit) = T.span (\c -> isDigit c || ('.'==c)) s 214 in (\n -> (n, unit)) <$> safeRead nums 215 216-- | Scale a dimension by a factor. 217scaleDimension :: Double -> Dimension -> Dimension 218scaleDimension factor dim = 219 case dim of 220 Pixel x -> Pixel (round $ factor * fromIntegral x) 221 Centimeter x -> Centimeter (factor * x) 222 Millimeter x -> Millimeter (factor * x) 223 Inch x -> Inch (factor * x) 224 Percent x -> Percent (factor * x) 225 Em x -> Em (factor * x) 226 227-- | Read a Dimension from an Attr attribute. 228-- `dimension Width attr` might return `Just (Pixel 3)` or for example `Just (Centimeter 2.0)`, etc. 229dimension :: Direction -> Attr -> Maybe Dimension 230dimension dir (_, _, kvs) = 231 case dir of 232 Width -> extractDim "width" 233 Height -> extractDim "height" 234 where 235 extractDim key = lookup key kvs >>= lengthToDim 236 237lengthToDim :: T.Text -> Maybe Dimension 238lengthToDim s = numUnit s >>= uncurry toDim 239 where 240 toDim a "cm" = Just $ Centimeter a 241 toDim a "mm" = Just $ Millimeter a 242 toDim a "in" = Just $ Inch a 243 toDim a "inch" = Just $ Inch a 244 toDim a "%" = Just $ Percent a 245 toDim a "px" = Just $ Pixel (floor a::Integer) 246 toDim a "" = Just $ Pixel (floor a::Integer) 247 toDim a "pt" = Just $ Inch (a / 72) 248 toDim a "pc" = Just $ Inch (a / 6) 249 toDim a "em" = Just $ Em a 250 toDim _ _ = Nothing 251 252epsSize :: ByteString -> Maybe ImageSize 253epsSize img = do 254 let ls = takeWhile ("%" `B.isPrefixOf`) $ B.lines img 255 let ls' = dropWhile (not . ("%%BoundingBox:" `B.isPrefixOf`)) ls 256 case ls' of 257 [] -> mzero 258 (x:_) -> case B.words x of 259 [_, _, _, ux, uy] -> do 260 ux' <- safeRead $ TE.decodeUtf8 ux 261 uy' <- safeRead $ TE.decodeUtf8 uy 262 return ImageSize{ 263 pxX = ux' 264 , pxY = uy' 265 , dpiX = 72 266 , dpiY = 72 } 267 _ -> mzero 268 269pdfSize :: ByteString -> Maybe ImageSize 270pdfSize img = 271 case A.parseOnly pPdfSize img of 272 Left _ -> Nothing 273 Right sz -> Just sz 274 275pPdfSize :: A.Parser ImageSize 276pPdfSize = do 277 A.skipWhile (/='/') 278 A.char8 '/' 279 (do A.string "MediaBox" 280 A.skipSpace 281 A.char8 '[' 282 A.skipSpace 283 [x1,y1,x2,y2] <- A.count 4 $ do 284 A.skipSpace 285 raw <- A.many1 $ A.satisfy (\c -> isDigit c || c == '.') 286 case safeRead $ T.pack raw of 287 Just (r :: Double) -> return $ floor r 288 Nothing -> mzero 289 A.skipSpace 290 A.char8 ']' 291 return $ ImageSize{ 292 pxX = x2 - x1 293 , pxY = y2 - y1 294 , dpiX = 72 295 , dpiY = 72 } 296 ) <|> pPdfSize 297 298getSize :: ByteString -> Either T.Text ImageSize 299getSize img = 300 case decodeImageWithMetadata img of 301 Left e -> Left (T.pack e) 302 Right (_, meta) -> do 303 pxx <- maybe (Left "Could not determine width") Right $ 304 -- first look for exif image width, then width 305 (Metadata.lookup 306 (Metadata.Exif (Exif.TagUnknown 0xA002)) meta >>= 307 exifDataToWord) <|> 308 Metadata.lookup Metadata.Width meta 309 pxy <- maybe (Left "Could not determine height") Right $ 310 -- first look for exif image height, then height 311 (Metadata.lookup 312 (Metadata.Exif (Exif.TagUnknown 0xA003)) meta >>= 313 exifDataToWord) <|> 314 Metadata.lookup Metadata.Height meta 315 dpix <- maybe (Right 72) Right $ Metadata.lookup Metadata.DpiX meta 316 dpiy <- maybe (Right 72) Right $ Metadata.lookup Metadata.DpiY meta 317 return $ ImageSize 318 { pxX = fromIntegral pxx 319 , pxY = fromIntegral pxy 320 , dpiX = fromIntegral dpix 321 , dpiY = fromIntegral dpiy } 322 where 323 exifDataToWord (Exif.ExifLong x) = Just $ fromIntegral x 324 exifDataToWord (Exif.ExifShort x) = Just $ fromIntegral x 325 exifDataToWord _ = Nothing 326 327 328svgSize :: WriterOptions -> ByteString -> Maybe ImageSize 329svgSize opts img = do 330 doc <- Xml.parseXMLDoc $ UTF8.toString img 331 let viewboxSize = do 332 vb <- Xml.findAttrBy (== Xml.QName "viewBox" Nothing Nothing) doc 333 [_,_,w,h] <- mapM safeRead (T.words (T.pack vb)) 334 return (w,h) 335 let dpi = fromIntegral $ writerDpi opts 336 let dirToInt dir = do 337 dim <- Xml.findAttrBy (== Xml.QName dir Nothing Nothing) doc >>= lengthToDim . T.pack 338 return $ inPixel opts dim 339 w <- dirToInt "width" <|> (fst <$> viewboxSize) 340 h <- dirToInt "height" <|> (snd <$> viewboxSize) 341 return ImageSize { 342 pxX = w 343 , pxY = h 344 , dpiX = dpi 345 , dpiY = dpi 346 } 347 348emfSize :: ByteString -> Maybe ImageSize 349emfSize img = 350 let 351 parseheader = runGetOrFail $ do 352 skip 0x18 -- 0x00 353 frameL <- getWord32le -- 0x18 measured in 1/100 of a millimetre 354 frameT <- getWord32le -- 0x1C 355 frameR <- getWord32le -- 0x20 356 frameB <- getWord32le -- 0x24 357 skip 0x20 -- 0x28 358 deviceX <- getWord32le -- 0x48 pixels of reference device 359 deviceY <- getWord32le -- 0x4C 360 mmX <- getWord32le -- 0x50 real mm of reference device (always 320*240?) 361 mmY <- getWord32le -- 0x58 362 -- end of header 363 let 364 w = (deviceX * (frameR - frameL)) `quot` (mmX * 100) 365 h = (deviceY * (frameB - frameT)) `quot` (mmY * 100) 366 dpiW = (deviceX * 254) `quot` (mmX * 10) 367 dpiH = (deviceY * 254) `quot` (mmY * 10) 368 return $ ImageSize 369 { pxX = fromIntegral w 370 , pxY = fromIntegral h 371 , dpiX = fromIntegral dpiW 372 , dpiY = fromIntegral dpiH 373 } 374 in 375 case parseheader . BL.fromStrict $ img of 376 Left _ -> Nothing 377 Right (_, _, size) -> Just size 378