import io
import os
import re
import struct
from xml.etree import ElementTree

# Resolution "unit" codes understood by _convertToDPI.  Each code acts as a
# power-of-ten exponent: a density d with unit code u is converted to DPI as
# d * 10**u * 0.0254 (see _DPI_FACTORS below).
_UNIT_KM = -3
_UNIT_100M = -2
_UNIT_10M = -1
_UNIT_1M = 0
_UNIT_10CM = 1
_UNIT_CM = 2
_UNIT_MM = 3
_UNIT_0_1MM = 4
_UNIT_0_01MM = 5
_UNIT_UM = 6
# NOTE(review): _UNIT_INCH shares the value 6 with _UNIT_UM, so passing it to
# _convertToDPI would scale the density as if it were per micrometre.  Nothing
# in this module passes it; the value is left untouched in case external code
# relies on it.
_UNIT_INCH = 6

# Byte size of one value, keyed by TIFF field type code.
_TIFF_TYPE_SIZES = {
    1: 1,
    2: 1,
    3: 2,
    4: 4,
    5: 8,
    6: 1,
    7: 1,
    8: 2,
    9: 4,
    10: 8,
    11: 4,
    12: 8,
}

# Multiplier that turns a density expressed in the given unit into DPI.
_DPI_FACTORS = {
    _UNIT_KM: 0.0000254,
    _UNIT_100M: 0.000254,
    _UNIT_10M: 0.00254,
    _UNIT_1M: 0.0254,
    _UNIT_10CM: 0.254,
    _UNIT_CM: 2.54,
    _UNIT_MM: 25.4,
    _UNIT_0_1MM: 254,
    _UNIT_0_01MM: 2540,
    _UNIT_UM: 25400,
}


def _convertToDPI(density, unit):
    """
    Convert a pixel density expressed in ``unit`` to dots per inch.

    :param density: number of pixels per ``unit``
    :param unit: one of the ``_UNIT_*`` codes
    :return: the density in DPI rounded to the nearest integer; when ``unit``
        is not a known code, ``density`` is returned unchanged
    """
    factor = _DPI_FACTORS.get(unit)
    if factor is None:
        return density
    # Adding 0.5 before truncating rounds to the nearest integer.
    return int(density * factor + 0.5)


def _convertToPx(value):
    """
    Convert an SVG/CSS length string such as ``"10"``, ``"2.5cm"`` or
    ``"72pt"`` to pixels, assuming 96 pixels per inch.

    :param value: the length as text (number followed by an optional unit)
    :return: the length in pixels as a float
    :raises ValueError: if the text is not a number followed by lower-case
        letters, if the number is missing, or if the unit is not supported
    """
    matched = re.match(r"(\d+(?:\.\d+)?)?([a-z]*)$", value)
    if not matched:
        raise ValueError("unknown length value: %s" % value)

    length, unit = matched.groups()
    if length is None:
        # The numeric part of the pattern is optional, so "" or "px" match
        # without a number; report that as a bad value rather than letting
        # float(None) raise TypeError.
        raise ValueError("unknown length value: %s" % value)

    if unit == "":
        return float(length)
    elif unit == "cm":
        return float(length) * 96 / 2.54
    elif unit == "mm":
        return float(length) * 96 / 2.54 / 10
    elif unit == "in":
        return float(length) * 96
    elif unit == "pc":
        # 1 pica = 1/6 inch
        return float(length) * 96 / 6
    elif unit == "pt":
        # 1 point = 1/72 inch
        return float(length) * 96 / 72
    elif unit == "px":
        return float(length)

    raise ValueError("unknown unit type: %s" % unit)


def _littleEndianInlineValue(datatype, data):
    """
    Strip padding from a value stored inline in a little-endian TIFF entry.

    A value shorter than the entry's value field occupies the lowest-numbered
    bytes, which are the low-order bits once the whole field has been decoded
    as a little-endian integer.  The remaining bytes are padding.

    :param datatype: TIFF field type code (3 = SHORT, 4 = LONG)
    :param data: the whole value field decoded as an unsigned integer
    :return: the value with padding bits removed
    """
    if datatype == 3:
        return data & 0xFFFF
    if datatype == 4:
        return data & 0xFFFFFFFF
    return data


def get(filepath):
    """
    Return (width, height) for a given img file content
    no requirements

    Supported formats: GIF, PNG, JPEG, JPEG2000, TIFF (both byte orders),
    little-endian BigTIFF, SVG and Netpbm.  ``(-1, -1)`` is returned when the
    format is not recognised.  SVG sizes are returned as floats.

    A file-like object (anything with a ``read`` method) may be passed instead
    of a path.  It must be seekable for formats that need more than the first
    24 bytes; it is left open and, where possible, rewound to the position it
    had on entry.

    :type filepath: Union[bytes, str, pathlib.Path, BinaryIO]
    :rtype Tuple[int, int]
    :raises ValueError: if the file is recognised but malformed
    """
    height = -1
    width = -1

    owns_handle = not hasattr(filepath, 'read')
    start_pos = None
    if owns_handle:
        fhandle = open(filepath, 'rb')
    else:
        fhandle = filepath  # caller-owned stream: never close it here
        try:
            start_pos = fhandle.tell()
        except (AttributeError, OSError):
            start_pos = None

    try:
        head = fhandle.read(24)
        size = len(head)
        # handle GIFs: width and height are unsigned 16-bit little-endian
        # integers right after the 6-byte signature
        if size >= 10 and head[:6] in (b'GIF87a', b'GIF89a'):
            width, height = struct.unpack("<HH", head[6:10])
        # handle PNGs: the 8-byte signature is followed by the first chunk
        # (4-byte length, 4-byte type 'IHDR'), then width and height
        elif size >= 24 and head.startswith(b'\211PNG\r\n\032\n') and head[12:16] == b'IHDR':
            width, height = struct.unpack(">LL", head[16:24])
        # Maybe this is for an older PNG version.
        elif size >= 16 and head.startswith(b'\211PNG\r\n\032\n'):
            width, height = struct.unpack(">LL", head[8:16])
        # handle JPEGs
        elif size >= 2 and head.startswith(b'\377\330'):
            try:
                fhandle.seek(0)  # Read 0xff next
                size = 2
                ftype = 0
                # Walk the segments until a start-of-frame marker is found.
                # 0xc4, 0xc8 and 0xcc fall in the 0xc0-0xcf range but are
                # not SOFn markers, so they are skipped as well.
                while not 0xc0 <= ftype <= 0xcf or ftype in [0xc4, 0xc8, 0xcc]:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack('>HH', fhandle.read(4))
            except (struct.error, TypeError):
                # ord(b'') raises TypeError when the file ends early
                raise ValueError("Invalid JPEG file")
        # handle JPEG2000s
        elif size >= 12 and head.startswith(b'\x00\x00\x00\x0cjP\x20\x20\r\n\x87\n'):
            fhandle.seek(48)
            try:
                height, width = struct.unpack('>LL', fhandle.read(8))
            except struct.error:
                raise ValueError("Invalid JPEG2000 file")
        # handle big endian TIFF
        elif size >= 8 and head.startswith(b"\x4d\x4d\x00\x2a"):
            offset = struct.unpack('>L', head[4:8])[0]
            fhandle.seek(offset)
            ifdsize = struct.unpack(">H", fhandle.read(2))[0]
            for i in range(ifdsize):
                tag, datatype, count, data = struct.unpack(">HHLL", fhandle.read(12))
                if tag == 256:
                    if datatype == 3:
                        # a SHORT sits in the first two bytes of the field,
                        # i.e. the high 16 bits in big-endian order
                        width = data >> 16
                    elif datatype == 4:
                        width = data
                    else:
                        raise ValueError("Invalid TIFF file: width column data type should be SHORT/LONG.")
                elif tag == 257:
                    if datatype == 3:
                        height = data >> 16
                    elif datatype == 4:
                        height = data
                    else:
                        raise ValueError("Invalid TIFF file: height column data type should be SHORT/LONG.")
                if width != -1 and height != -1:
                    break
            if width == -1 or height == -1:
                raise ValueError("Invalid TIFF file: width and/or height IDS entries are missing.")
        # handle little endian TIFF
        elif size >= 8 and head.startswith(b"\x49\x49\x2a\x00"):
            offset = struct.unpack('<L', head[4:8])[0]
            fhandle.seek(offset)
            ifdsize = struct.unpack("<H", fhandle.read(2))[0]
            for i in range(ifdsize):
                tag, datatype, count, data = struct.unpack("<HHLL", fhandle.read(12))
                if tag == 256:
                    width = _littleEndianInlineValue(datatype, data)
                elif tag == 257:
                    height = _littleEndianInlineValue(datatype, data)
                if width != -1 and height != -1:
                    break
            if width == -1 or height == -1:
                raise ValueError("Invalid TIFF file: width and/or height IDS entries are missing.")
        # handle little endian BigTiff
        elif size >= 8 and head.startswith(b"\x49\x49\x2b\x00"):
            bytesize_offset = struct.unpack('<L', head[4:8])[0]
            if bytesize_offset != 8:
                raise ValueError(
                    'Invalid BigTIFF file: Expected offset to be 8, found {} instead.'.format(bytesize_offset))
            offset = struct.unpack('<Q', head[8:16])[0]
            fhandle.seek(offset)
            ifdsize = struct.unpack("<Q", fhandle.read(8))[0]
            for i in range(ifdsize):
                tag, datatype, count, data = struct.unpack("<HHQQ", fhandle.read(20))
                if tag == 256:
                    width = _littleEndianInlineValue(datatype, data)
                elif tag == 257:
                    height = _littleEndianInlineValue(datatype, data)
                if width != -1 and height != -1:
                    break
            if width == -1 or height == -1:
                raise ValueError("Invalid BigTIFF file: width and/or height IDS entries are missing.")

        # handle SVGs
        elif size >= 5 and head.startswith((b'<?xml', b'<svg')):
            fhandle.seek(0)
            data = fhandle.read(1024)
            try:
                data = data.decode('utf-8')
                # [^-] keeps attributes such as stroke-width from matching
                width = re.search(r'[^-]width="(.*?)"', data).group(1)
                height = re.search(r'[^-]height="(.*?)"', data).group(1)
            except Exception:
                raise ValueError("Invalid SVG file")
            width = _convertToPx(width)
            height = _convertToPx(height)

        # handle Netpbm
        elif head[:1] == b"P" and head[1:2] in b"123456":
            fhandle.seek(2)
            sizes = []

            while True:
                next_chr = fhandle.read(1)

                if next_chr.isspace():
                    continue

                if next_chr == b"":
                    raise ValueError("Invalid Netpbm file")

                if next_chr == b"#":
                    # comment: skip the rest of the line
                    fhandle.readline()
                    continue

                if not next_chr.isdigit():
                    raise ValueError("Invalid character found on Netpbm file")

                digits = next_chr
                next_chr = fhandle.read(1)

                while next_chr.isdigit():
                    digits += next_chr
                    next_chr = fhandle.read(1)

                sizes.append(int(digits))

                if len(sizes) == 2:
                    break

                # Un-read the byte that ended the number so the next pass can
                # classify it.  At end of file nothing was read, and stepping
                # back would re-read the last digit as a second number.
                if next_chr:
                    fhandle.seek(-1, os.SEEK_CUR)
            width, height = sizes

    finally:
        if owns_handle:
            fhandle.close()
        elif start_pos is not None:
            try:
                fhandle.seek(start_pos)
            except (AttributeError, OSError, ValueError):
                pass  # best effort: stream not seekable or already closed

    return width, height


def getDPI(filepath):
    """
    Return (x DPI, y DPI) for a given img file content
    no requirements

    PNG, JPEG and JPEG2000 are inspected.  ``(-1, -1)`` is returned when the
    format is not handled or the file carries no density information (GIF
    never does).

    :type filepath: Union[bytes, str, pathlib.Path]
    :rtype Tuple[int, int]
    :raises ValueError: if the file is recognised but malformed
    """
    xDPI = -1
    yDPI = -1

    if not isinstance(filepath, bytes):
        filepath = str(filepath)

    with open(filepath, 'rb') as fhandle:
        head = fhandle.read(24)
        size = len(head)
        # handle GIFs
        # GIFs don't have density
        if size >= 10 and head[:6] in (b'GIF87a', b'GIF89a'):
            pass
        # handle PNGs: walk the chunks that follow the 8-byte signature until
        # a pHYs chunk (density) or the first IDAT chunk is reached
        elif size >= 24 and head.startswith(b'\211PNG\r\n\032\n'):
            chunkOffset = 8
            chunk = head[8:]
            while True:
                chunkType = chunk[4:8]
                if chunkType == b'pHYs':
                    try:
                        xDensity, yDensity, unit = struct.unpack(">LLB", chunk[8:])
                    except struct.error:
                        raise ValueError("Invalid PNG file")
                    if unit:
                        xDPI = _convertToDPI(xDensity, _UNIT_1M)
                        yDPI = _convertToDPI(yDensity, _UNIT_1M)
                    else:  # no unit
                        xDPI = xDensity
                        yDPI = yDensity
                    break
                elif chunkType == b'IDAT':
                    break
                else:
                    try:
                        dataSize, = struct.unpack(">L", chunk[0:4])
                    except struct.error:
                        raise ValueError("Invalid PNG file")
                    # 12 = length field + type field + CRC around the data
                    chunkOffset += dataSize + 12
                    fhandle.seek(chunkOffset)
                    # 17 = 8-byte chunk header + the 9 data bytes of a pHYs
                    chunk = fhandle.read(17)
        # handle JPEGs
        elif size >= 2 and head.startswith(b'\377\330'):
            try:
                fhandle.seek(0)  # Read 0xff next
                size = 2
                ftype = 0
                while not 0xc0 <= ftype <= 0xcf:
                    if ftype == 0xe0:  # APP0 marker
                        # skip the 5-byte identifier and the 2-byte version
                        fhandle.seek(7, 1)
                        unit, xDensity, yDensity = struct.unpack(">BHH", fhandle.read(5))
                        if unit == 1 or unit == 0:
                            xDPI = xDensity
                            yDPI = yDensity
                        elif unit == 2:
                            xDPI = _convertToDPI(xDensity, _UNIT_CM)
                            yDPI = _convertToDPI(yDensity, _UNIT_CM)
                        break
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
            except (struct.error, TypeError):
                # ord(b'') raises TypeError when the file ends early
                raise ValueError("Invalid JPEG file")
        # handle JPEG2000s
        elif size >= 12 and head.startswith(b'\x00\x00\x00\x0cjP\x20\x20\r\n\x87\n'):
            fhandle.seek(32)
            # read the size of the JP2 header super box, then skip its type
            headerSize = struct.unpack('>L', fhandle.read(4))[0] - 8
            fhandle.seek(4, 1)
            foundResBox = False
            try:
                while headerSize > 0:
                    boxHeader = fhandle.read(8)
                    boxType = boxHeader[4:]
                    if boxType == b'res ':  # find resolution super box
                        foundResBox = True
                        headerSize -= 8
                        break
                    boxSize, = struct.unpack('>L', boxHeader[:4])
                    if boxSize < 8:
                        # A length smaller than the box header cannot be
                        # skipped with this arithmetic; a length of 0 would
                        # re-read the same header forever.  Stop scanning.
                        break
                    fhandle.seek(boxSize - 8, 1)
                    headerSize -= boxSize
                if foundResBox:
                    while headerSize > 0:
                        boxHeader = fhandle.read(8)
                        boxType = boxHeader[4:]
                        if boxType == b'resd':  # Display resolution box
                            # 10-byte payload: vertical numerator and
                            # denominator, horizontal numerator and
                            # denominator (16-bit each), then the signed
                            # vertical and horizontal power-of-ten exponents.
                            (yNum, yDen, xNum, xDen,
                             yUnit, xUnit) = struct.unpack(">HHHHbb", fhandle.read(10))
                            if xDen == 0 or yDen == 0:
                                raise ValueError("Invalid JPEG2000 file")
                            xDPI = _convertToDPI(float(xNum) / xDen, xUnit)
                            yDPI = _convertToDPI(float(yNum) / yDen, yUnit)
                            break
                        boxSize, = struct.unpack('>L', boxHeader[:4])
                        if boxSize < 8:
                            break  # same guard as above
                        fhandle.seek(boxSize - 8, 1)
                        headerSize -= boxSize
            except struct.error:
                raise ValueError("Invalid JPEG2000 file")
    return xDPI, yDPI