1import io
2import os
3import re
4import struct
5from xml.etree import ElementTree
6
# Length-unit codes accepted by _convertToDPI().  For the metric codes the
# value e means "density is expressed per 10**-e metres" (e.g. 2 -> per cm),
# which matches the scale factors _convertToDPI() applies.
_UNIT_KM = -3
_UNIT_100M = -2
_UNIT_10M = -1
_UNIT_1M = 0
_UNIT_10CM = 1
_UNIT_CM = 2
_UNIT_MM = 3
_UNIT_0_1MM = 4
_UNIT_0_01MM = 5
_UNIT_UM = 6
# A per-inch density is already a DPI value.  This code must not alias any
# metric code (it used to share 6 with _UNIT_UM, which would have scaled inch
# densities by 25400); an unlisted code makes _convertToDPI() return the
# density unchanged.
_UNIT_INCH = 7
18
# TIFF field type code -> size in bytes of a single value of that type.
# Type names in the trailing comments follow the TIFF 6.0 field type table.
_TIFF_TYPE_SIZES = {
    1: 1,    # BYTE
    2: 1,    # ASCII
    3: 2,    # SHORT
    4: 4,    # LONG
    5: 8,    # RATIONAL
    6: 1,    # SBYTE
    7: 1,    # UNDEFINED
    8: 2,    # SSHORT
    9: 4,    # SLONG
    10: 8,   # SRATIONAL
    11: 4,   # FLOAT
    12: 8,   # DOUBLE
}
33
34
def _convertToDPI(density, unit):
    """
    Convert a density expressed in ``unit`` into dots per inch.

    :param density: number of dots per ``unit``
    :param unit: one of the ``_UNIT_*`` codes
    :return: for the units from kilometre down to millimetre, the scaled
        value with 0.5 added and truncated to ``int``; for the three finer
        units, the plain product ``density * factor``; for any other unit
        code, ``density`` unchanged
    """
    # Coarse units: the scaled value is a fraction, so it is rounded to int.
    rounded_factors = (
        (_UNIT_KM, 0.0000254),
        (_UNIT_100M, 0.000254),
        (_UNIT_10M, 0.00254),
        (_UNIT_1M, 0.0254),
        (_UNIT_10CM, 0.254),
        (_UNIT_CM, 2.54),
        (_UNIT_MM, 25.4),
    )
    for code, factor in rounded_factors:
        if unit == code:
            return int(density * factor + 0.5)

    # Fine units: whole-number factors, returned without int() conversion.
    plain_factors = (
        (_UNIT_0_1MM, 254),
        (_UNIT_0_01MM, 2540),
        (_UNIT_UM, 25400),
    )
    for code, factor in plain_factors:
        if unit == code:
            return density * factor

    # Unknown unit code: hand the value back untouched.
    return density
57
58
def _convertToPx(value):
    """
    Convert an SVG/CSS absolute length such as ``"10mm"`` into pixels.

    The conversion uses 96 px per inch.  Recognised unit suffixes are
    ``cm``, ``mm``, ``in``, ``pc``, ``pt`` and ``px``; a bare number is taken
    as pixels.

    :param value: the length string, e.g. ``"100"``, ``"12.5px"``, ``"2in"``
    :type value: str
    :return: the length in pixels
    :rtype: float
    :raises ValueError: if ``value`` is not ``<number><lowercase unit>``, if
        the unit is not recognised, or if the number is missing
    """
    matched = re.match(r"(\d+(?:\.\d+)?)?([a-z]*)$", value)
    if not matched:
        raise ValueError("unknown length value: %s" % value)

    length, unit = matched.groups()
    if unit not in ("", "cm", "mm", "in", "pc", "pt", "px"):
        raise ValueError("unknown unit type: %s" % unit)
    if length is None:
        # The numeric part is optional in the pattern; a unit on its own
        # (or an empty string) is not a usable length.
        raise ValueError("unknown length value: %s" % value)

    number = float(length)
    if unit == "cm":
        return number * 96 / 2.54
    if unit == "mm":
        return number * 96 / 2.54 / 10
    if unit == "in":
        return number * 96
    if unit == "pc":
        # 1 pica = 1/6 inch
        return number * 96 / 6
    if unit == "pt":
        # 1 point = 1/72 inch
        return number * 96 / 72
    # "" and "px": already pixels
    return number
81
82
def get(filepath):
    """
    Return (width, height) for a given img file content
    no requirements

    The format is detected from the first 24 bytes.  Handled formats: GIF,
    PNG, JPEG, JPEG 2000, TIFF (big and little endian), little endian
    BigTIFF, SVG and Netpbm (P1-P6).

    :param filepath: path of the image, or an already opened binary
        file-like object (anything with a ``read`` method, e.g.
        ``io.BytesIO``).  A file-like object is read from its current
        position, is NOT closed, and its position afterwards is unspecified.
    :type filepath: Union[bytes, str, pathlib.Path, BinaryIO]
    :return: ``(width, height)``; ``(-1, -1)`` when the format is not
        recognised.  For SVG the values are whatever ``_convertToPx``
        returns (floats).
    :rtype: Tuple[Union[int, float], Union[int, float]]
    :raises ValueError: if the file is recognised but malformed or truncated
    """
    height = -1
    width = -1

    # Only a handle opened here is ours to close; a stream handed in by the
    # caller stays open so the caller can keep using it.
    owns_handle = not hasattr(filepath, 'read')
    fhandle = open(filepath, 'rb') if owns_handle else filepath

    try:
        head = fhandle.read(24)
        size = len(head)
        # handle GIFs
        if size >= 10 and head[:6] in (b'GIF87a', b'GIF89a'):
            # Logical screen width/height are unsigned 16-bit little endian.
            width, height = struct.unpack("<HH", head[6:10])
        # PNG: 8-byte signature, 4-byte chunk length, b'IHDR', then the
        # big endian width and height.
        elif size >= 24 and head.startswith(b'\211PNG\r\n\032\n') and head[12:16] == b'IHDR':
            width, height = struct.unpack(">LL", head[16:24])
        # Maybe this is for an older PNG version.
        elif size >= 16 and head.startswith(b'\211PNG\r\n\032\n'):
            width, height = struct.unpack(">LL", head[8:16])
        # handle JPEGs
        elif size >= 2 and head.startswith(b'\377\330'):
            try:
                fhandle.seek(0)  # Read 0xff next
                size = 2
                ftype = 0
                # Walk the segments until a SOFn marker (0xc0-0xcf except
                # 0xc4, 0xc8 and 0xcc, which this loop skips).
                while not 0xc0 <= ftype <= 0xcf or ftype in [0xc4, 0xc8, 0xcc]:
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
                # We are at a SOFn block
                fhandle.seek(1, 1)  # Skip `precision' byte.
                height, width = struct.unpack('>HH', fhandle.read(4))
            except (struct.error, TypeError):
                # TypeError: ord() on the empty bytes read at end of file
                raise ValueError("Invalid JPEG file")
        # handle JPEG2000s
        elif size >= 12 and head.startswith(b'\x00\x00\x00\x0cjP  \r\n\x87\n'):
            fhandle.seek(48)
            try:
                height, width = struct.unpack('>LL', fhandle.read(8))
            except struct.error:
                raise ValueError("Invalid JPEG2000 file")
        # handle big endian TIFF
        elif size >= 8 and head.startswith(b"\x4d\x4d\x00\x2a"):
            try:
                offset = struct.unpack('>L', head[4:8])[0]
                fhandle.seek(offset)
                ifdsize = struct.unpack(">H", fhandle.read(2))[0]
                for _ in range(ifdsize):
                    tag, datatype, count, data = struct.unpack(">HHLL", fhandle.read(12))
                    if tag == 256:
                        if datatype == 3:
                            # SHORT sits in the upper half of the value field
                            width = data >> 16
                        elif datatype == 4:
                            width = data
                        else:
                            raise ValueError("Invalid TIFF file: width column data type should be SHORT/LONG.")
                    elif tag == 257:
                        if datatype == 3:
                            height = data >> 16
                        elif datatype == 4:
                            height = data
                        else:
                            raise ValueError("Invalid TIFF file: height column data type should be SHORT/LONG.")
                    if width != -1 and height != -1:
                        break
            except struct.error:
                # short read: the directory runs past the end of the file
                raise ValueError("Invalid TIFF file")
            if width == -1 or height == -1:
                raise ValueError("Invalid TIFF file: width and/or height IDS entries are missing.")
        # handle little endian TIFF
        elif size >= 8 and head.startswith(b"\x49\x49\x2a\x00"):
            try:
                offset = struct.unpack('<L', head[4:8])[0]
                fhandle.seek(offset)
                ifdsize = struct.unpack("<H", fhandle.read(2))[0]
                for _ in range(ifdsize):
                    tag, datatype, count, data = struct.unpack("<HHLL", fhandle.read(12))
                    if datatype == 3:
                        # SHORT sits in the lower half of the value field;
                        # drop whatever is in the unused upper bytes.
                        data &= 0xFFFF
                    if tag == 256:
                        width = data
                    elif tag == 257:
                        height = data
                    if width != -1 and height != -1:
                        break
            except struct.error:
                raise ValueError("Invalid TIFF file")
            if width == -1 or height == -1:
                raise ValueError("Invalid TIFF file: width and/or height IDS entries are missing.")
        # handle little endian BigTiff
        elif size >= 8 and head.startswith(b"\x49\x49\x2b\x00"):
            bytesize_offset = struct.unpack('<L', head[4:8])[0]
            if bytesize_offset != 8:
                raise ValueError(
                    'Invalid BigTIFF file: Expected offset to be 8, found {} instead.'.format(bytesize_offset))
            try:
                offset = struct.unpack('<Q', head[8:16])[0]
                fhandle.seek(offset)
                ifdsize = struct.unpack("<Q", fhandle.read(8))[0]
                for _ in range(ifdsize):
                    tag, datatype, count, data = struct.unpack("<HHQQ", fhandle.read(20))
                    if tag == 256:
                        width = data
                    elif tag == 257:
                        height = data
                    if width != -1 and height != -1:
                        break
            except struct.error:
                raise ValueError("Invalid BigTIFF file")
            if width == -1 or height == -1:
                raise ValueError("Invalid BigTIFF file: width and/or height IDS entries are missing.")

        # handle SVGs
        elif size >= 5 and (head.startswith(b'<?xml') or head.startswith(b'<svg')):
            fhandle.seek(0)
            data = fhandle.read(1024)
            try:
                data = data.decode('utf-8')
                width = re.search(r'[^-]width="(.*?)"', data).group(1)
                height = re.search(r'[^-]height="(.*?)"', data).group(1)
            except (UnicodeDecodeError, AttributeError):
                # undecodable text, or no width/height attribute found
                raise ValueError("Invalid SVG file")
            width = _convertToPx(width)
            height = _convertToPx(height)

        # handle Netpbm
        elif size >= 2 and head[:1] == b"P" and head[1:2] in b"123456":
            fhandle.seek(2)
            sizes = []

            # Collect the first two decimal numbers after the magic,
            # skipping whitespace and '#' comment lines.
            while len(sizes) < 2:
                next_chr = fhandle.read(1)

                if next_chr == b"":
                    raise ValueError("Invalid Netpbm file")

                if next_chr.isspace():
                    continue

                if next_chr == b"#":
                    fhandle.readline()
                    continue

                if not next_chr.isdigit():
                    raise ValueError("Invalid character found on Netpbm file")

                digits = next_chr
                next_chr = fhandle.read(1)

                while next_chr.isdigit():
                    digits += next_chr
                    next_chr = fhandle.read(1)

                sizes.append(int(digits))

                # Push the delimiter back, but only if one was actually
                # read: at end of file stepping back would re-read the
                # last digit as a new number.
                if next_chr:
                    fhandle.seek(-1, os.SEEK_CUR)
            width, height = sizes

    finally:
        if owns_handle:
            fhandle.close()

    return width, height
257
258
def getDPI(filepath):
    """
    Return (x DPI, y DPI) for a given img file content
    no requirements

    Density information is looked up for PNG (``pHYs`` chunk), JPEG (JFIF
    APP0 segment) and JPEG 2000 (display resolution box).  GIF is recognised
    but carries no density.

    :param filepath: path of the image file
    :type filepath: Union[bytes, str, pathlib.Path]
    :return: ``(xDPI, yDPI)``; ``(-1, -1)`` when the format is not handled
        or the file holds no density information
    :rtype: Tuple[int, int]
    :raises ValueError: if the file is recognised but malformed or truncated
    """
    xDPI = -1
    yDPI = -1

    if not isinstance(filepath, bytes):
        filepath = str(filepath)

    with open(filepath, 'rb') as fhandle:
        head = fhandle.read(24)
        size = len(head)
        # handle GIFs
        # GIFs doesn't have density
        if size >= 10 and head[:6] in (b'GIF87a', b'GIF89a'):
            pass
        # PNG: walk the chunks that follow the 8-byte signature until a
        # pHYs chunk is found; give up at the first IDAT chunk.
        elif size >= 24 and head.startswith(b'\211PNG\r\n\032\n'):
            chunkOffset = 8
            chunk = head[8:]
            while True:
                chunkType = chunk[4:8]
                if chunkType == b'pHYs':
                    try:
                        xDensity, yDensity, unit = struct.unpack(">LLB", chunk[8:])
                    except struct.error:
                        raise ValueError("Invalid PNG file")
                    if unit:
                        xDPI = _convertToDPI(xDensity, _UNIT_1M)
                        yDPI = _convertToDPI(yDensity, _UNIT_1M)
                    else:  # no unit
                        xDPI = xDensity
                        yDPI = yDensity
                    break
                elif chunkType == b'IDAT':
                    break
                else:
                    try:
                        dataSize, = struct.unpack(">L", chunk[0:4])
                    except struct.error:
                        raise ValueError("Invalid PNG file")
                    # length field + type + data + CRC
                    chunkOffset += dataSize + 12
                    fhandle.seek(chunkOffset)
                    # 8-byte chunk header plus the 9 bytes of a pHYs payload
                    chunk = fhandle.read(17)
        # handle JPEGs
        elif size >= 2 and head.startswith(b'\377\330'):
            try:
                fhandle.seek(0)  # Read 0xff next
                size = 2
                ftype = 0
                while not 0xc0 <= ftype <= 0xcf:
                    if ftype == 0xe0:  # APP0 marker
                        # skip 7 bytes of the segment, then read
                        # unit, x density, y density
                        fhandle.seek(7, 1)
                        unit, xDensity, yDensity = struct.unpack(">BHH", fhandle.read(5))
                        if unit == 1 or unit == 0:
                            xDPI = xDensity
                            yDPI = yDensity
                        elif unit == 2:
                            xDPI = _convertToDPI(xDensity, _UNIT_CM)
                            yDPI = _convertToDPI(yDensity, _UNIT_CM)
                        break
                    fhandle.seek(size, 1)
                    byte = fhandle.read(1)
                    while ord(byte) == 0xff:
                        byte = fhandle.read(1)
                    ftype = ord(byte)
                    size = struct.unpack('>H', fhandle.read(2))[0] - 2
            except (struct.error, TypeError):
                # TypeError: ord() on the empty bytes read at end of file
                raise ValueError("Invalid JPEG file")
        # handle JPEG2000s
        elif size >= 12 and head.startswith(b'\x00\x00\x00\x0cjP  \r\n\x87\n'):
            fhandle.seek(32)
            try:
                # Length of the box at offset 32, minus its own 8-byte
                # header; the boxes nested inside it are scanned below.
                headerSize = struct.unpack('>L', fhandle.read(4))[0] - 8
                fhandle.seek(4, 1)
                foundResBox = False
                while headerSize > 0:
                    boxHeader = fhandle.read(8)
                    boxType = boxHeader[4:]
                    if boxType == b'res ':  # find resolution super box
                        foundResBox = True
                        headerSize -= 8
                        break
                    boxSize, = struct.unpack('>L', boxHeader[:4])
                    if boxSize < 8:
                        # A length smaller than the header cannot be
                        # skipped over; stop instead of looping forever.
                        break
                    fhandle.seek(boxSize - 8, 1)
                    headerSize -= boxSize
                if foundResBox:
                    while headerSize > 0:
                        boxHeader = fhandle.read(8)
                        boxType = boxHeader[4:]
                        if boxType == b'resd':  # Display resolution box
                            # Payload is 10 bytes: vertical numerator and
                            # denominator, horizontal numerator and
                            # denominator (16-bit each), then the signed
                            # vertical and horizontal decimal exponents.
                            # Resolution = num / den * 10**exp per metre.
                            (yNum, yDen, xNum, xDen,
                             yExp, xExp) = struct.unpack(">HHHHbb", fhandle.read(10))
                            if xDen == 0 or yDen == 0:
                                raise ValueError("Invalid JPEG2000 file")
                            # per metre -> per inch, rounded like
                            # _convertToDPI does
                            xDPI = int(float(xNum) / xDen * 10.0 ** xExp * 0.0254 + 0.5)
                            yDPI = int(float(yNum) / yDen * 10.0 ** yExp * 0.0254 + 0.5)
                            break
                        boxSize, = struct.unpack('>L', boxHeader[:4])
                        if boxSize < 8:
                            break
                        fhandle.seek(boxSize - 8, 1)
                        headerSize -= boxSize
            except struct.error:
                raise ValueError("Invalid JPEG2000 file")
    return xDPI, yDPI
365