"""Mime-type guessing built on `filetype`, with extra MP3, ID3 tag and m3u matchers."""
import pathlib
import filetype
from io import BytesIO
from .id3 import ID3_MIME_TYPE, ID3_MIME_TYPE_EXTENSIONS
from .mp3 import MIME_TYPES as MP3_MIME_TYPES
from .utils.log import getLogger
from filetype.utils import _NUM_SIGNATURE_BYTES

log = getLogger(__name__)


def guessMimetype(filename):
    """Return the mime-type for `filename`.

    `filename` may be a ``pathlib.Path`` or anything ``pathlib.Path()`` accepts.
    The file is opened in binary mode; errors from opening or reading it
    propagate to the caller.

    Returns ``Id3TagExt.MIME`` when the file suffix is one of
    ``ID3_MIME_TYPE_EXTENSIONS`` and the signature matches an ID3 tag matcher,
    otherwise whatever ``filetype.guess_mime()`` returns for the signature.
    """
    path = filename if isinstance(filename, pathlib.Path) else pathlib.Path(filename)

    with path.open("rb") as signature:
        # Since filetype only reads 262 bytes of a file, many mp3s starting with null bytes
        # will not find a header, so ignore leading null bytes and use the bytes interface...
        buf = b""
        while not buf:
            chunk = signature.read(_NUM_SIGNATURE_BYTES)
            if not chunk:
                # End of file: it was empty or held nothing but null bytes.
                break

            chunk = chunk.lstrip(b"\x00")
            if chunk:
                # Top the signature back up to full size. When no null bytes were
                # stripped from a full chunk this reads 0 bytes and adds nothing.
                buf = chunk + signature.read(_NUM_SIGNATURE_BYTES - len(chunk))

    # Special casing .id3/.tag because extending filetype with add_type() prepends, meaning
    # all mp3 would be labeled mimetype id3, while appending would mean each .id3 would be
    # mime mpeg.
    if path.suffix in ID3_MIME_TYPE_EXTENSIONS:
        if Id3Tag().match(buf) or Id3TagExt().match(buf):
            return Id3TagExt.MIME

    return filetype.guess_mime(buf)


class Mp2x(filetype.Type):
    """Implements the MP2.x audio type matcher."""
    MIME = MP3_MIME_TYPES[0]
    EXTENSION = "mp3"

    def __init__(self):
        super().__init__(mime=self.__class__.MIME, extension=self.__class__.EXTENSION)

    def match(self, buf):
        """Return True when `buf` starts with 0xff 0xf3 / 0xff 0xe3 and a header is found."""
        # Imported here rather than at module level, as in the original code
        # (presumably to avoid an import cycle -- TODO confirm).
        from .mp3.headers import findHeader

        if len(buf) <= 2:
            return False
        if buf[0] != 0xff or buf[1] not in (0xf3, 0xe3):
            return False
        # Coerce to bool so the raw header value is not leaked to callers; this
        # matches what Mp3Invalids.match() returns.
        return bool(findHeader(BytesIO(buf), 0)[1])


class Mp3Invalids(filetype.Type):
    """Implements an MP3 audio type matcher for odd or corrupt mp3 data."""
    MIME = MP3_MIME_TYPES[0]
    EXTENSION = "mp3"

    def __init__(self):
        super().__init__(mime=self.__class__.MIME, extension=self.__class__.EXTENSION)

    def match(self, buf):
        """Return True when `findHeader` reports a header anywhere in `buf`."""
        from .mp3.headers import findHeader

        header = findHeader(BytesIO(buf), 0)[1]
        log.debug(f"Mp3Invalid, found: {header}")
        return bool(header)


class Id3Tag(filetype.Type):
    """Implements the ID3 tag file matcher (extension ``id3``)."""
    MIME = ID3_MIME_TYPE
    EXTENSION = "id3"

    def __init__(self):
        super().__init__(mime=self.__class__.MIME, extension=self.__class__.EXTENSION)

    def match(self, buf):
        """Return True when `buf` is empty or begins with ``ID3`` or ``TAG``."""
        return buf[:3] in (b"ID3", b"TAG") or len(buf) == 0


class Id3TagExt(Id3Tag):
    """Same matcher as `Id3Tag`, registered under the ``tag`` extension."""
    EXTENSION = "tag"


class M3u(filetype.Type):
    """Implements the m3u playlist matcher."""
    MIME = "audio/x-mpegurl"
    EXTENSION = "m3u"

    def __init__(self):
        super().__init__(mime=self.__class__.MIME, extension=self.__class__.EXTENSION)

    def match(self, buf):
        """Return True when `buf` begins with the ``#EXTM3U`` marker."""
        return len(buf) > 6 and buf.startswith(b"#EXTM3U")


# Not using `add_type()`, to append: add_type() prepends (see the note in
# guessMimetype), so these matchers are appended to the end of the list instead.
filetype.types.append(Mp2x())
filetype.types.append(M3u())
filetype.types.append(Mp3Invalids())