# Created By: Virgil Dupras
# Created On: 2005/07/27
# Copyright 2010 Hardcoded Software (http://www.hardcoded.net)

# This software is licensed under the "BSD" License as described in the "LICENSE" file,
# which should be included with this package. The terms are also available at
# http://www.hardcoded.net/licenses/bsd_license

import re
import struct

from .util import open_if_filename, tryint
from .genres import genre_by_index

# Size, in bytes, of the basic atom header: 32-bit size followed by a 4-character type.
HEADER_SIZE = 8

re_atom_type = re.compile(r'[A-Za-z0-9\-©]{4}')


def read_atom_header(readfunc, offset):
    """Read the atom header located at ``offset`` and return ``(size, type)``.

    ``readfunc`` is called as ``readfunc(offset, count)`` and must return bytes.
    ``size`` is the total atom size (header included) and ``type`` is the
    4-character type decoded as latin-1. An empty tuple is returned when fewer
    than ``HEADER_SIZE`` bytes could be read.
    """
    header = readfunc(offset, HEADER_SIZE)
    if len(header) != HEADER_SIZE:
        return ()
    # The size field is unsigned; reading it as signed made atoms of 2 GiB or
    # more (typically 'mdat') look negative.
    size, byte_type = struct.unpack('!I4s', header)
    if size == 1:
        # A size of 1 means the real size is a 64-bit integer stored right after
        # the type. Only the total size is corrected here: Atom.read() still
        # assumes an 8-byte header, so the *content* of such an atom is read
        # 8 bytes too early. This is enough to skip over large atoms correctly.
        largesize = readfunc(offset + HEADER_SIZE, 8)
        if len(largesize) == 8:
            [size] = struct.unpack('!Q', largesize)
    return (size, str(byte_type, 'latin-1'))


def is_valid_atom_type(atom_type):
    """Return a truthy match object if ``atom_type`` starts with 4 valid type characters.

    Returns ``None`` otherwise.
    """
    return re_atom_type.match(atom_type)

# Base atom classes *****************************************


class Atom:
    """A single atom: a header (size + type) followed by ``content_size`` bytes.

    Subclasses describe the layout of their content either by setting
    ``cls_data_model`` or by overriding ``_get_data_model()``. The model is a
    ``struct`` format string (without byte-order prefix) in which ``*s`` stands
    for "a byte string taking all the remaining content".
    """
    cls_data_model = ''

    def __init__(self, parent, start_offset, header=None):
        """parent is anything that has a read method

        ``start_offset`` is the offset of this atom's header, relative to the
        start of the parent's content. ``header`` is an already-read
        ``(size, type)`` tuple; when it is ``None`` the header is read through
        ``parent``.
        """
        self.parent = parent
        self.start_offset = start_offset
        self.size = 0
        self.type = ''
        self._valid = False
        self._data = None
        if header is None:
            # self.read() offsets are relative to the content, so the header sits
            # HEADER_SIZE bytes before offset 0.
            header = read_atom_header(self.read, -HEADER_SIZE)
        if header:
            self.size, self.type = header
            self._valid = True

    # --- Protected
    def _get_data_model(self):
        """Return the struct format (without byte-order prefix) of this atom's content."""
        return self.cls_data_model

    def _read_atom_data(self):
        """Read the content described by the data model and return the unpacked tuple.

        Also sets ``self._datasize`` to the number of bytes the model covers.
        Content shorter than the model is right-padded (with spaces, the
        ``bytes.ljust`` default) before unpacking.
        """
        dm = '!' + self._get_data_model()
        if '*s' in dm:
            prevsize = struct.calcsize(dm.replace('*s', ''))
            # An atom shorter than its fixed-size prefix would give a negative
            # repeat count, which struct rejects; treat the string as empty.
            varsize = max(0, self.content_size - prevsize)
            dm = dm.replace('*s', '%ds' % varsize, 1).replace('*s', '')
        self._datasize = struct.calcsize(dm)
        data = self.read(0, self._datasize)
        if len(data) < self._datasize:
            data = data.ljust(self._datasize)
        return struct.unpack(dm, data)

    # --- Public
    def read(self, startat=0, readcount=-1):
        """Read ``readcount`` bytes of this atom's content, starting at ``startat``.

        A negative ``readcount`` means "the whole content". The read is delegated
        to the parent, translating the offset into the parent's content.
        """
        if readcount < 0:
            readcount = self.content_size
        return self.parent.read(self.start_offset + HEADER_SIZE + startat, readcount)

    # --- Properties
    @property
    def content_size(self):
        """Size of the atom minus its header."""
        return self.size - HEADER_SIZE

    @property
    def data(self):
        """The unpacked content tuple, read on first access and then cached."""
        if self._data is None:
            self._data = self._read_atom_data()
        return self._data

    @property
    def valid(self):
        """Whether a header could be obtained for this atom."""
        return self._valid


class AtomBox(Atom):
    """An atom whose content is (after its own data) a sequence of child atoms."""

    def __init__(self, parent, start_offset, header=None):
        Atom.__init__(self, parent, start_offset, header)
        self._children = None

    # --- Protected
    def _read_children(self):
        """Walk this atom's content and return its child atoms as a tuple.

        Children whose type does not pass ``is_valid_atom_type()`` are skipped
        (but still stepped over).
        """
        children = []
        self.data  # pre-read data so that self._datasize is set
        # Children start right after this atom's own data.
        startat = self._datasize
        while startat < self.content_size:
            header = read_atom_header(self.read, startat)
            if not header:
                break
            size, atom_type = header
            if size == 0:  # when size is zero, it takes the rest of the atom
                size = self.content_size - startat
            if size < HEADER_SIZE:  # safeguard: always move forward by at least a header
                size = HEADER_SIZE
            if is_valid_atom_type(atom_type):
                atom_class = self._get_atom_class(atom_type)
                children.append(atom_class(self, startat, (size, atom_type)))
            startat += size

        return tuple(children)

    def _get_atom_class(self, type):
        """Return the class to instantiate for a child of the given type."""
        return ATOM_SPECS.get(type, Atom)

    # --- Public
    def find(self, atom_type):
        """Return the first atom matching the dotted path ``atom_type``, or ``None``.

        ``atom_type`` is either a 4-character type or several of them joined by
        a one-character separator (e.g. ``'moov.udta.meta'``); each further
        component is looked up in the atom found for the previous one.
        """
        gotta_find = atom_type[:4]
        # You'd think that iterating through atoms is slow and that there should be a {type:atom}
        # mapping, but the tests I've done on real data shows that doing so is in fact slower.
        # I think this is because most atoms have only a few subatoms.
        for atom in self.atoms:
            if atom.type == gotta_find:
                if len(atom_type) >= 9:
                    return atom.find(atom_type[5:])
                return atom
        return None

    # --- Properties
    @property
    def atoms(self):
        """The tuple of child atoms, read on first access and then cached."""
        if self._children is None:
            self._children = self._read_children()
        return self._children


# Specific atoms *************************************************************

class AttributeAtom(AtomBox):
    """A box whose children are all read as ``AttributeDataAtom``."""

    def _get_atom_class(self, type):
        return AttributeDataAtom

    @property
    def attr_data(self):
        """The value held by the first child atom, or ``''`` when there is none."""
        try:
            return self.atoms[0].attr_data
        except IndexError:
            # For some reason, our attribute atom has no data sub-atom, no biggie, just return nothing.
            return ''


class AttributeDataAtom(Atom):
    """Atom holding one value: two 32-bit integers followed by an integer or a string."""

    def _get_data_model(self, integer_type='i'):
        # The first 32-bit integer selects the payload kind: 0 means the payload
        # is read as an integer of ``integer_type``, anything else as a string
        # taking the rest of the atom.
        [data_type] = struct.unpack('!i', self.read(0, 4))
        return '2i' + (integer_type if data_type == 0 else '*s')

    def _read_atom_data(self):
        result = Atom._read_atom_data(self)
        # Convert to unicode if needed
        if isinstance(result[2], bytes):
            result = list(result)
            result[2] = result[2].decode('utf-8', 'ignore')
            result = tuple(result)
        return result

    @property
    def attr_data(self):
        """The payload: an ``int`` or a ``str`` depending on the data type."""
        return self.data[2]


class EsdsAtom(Atom):
    # 26 bytes are skipped, then one 32-bit integer is read as the bitrate.
    cls_data_model = '26si'

    @property
    def bitrate(self):
        """The integer read right after the 26 skipped bytes."""
        return self.data[1]


class GnreAtom(AttributeAtom):
    """Like ``AttributeAtom``, but its children are read as ``GnreDataAtom``."""

    def _get_atom_class(self, type):
        return GnreDataAtom


class GnreDataAtom(AttributeDataAtom):
    def _get_data_model(self):
        # Integer payloads are read as an unsigned 16-bit value here.
        return AttributeDataAtom._get_data_model(self, 'H')


class MetaAtom(AtomBox):
    # 4 bytes of own data precede the children.
    cls_data_model = 'i'


class MdhdAtom(Atom):
    """Media header: gives access to the time scale and the duration."""

    def _get_data_model(self):
        [version] = struct.unpack('B', self.read(0, 1))
        if version > 0:
            # Version 1: version/flags (4) + 64-bit creation and modification
            # times (16) are skipped, then a 32-bit time scale and a *64-bit*
            # duration. Reading the duration as 32 bits only returned its upper
            # half.
            return '20sIQ'
        # Version 0: version/flags (4) + 32-bit creation and modification times
        # (8) are skipped, then unsigned 32-bit time scale and duration.
        return '12s2I'

    @property
    def sample_rate(self):
        """The time scale field (units per second)."""
        return self.data[1]

    @property
    def duration(self):
        """The duration field, expressed in time scale units."""
        return self.data[2]


class StsdAtom(AtomBox):
    def _get_data_model(self):
        # The 4 bytes at offset 12 select how many bytes of own data are skipped
        # before child atoms start.
        [version] = struct.unpack('4s', self.read(12, 4))
        if version in (b'mp4v', b'avc1', b'encv', b's263'):
            return '94s'
        elif version in (b'mp4a', b'drms', b'enca', b'samr', b'sawb'):
            return '44s'
        else:
            return '24s'


# Maps an atom type to the class used to read it; unlisted types use plain Atom.
ATOM_SPECS = {
    '©nam': AttributeAtom,
    '©ART': AttributeAtom,
    '©wrt': AttributeAtom,
    '©alb': AttributeAtom,
    '©too': AttributeAtom,
    '©day': AttributeAtom,
    '©cmt': AttributeAtom,
    '©gen': AttributeAtom,
    'data': AttributeDataAtom,
    'esds': EsdsAtom,
    'gnre': GnreAtom,
    'ilst': AtomBox,
    'mdhd': MdhdAtom,
    'mdia': AtomBox,
    'meta': MetaAtom,
    'minf': AtomBox,
    'moov': AtomBox,
    'stbl': AtomBox,
    'stsd': StsdAtom,
    'trak': AtomBox,
    'trkn': AttributeAtom,
    'udta': AtomBox,
}

# Mp4 File **********************************************************


class File(AtomBox):
    '''The class used to handle MP4 (m4a, m4p) metadata.

    Instances can be used as context managers; the underlying file is then
    released through :meth:`close` when the ``with`` block exits.

    :param infile: The file path to process (passed to ``open_if_filename``;
        presumably an already opened file object is accepted too -- confirm
        against ``util.open_if_filename``).
    '''
    def __init__(self, infile):
        self._fp, self._shouldclose = open_if_filename(infile, 'rb')
        # Seek to the end so that tell() gives the file size, used as root atom size.
        self._fp.seek(0, 2)
        AtomBox.__init__(self, None, 0, (self._fp.tell(), 'root'))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def _get_attr(self, path):
        '''Return the ``attr_data`` of the atom at ``path``, or ``''`` if not found.'''
        atom = self.find(path)
        return atom.attr_data if atom else ''

    def _get_mdat(self):
        '''Return the first non-empty top-level 'mdat' atom, or ``None``.'''
        for atom in self.atoms:
            if atom.size > HEADER_SIZE and atom.type == 'mdat':
                return atom
        return None

    def close(self):
        '''Close the underlying file if this instance is responsible for closing it.'''
        if self._fp and self._shouldclose:
            self._fp.close()
            self._fp = None

    def read(self, startat=0, readcount=-1):
        '''Read ``readcount`` bytes from the file at absolute offset ``startat``.

        Negative offsets are treated as 0.
        '''
        if startat < 0:
            startat = 0
        self._fp.seek(startat)
        return self._fp.read(readcount)

    @property
    def album(self):
        '''The album on which the audio appears.'''
        return self._get_attr('moov.udta.meta.ilst.©alb')

    @property
    def artist(self):
        '''The artist associated with the audio.'''
        return self._get_attr('moov.udta.meta.ilst.©ART')

    @property
    def audio_offset(self):
        '''The offset, in bytes, at which audio data starts in the file.'''
        mdat = self._get_mdat()
        return mdat.start_offset if mdat else 0

    @property
    def audio_size(self):
        '''The size of the audio part of the file in bytes.'''
        mdat = self._get_mdat()
        return mdat.size if mdat else 0

    @property
    def bitrate(self):
        '''The bitrate of the audio file.'''
        atom = self.find('moov.trak.mdia.minf.stbl.stsd.esds')
        return atom.bitrate // 1000 if atom else 0

    @property
    def comment(self):
        '''The comment in the audio file.'''
        return self._get_attr('moov.udta.meta.ilst.©cmt')

    @property
    def duration(self):
        '''The duration of the audio file (in whole seconds).'''
        atom = self.find('moov.trak.mdia.mdhd')
        if not atom:
            return 0
        sample_rate = atom.sample_rate
        # A zero time scale (corrupt header) must not raise ZeroDivisionError.
        return atom.duration // sample_rate if sample_rate else 0

    @property
    def genre(self):
        '''The genre associated with the audio.'''
        data = self._get_attr('moov.udta.meta.ilst.gnre')
        if not data:
            data = self._get_attr('moov.udta.meta.ilst.©gen')
        if isinstance(data, str):
            return data
        elif isinstance(data, int):
            # Integer genres are 1-based indexes into the genre table.
            return genre_by_index(data - 1)
        else:
            return ''

    @property
    def sample_rate(self):
        '''The sample rate of the audio file.'''
        atom = self.find('moov.trak.mdia.mdhd')
        return atom.sample_rate if atom else 0

    @property
    def title(self):
        '''The title associated with the audio.'''
        return self._get_attr('moov.udta.meta.ilst.©nam')

    @property
    def track(self):
        '''The track number associated with the audio.'''
        return tryint(self._get_attr('moov.udta.meta.ilst.trkn'))

    @property
    def valid(self):
        '''Whether the file could correctly be read or not.'''
        return self.find('mdat') is not None

    @property
    def year(self):
        '''The year in which the audio was recorded.'''
        return self._get_attr('moov.udta.meta.ilst.©day')[:4]