1# Created By: Virgil Dupras
2# Created On: 2005/07/27
3# Copyright 2010 Hardcoded Software (http://www.hardcoded.net)
4
5# This software is licensed under the "BSD" License as described in the "LICENSE" file,
6# which should be included with this package. The terms are also available at
7# http://www.hardcoded.net/licenses/bsd_license
8
9import re
10import struct
11
12from .util import open_if_filename, tryint
13from .genres import genre_by_index
14
# Number of bytes in an atom header: a 32-bit size followed by a 4-byte type code
# (see read_atom_header(), which unpacks exactly this many bytes).
HEADER_SIZE = 8

# Pattern used by is_valid_atom_type(): four characters, each of them an ASCII letter, a digit,
# '-' or '©'. It is applied with match(), so it is only anchored at the start of the string.
re_atom_type = re.compile(r'[A-Za-z0-9\-©]{4}')
18
19
def read_atom_header(readfunc, offset):
    """Read and decode the atom header located at ``offset``.

    ``readfunc`` is called as ``readfunc(offset, HEADER_SIZE)`` and must return the raw header
    bytes.

    Returns a ``(size, type)`` tuple, where ``size`` is the size stored in the header and
    ``type`` is the 4-byte type code decoded as latin-1 (so that byte 0xA9 becomes ``©``).
    Returns an empty tuple when ``readfunc`` could not supply a complete header.
    """
    header = readfunc(offset, HEADER_SIZE)
    if len(header) != HEADER_SIZE:
        return ()
    # The size field is an unsigned big-endian 32-bit integer. Decoding it as a signed value
    # made every atom of 2 GiB or more come out with a negative size.
    size, byte_type = struct.unpack('!I4s', header)
    return (size, str(byte_type, 'latin-1'))
28
29
def is_valid_atom_type(atom_type):
    """Return True when ``atom_type`` begins with four characters allowed in an atom type.

    Allowed characters are those accepted by ``re_atom_type``: ASCII letters, digits, ``-``
    and ``©``. The result is a real boolean rather than a match object.
    """
    return re_atom_type.match(atom_type) is not None
33
34# Base atom classes *****************************************
35
36
class Atom:
    """A single atom: a ``HEADER_SIZE``-byte header followed by ``content_size`` bytes.

    The content is decoded lazily, on first access to ``data``, according to the struct format
    returned by ``_get_data_model()``.
    """
    # struct format describing the content, without the byte-order prefix ('!' is prepended
    # when decoding). A '*s' token stands for a byte string that fills whatever part of the
    # content the fixed-size fields leave over.
    cls_data_model = ''

    def __init__(self, parent, start_offset, header=None):
        """Create the atom found at ``start_offset`` inside ``parent``.

        parent is anything that has a read method taking ``(startat, readcount)``.
        header is an already decoded ``(size, type)`` tuple; when it is None the header is
        read from ``parent``. If no complete header can be read, the atom stays invalid with
        a size of 0 and an empty type.
        """
        self.parent = parent
        self.start_offset = start_offset
        self.size = 0
        self.type = ''
        self._valid = False
        self._data = None
        self._datasize = 0  # set for real by _read_atom_data()
        if header is None:
            # read() offsets are relative to the content, which starts HEADER_SIZE bytes after
            # the header; hence the negative offset to reach the header itself.
            header = read_atom_header(self.read, -HEADER_SIZE)
        if header:
            self.size, self.type = header
            self._valid = True

    # --- Protected
    def _get_data_model(self):
        """Return the struct format (without byte-order prefix) of this atom's content."""
        return self.cls_data_model

    def _read_atom_data(self):
        """Read and unpack the content described by the data model.

        Returns the tuple produced by ``struct.unpack`` and records the number of bytes the
        model covers in ``self._datasize``.
        """
        dm = '!' + self._get_data_model()
        if '*s' in dm:
            fixed_size = struct.calcsize(dm.replace('*s', ''))
            # Clamp at zero: an atom whose declared size is smaller than its fixed fields
            # would otherwise yield a negative repeat count, which struct rejects.
            variable_size = max(0, self.content_size - fixed_size)
            # Only the first '*s' is expanded; any further ones are dropped.
            dm = dm.replace('*s', '%ds' % variable_size, 1).replace('*s', '')
        self._datasize = struct.calcsize(dm)
        data = self.read(0, self._datasize)
        if len(data) < self._datasize:
            # Short read: pad (with spaces, the ljust default) so that unpacking can't fail.
            data = data.ljust(self._datasize)
        return struct.unpack(dm, data)

    # --- Public
    def read(self, startat=0, readcount=-1):
        """Read ``readcount`` bytes of content starting at content offset ``startat``.

        A negative ``readcount`` means ``content_size`` bytes. The read is delegated to the
        parent, with the offset translated into the parent's own content coordinates.
        """
        if readcount < 0:
            readcount = self.content_size
        return self.parent.read(self.start_offset + HEADER_SIZE + startat, readcount)

    # --- Properties
    @property
    def content_size(self):
        """The atom size minus the size of its header."""
        return self.size - HEADER_SIZE

    @property
    def data(self):
        """The unpacked content, read on first access and cached afterwards."""
        if self._data is None:
            self._data = self._read_atom_data()
        return self._data

    @property
    def valid(self):
        """Whether a header was available when the atom was created."""
        return self._valid
89
90
class AtomBox(Atom):
    """An atom whose content is made of child atoms, optionally preceded by own data.

    The atom's own data is whatever its data model describes; children are parsed from the
    first byte after it. Children are read lazily on first access to ``atoms``.
    """

    def __init__(self, parent, start_offset, header=None):
        """Same arguments as ``Atom.__init__``."""
        Atom.__init__(self, parent, start_offset, header)
        self._children = None

    # --- Protected
    def _read_children(self):
        """Parse and return the child atoms as a tuple, in content order.

        Children with a type rejected by ``is_valid_atom_type`` are skipped (their size is
        still used to locate the next child).
        """
        children = []
        self.data  # forces _read_atom_data(), which sets self._datasize
        # Children start right after the atom's own data.
        startat = self._datasize
        while startat < self.content_size:
            header = read_atom_header(self.read, startat)
            if not header:
                break
            size, child_type = header
            if size == 0:  # when size is zero, it takes the rest of the atom
                size = self.content_size - startat
            if size < HEADER_SIZE:  # safeguard: always move forward by at least one header
                size = HEADER_SIZE
            if is_valid_atom_type(child_type):
                child_class = self._get_atom_class(child_type)
                children.append(child_class(self, startat, (size, child_type)))
            startat += size

        return tuple(children)

    def _get_atom_class(self, type):
        """Return the class used to wrap a child of the given type (``Atom`` by default)."""
        return ATOM_SPECS.get(type, Atom)

    # --- Public
    def find(self, atom_type):
        """Return the first atom matching the dotted path ``atom_type``, or None.

        ``atom_type`` is either a single 4-character type (``'moov'``) or a dot-separated path
        of such types (``'moov.udta.meta'``). When several children share the type of a path
        element, they are tried in order until one of them contains the rest of the path,
        instead of giving up after the first one. Matching children that are not boxes are
        skipped when the path continues below them.
        """
        gotta_find = atom_type[:4]
        # A remainder exists only if there is room for '.' plus another 4-character type.
        remainder = atom_type[5:] if len(atom_type) >= 9 else None
        # You'd think that iterating through atoms is slow and that there should be a {type:atom}
        # mapping, but the tests I've done on real data shows that doing so is in fact slower.
        # I think this is because most atoms have only a few subatoms.
        for atom in self.atoms:
            if atom.type != gotta_find:
                continue
            if remainder is None:
                return atom
            find_in_child = getattr(atom, 'find', None)
            if find_in_child is None:
                # A leaf atom can't hold the rest of the path; a sibling might.
                continue
            found = find_in_child(remainder)
            if found is not None:
                return found
        return None

    # --- Properties
    @property
    def atoms(self):
        """The child atoms, as a tuple; parsed on first access and cached afterwards."""
        if self._children is None:
            self._children = self._read_children()
        return self._children
139
140
141# Specific atoms *************************************************************
142
class AttributeAtom(AtomBox):
    """A box whose children are all wrapped as ``AttributeDataAtom``.

    The value of the attribute is the one held by the first child.
    """

    def _get_atom_class(self, type):
        """Every child, whatever its type, is handled as an ``AttributeDataAtom``."""
        return AttributeDataAtom

    @property
    def attr_data(self):
        """The ``attr_data`` of the first child, or ``''`` when that lookup hits an IndexError."""
        try:
            first_child = self.atoms[0]
            return first_child.attr_data
        except IndexError:
            # The attribute atom has no data sub-atom. Not a big deal: report an empty value.
            return ''
154
155
class AttributeDataAtom(Atom):
    """Atom holding an attribute value: two 32-bit integers followed by the value itself."""

    def _get_data_model(self, integer_type='i'):
        """Build the data model from the first 32-bit integer of the content.

        When that integer is 0 the value is unpacked with the ``integer_type`` struct code;
        otherwise it is a byte string spanning the rest of the content.
        """
        (data_type,) = struct.unpack('!i', self.read(0, 4))
        value_format = '*s' if data_type != 0 else integer_type
        return '2i' + value_format

    def _read_atom_data(self):
        """Unpack the content, decoding a byte-string value as UTF-8 (invalid bytes dropped)."""
        fields = Atom._read_atom_data(self)
        value = fields[2]
        if not isinstance(value, bytes):
            return fields
        decoded = value.decode('utf-8', 'ignore')
        return fields[:2] + (decoded,) + fields[3:]

    @property
    def attr_data(self):
        """The attribute value: third field of the unpacked content."""
        fields = self.data
        return fields[2]
173
174
class EsdsAtom(Atom):
    """Atom whose content is read as 26 opaque bytes followed by one 32-bit integer."""
    # 26 leading bytes kept as a raw byte string, then the integer exposed as ``bitrate``.
    cls_data_model = '26si'

    @property
    def bitrate(self):
        """The integer following the 26 leading bytes (presumably in bits per second)."""
        fields = self.data
        return fields[1]
181
182
class GnreAtom(AttributeAtom):
    """Attribute atom whose children are all wrapped as ``GnreDataAtom``."""

    def _get_atom_class(self, type):
        """Every child, whatever its type, is handled as a ``GnreDataAtom``."""
        return GnreDataAtom
186
187
class GnreDataAtom(AttributeDataAtom):
    """Attribute data atom whose integer value is an unsigned 16-bit number."""

    def _get_data_model(self):
        """Same model as the parent class, with integer values unpacked as ``'H'``."""
        return super()._get_data_model('H')
191
192
class MetaAtom(AtomBox):
    """Box whose children are preceded by one 32-bit integer of own data."""
    # One 32-bit integer is unpacked as the atom's own data; children are parsed after it.
    cls_data_model = 'i'
195
196
class MdhdAtom(Atom):
    """Media header atom, giving access to the time scale and the duration of a media."""

    def _get_data_model(self):
        """Pick the data model matching the version stored in the first content byte.

        Version 0: 4 bytes of version/flags and two 32-bit timestamps (12 bytes), followed by
        a 32-bit time scale and a 32-bit duration.
        Version 1: 4 bytes of version/flags and two 64-bit timestamps (20 bytes), followed by
        a 32-bit time scale and a 64-bit duration.
        """
        [version] = struct.unpack('B', self.read(0, 1))
        if version > 0:
            # The duration is 64 bits wide here. Reading it as a 32-bit integer only returned
            # its upper half, which is 0 for any realistic duration.
            return '20siq'
        return '12s2i'

    @property
    def sample_rate(self):
        """The time scale of the media: the number of duration units per second."""
        return self.data[1]

    @property
    def duration(self):
        """The duration of the media, expressed in ``sample_rate`` units."""
        return self.data[2]
209
210
class StsdAtom(AtomBox):
    """Box whose amount of own data depends on a 4-byte code found in its content."""

    def _get_data_model(self):
        """Return the model covering the bytes that precede the children.

        The choice is driven by the 4 bytes at content offset 12 (presumably the format of
        the first sample entry): 94 bytes for the first group of codes below, 44 bytes for
        the second group and 24 bytes for anything else.
        """
        (entry_format,) = struct.unpack('4s', self.read(12, 4))
        models_by_formats = (
            ((b'mp4v', b'avc1', b'encv', b's263'), '94s'),
            ((b'mp4a', b'drms', b'enca', b'samr', b'sawb'), '44s'),
        )
        for formats, model in models_by_formats:
            if entry_format in formats:
                return model
        return '24s'
220
221
# Maps an atom type to the class wrapping atoms of that type. AtomBox._get_atom_class() looks
# types up here and falls back to the plain Atom class for anything that is not listed.
ATOM_SPECS = {
    '©nam': AttributeAtom,
    '©ART': AttributeAtom,
    '©wrt': AttributeAtom,
    '©alb': AttributeAtom,
    '©too': AttributeAtom,
    '©day': AttributeAtom,
    '©cmt': AttributeAtom,
    '©gen': AttributeAtom,
    'data': AttributeDataAtom,
    'esds': EsdsAtom,
    'gnre': GnreAtom,
    'ilst': AtomBox,
    'mdhd': MdhdAtom,
    'mdia': AtomBox,
    'meta': MetaAtom,
    'minf': AtomBox,
    'moov': AtomBox,
    'stbl': AtomBox,
    'stsd': StsdAtom,
    'trak': AtomBox,
    'trkn': AttributeAtom,
    'udta': AtomBox,
}
246
247# Mp4 File **********************************************************
248
249
class File(AtomBox):
    '''The class used to handle MP4 (m4a, m4p) metadata.

    The file acts as the root box: its children are the top-level atoms and its ``size`` is
    the size of the file. It can be used as a context manager, in which case ``close()`` is
    called on exit.

    :param infile: The file path to process (it is handed to ``open_if_filename``, so
        presumably an already opened binary file object is accepted as well).
    '''
    def __init__(self, infile):
        self._fp, self._shouldclose = open_if_filename(infile, 'rb')
        # Seek to the end so that tell() gives the file size, used as the root atom's size.
        self._fp.seek(0, 2)
        AtomBox.__init__(self, None, 0, (self._fp.tell(), 'root'))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _get_attr(self, path):
        '''Return the ``attr_data`` of the atom found at ``path``, or ``''`` if there is none.'''
        atom = self.find(path)
        return atom.attr_data if atom else ''

    def _first_mdat(self):
        '''Return the first top-level 'mdat' atom that has some content, or None.'''
        for atom in self.atoms:
            if atom.size > 8 and atom.type == 'mdat':
                return atom
        return None

    def close(self):
        '''Close the underlying file, but only if it was opened by this instance.'''
        if self._fp and self._shouldclose:
            self._fp.close()
            self._fp = None

    def read(self, startat=0, readcount=-1):
        '''Read ``readcount`` bytes at absolute file offset ``startat``.

        Negative offsets are treated as 0; a negative ``readcount`` reads up to the end of
        the file.
        '''
        if startat < 0:
            startat = 0
        self._fp.seek(startat)
        return self._fp.read(readcount)

    @property
    def content_size(self):
        '''The number of bytes in which top-level atoms are looked for: the whole file.

        The root has no header of its own (``read`` uses absolute file offsets), so no header
        size is subtracted here. Subtracting it left the last 8 bytes of the file unscanned
        and made a zero-sized ("up to the end") top-level atom 8 bytes too short.
        '''
        return self.size

    @property
    def album(self):
        '''The album on which the audio appears.'''
        return self._get_attr('moov.udta.meta.ilst.©alb')

    @property
    def artist(self):
        '''The artist associated with the audio.'''
        return self._get_attr('moov.udta.meta.ilst.©ART')

    @property
    def audio_offset(self):
        '''The offset, in bytes, at which audio data starts in the file.'''
        mdat = self._first_mdat()
        return mdat.start_offset if mdat is not None else 0

    @property
    def audio_size(self):
        '''The size of the audio part of the file in bytes.'''
        mdat = self._first_mdat()
        return mdat.size if mdat is not None else 0

    @property
    def bitrate(self):
        '''The bitrate of the audio file.'''
        atom = self.find('moov.trak.mdia.minf.stbl.stsd.esds')
        return atom.bitrate // 1000 if atom else 0

    @property
    def comment(self):
        '''The comment in the audio file.'''
        return self._get_attr('moov.udta.meta.ilst.©cmt')

    @property
    def duration(self):
        '''The duration of the audio file (in whole seconds).

        0 is returned when there is no media header or when its time scale is 0.
        '''
        atom = self.find('moov.trak.mdia.mdhd')
        if not atom:
            return 0
        sample_rate = atom.sample_rate
        # A time scale of 0 (damaged file) must not end up in a ZeroDivisionError.
        return atom.duration // sample_rate if sample_rate else 0

    @property
    def genre(self):
        '''The genre associated with the audio.'''
        data = self._get_attr('moov.udta.meta.ilst.gnre')
        if not data:
            data = self._get_attr('moov.udta.meta.ilst.©gen')
        if isinstance(data, str):
            return data
        elif isinstance(data, int):
            # The stored index is one-based while genre_by_index() is given index - 1.
            return genre_by_index(data - 1)
        else:
            return ''

    @property
    def sample_rate(self):
        '''The sample rate of the audio file.'''
        atom = self.find('moov.trak.mdia.mdhd')
        return atom.sample_rate if atom else 0

    @property
    def title(self):
        '''The title associated with the audio.'''
        return self._get_attr('moov.udta.meta.ilst.©nam')

    @property
    def track(self):
        '''The track number associated with the audio.'''
        return tryint(self._get_attr('moov.udta.meta.ilst.trkn'))

    @property
    def valid(self):
        '''Whether the file could correctly be read or not.'''
        return self.find('mdat') is not None

    @property
    def year(self):
        '''The year in which the audio was recorded.'''
        day = self._get_attr('moov.udta.meta.ilst.©day')
        # The attribute can come back as an integer when it is not stored as text; slicing
        # that would raise a TypeError.
        return day[:4] if isinstance(day, str) else ''
352