1# -*- coding: utf-8 -*-
2# enzyme - Video metadata parser
3# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com>
4# Copyright 2003-2007 Thomas Schueppel <stain@acm.org>
5# Copyright 2003-2007 Dirk Meyer <dischi@freevo.org>
6#
7# This file is part of enzyme.
8#
9# enzyme is free software; you can redistribute it and/or modify it under
10# the terms of the GNU General Public License as published by
11# the Free Software Foundation; either version 3 of the License, or
12# (at your option) any later version.
13#
14# enzyme is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with enzyme.  If not, see <http://www.gnu.org/licenses/>.
21from __future__ import absolute_import
22__all__ = ['Parser']
23
24import zlib
25import logging
26import StringIO
27import struct
28from .exceptions import ParseError
29from . import core
30
31# get logging object
32log = logging.getLogger(__name__)
33
34
35# http://developer.apple.com/documentation/QuickTime/QTFF/index.html
36# http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap4/\
37#     chapter_5_section_2.html#//apple_ref/doc/uid/TP40000939-CH206-BBCBIICE
38# Note: May need to define custom log level to work like ATOM_DEBUG did here
39
# Maps user-data ('udta') item names to the attribute names used for them.
# MPEG4.table_mapping registers this table under the 'QTUDTA' key.
QTUDTA = dict(
    nam='title',
    aut='artist',
    cpy='copyright',
)
45
# QuickTime language codes mapped to language identifiers.  Codes 0-94 and
# 128-138 are defined; the values are kept exactly as the parser has always
# reported them (two-letter codes where available, plain names otherwise).
# Each row below holds five consecutive codes; the comment gives the first.
QTLANGUAGES = dict(enumerate((
    "en", "fr", "de", "it", "nl",                              # 0
    "sv", "es", "da", "pt", "no",                              # 5
    "he", "ja", "ar", "fi", "el",                              # 10
    "is", "mt", "tr", "hr", "Traditional Chinese",             # 15
    "ur", "hi", "th", "ko", "lt",                              # 20
    "pl", "hu", "et", "lv", "Lappish",                         # 25
    "fo", "Farsi", "ru", "Simplified Chinese", "Flemish",      # 30
    "ga", "sq", "ro", "cs", "sk",                              # 35
    "sl", "yi", "sr", "mk", "bg",                              # 40
    "uk", "be", "uz", "kk", "az",                              # 45
    "AzerbaijanAr", "hy", "ka", "mo", "ky",                    # 50
    "tg", "tk", "mn", "MongolianCyr", "ps",                    # 55
    "ku", "ks", "sd", "bo", "ne",                              # 60
    "sa", "mr", "bn", "as", "gu",                              # 65
    "pa", "or", "ml", "kn", "ta",                              # 70
    "te", "si", "my", "Khmer", "lo",                           # 75
    "vi", "id", "tl", "MalayRoman", "MalayArabic",             # 80
    "am", "ti", "om", "so", "sw",                              # 85
    "Ruanda", "Rundi", "Chewa", "mg", "eo",                    # 90
)))
# Second range: codes 128-138.
QTLANGUAGES.update(enumerate((
    "cy", "eu", "ca", "la", "qu",                              # 128
    "gn", "ay", "tt", "ug", "Dzongkha",                        # 133
    "JavaneseRom",                                             # 138
), 128))
154
class MPEG4(core.AVContainer):
    """
    Parser for the MP4 container format. This format is mostly
    identical to Apple Quicktime and 3GP files. It maps to mp4, mov,
    qt and some other extensions.

    All parsing happens in the constructor: the file is walked atom by
    atom and the metadata found is stored on the instance (``mime``,
    ``type``, ``length``, ``volume``, ``timestamp``, the ``video`` and
    ``audio`` stream lists, the ``QTUDTA`` table and ``references``).

    The code works on Python 2 byte strings (atom types are compared with
    ``str`` literals).
    """
    table_mapping = {'QTUDTA': QTUDTA}

    def __init__(self, file):
        """
        Parse ``file`` and populate this container.

        ``file`` is a seekable binary file object positioned at the start
        of the data.  ``ParseError`` is raised when the file ends before a
        usable atom is found, when an atom announces a size smaller than
        its own header, or when the first atom after any ``ftyp``, ``mdat``
        and ``skip`` atoms is not ``moov``, ``wide`` or ``free``.
        """
        core.AVContainer.__init__(self)
        self._references = []

        self.mime = 'video/quicktime'
        self.type = 'Quicktime Video'

        def next_header():
            # Every header read in the constructor is mandatory: running
            # out of data means this is not a file we can parse.
            header = self._readheader(file)
            if header is None:
                raise ParseError()
            return header

        size, atomtype, headersize = next_header()

        if atomtype == 'ftyp':
            # file type information
            if size < headersize:
                raise ParseError()
            consumed = headersize
            if size - headersize >= 4:
                # this should always happen
                if file.read(4) != 'qt  ':
                    # not a quicktime movie, it is a mpeg4 container
                    self.mime = 'video/mp4'
                    self.type = 'MPEG-4 Video'
                consumed += 4
            file.seek(size - consumed, 1)
            size, atomtype, headersize = next_header()

        while atomtype in ('mdat', 'skip'):
            # movie data at the beginning, skip
            if size < headersize:
                # Covers size 0, which would otherwise seek back onto the
                # same header and loop forever.
                raise ParseError()
            file.seek(size - headersize, 1)
            size, atomtype, headersize = next_header()

        if atomtype not in ('moov', 'wide', 'free'):
            log.debug('invalid header: %r' % atomtype)
            raise ParseError()

        # Back over the atom header we just read (8 bytes, or 16 when it
        # carried an extended size), since _readatom expects the file
        # position to be at the start of an atom.
        file.seek(-headersize, 1)
        while self._readatom(file):
            pass

        if self._references:
            self._set('references', self._references)

    @staticmethod
    def _readheader(file):
        """
        Read one atom header from ``file``.

        Returns ``(size, type, headersize)`` or ``None`` when the file ends
        inside the header.  A 32-bit size of 1 announces a 64-bit size
        stored right after the type; in that case ``size`` is the 64-bit
        value and ``headersize`` is 16 instead of 8.
        """
        raw = file.read(8)
        if len(raw) < 8:
            return None
        size, atomtype = struct.unpack('>I4s', raw)
        headersize = 8
        if size == 1:
            raw = file.read(8)
            if len(raw) < 8:
                return None
            size = struct.unpack('>Q', raw)[0]
            headersize = 16
        return size, atomtype, headersize

    @staticmethod
    def _unpack(fmt, data, offset=0):
        """
        Unpack ``fmt`` from ``data`` starting at ``offset``.

        Returns the unpacked tuple, or ``None`` when ``data`` does not hold
        enough bytes, so callers can skip truncated atoms instead of
        failing with ``struct.error``.
        """
        end = offset + struct.calcsize(fmt)
        if offset < 0 or end > len(data):
            return None
        return struct.unpack(fmt, data[offset:end])

    def _readatom(self, file):
        """
        Read and handle the atom at the current position of ``file``.

        Returns the atom size, or 0 when parsing should stop: end of data,
        an atom type that is not alphanumeric, or a failed seek past an
        unknown atom.  An atom whose size field is 0 also yields 0.
        """
        header = self._readheader(file)
        if header is None:
            return 0

        atomsize, atomtype, headersize = header
        if not str(atomtype).decode('latin1').isalnum():
            # stop at nonsense data
            return 0

        # number of payload bytes that follow the header
        datalen = atomsize - headersize

        log.debug('%r [%X]' % (atomtype, atomsize))

        if atomtype == 'udta':
            # Userdata (Metadata)
            self._parseudta(file.read(datalen), datalen)

        elif atomtype == 'trak':
            self._parsetrak(file.read(datalen), datalen)

        elif atomtype == 'mvhd':
            # movie header
            raw = file.read(28)
            fmt = '>6I2h'
            if raw[:1] == '\x01':
                # Version 1 stores the two timestamps and the duration as
                # 64-bit values; the field order is unchanged.
                fmt = '>IQQIQI2h'
                raw += file.read(struct.calcsize(fmt) - len(raw))
            mvhd = self._unpack(fmt, raw)
            if mvhd is not None:
                # mvhd[3] is the time scale, mvhd[4] the duration; a zero
                # time scale carries no usable length.
                if mvhd[3]:
                    self.length = max(self.length, mvhd[4] // mvhd[3])
                self.volume = mvhd[6]
            file.seek(datalen - len(raw), 1)

        elif atomtype == 'cmov':
            # compressed movie
            self._parsecmov(file)

        elif atomtype in ('moov', 'rmra'):
            # movie info / reference list: both only contain other atoms
            while self._readatom(file):
                pass

        elif atomtype == 'mdat':
            pos = file.tell() + datalen
            # maybe there is data inside the mdat
            log.info('parsing mdat')
            while self._readatom(file):
                pass
            log.info('end of mdat')
            file.seek(pos, 0)

        elif atomtype == 'rmda':
            # reference
            self._parsermda(file.read(datalen), datalen)

        else:
            if atomtype not in ('wide', 'free'):
                log.info('unhandled base atom %r' % atomtype)

            # Skip unknown atoms
            try:
                file.seek(datalen, 1)
            except IOError:
                return 0

        return atomsize

    def _parseudta(self, atomdata, datalen):
        """Collect the items of a 'udta' payload into the QTUDTA table."""
        tabl = {}
        i18ntabl = {}
        # Never walk past the announced payload or past what was read.
        end = min(datalen, len(atomdata))
        pos = 0
        while pos + 8 <= end:
            datasize, datatype = struct.unpack('>I4s', atomdata[pos:pos + 8])
            if datasize < 8:
                # An item smaller than its header would never advance pos.
                break
            if ord(datatype[0]) == 169:
                # i18n Metadata: type starts with byte 169 and the payload
                # is a list of (length, language, text) records.
                itemend = min(pos + datasize, len(atomdata))
                mypos = pos + 8
                while mypos + 4 < itemend:
                    # first 4 Bytes are i18n header
                    tlen, lang = struct.unpack('>HH', atomdata[mypos:mypos + 4])
                    text = atomdata[mypos + 4:mypos + 4 + tlen]
                    i18ntabl.setdefault(lang, {})[datatype[1:]] = text
                    mypos += tlen + 4
            elif datatype != 'WLOC':
                # 'WLOC' (window location) is dropped.  Other items are
                # kept only when non-empty and their first byte is > 1.
                value = atomdata[pos + 8:pos + datasize]
                if value and ord(value[0]) > 1:
                    tabl[datatype] = value
            pos += datasize

        if i18ntabl:
            # Only the English entries are used; the plain items are
            # appended together with them.
            for lang in i18ntabl:
                if QTLANGUAGES.get(lang) == 'en':
                    self._appendtable('QTUDTA', i18ntabl[lang])
                    self._appendtable('QTUDTA', tabl)
        else:
            log.debug('NO i18')
            self._appendtable('QTUDTA', tabl)

    def _parsetrak(self, atomdata, datalen):
        """Parse a 'trak' payload and register a video or audio stream."""
        trackinfo = {}
        tracktype = None
        end = min(datalen, len(atomdata))
        pos = 0
        while pos + 8 <= end:
            datasize, datatype = struct.unpack('>I4s', atomdata[pos:pos + 8])
            if datasize < 8:
                # A child smaller than its header would never advance pos.
                break

            if datatype == 'tkhd':
                # Same fields in both versions; version 1 widens the two
                # timestamps and the duration to 64 bits.
                tkhd = None
                ver = atomdata[pos + 8:pos + 9]
                if ver == '\x00':
                    tkhd = self._unpack('>6I8x4H36xII', atomdata, pos + 8)
                elif ver == '\x01':
                    tkhd = self._unpack('>IQQIIQ8x4H36xII', atomdata, pos + 8)

                if tkhd is not None:
                    # width and height are shifted right by 16 bits
                    trackinfo['width'] = tkhd[10] >> 16
                    trackinfo['height'] = tkhd[11] >> 16
                    trackinfo['id'] = tkhd[3]

                    try:
                        # XXX Timestamp of Seconds is since January 1st 1904!
                        # XXX 2082844800 is the difference between Unix and
                        # XXX Apple time. FIXME to work on Apple, too
                        self.timestamp = int(tkhd[1]) - 2082844800
                    except Exception:
                        log.exception('There was trouble extracting timestamp')

            elif datatype == 'mdia':
                pos += 8
                datasize -= 8
                log.debug('--> mdia information')

                # Invariant: pos + datasize is the end of the mdia atom, so
                # the final "pos += datasize" below always lands there, even
                # when this loop stops early.
                while datasize >= 8:
                    mdia = self._unpack('>I4s', atomdata, pos)
                    if mdia is None or mdia[0] < 8:
                        break

                    if mdia[1] == 'mdhd':
                        # Parse based on version of mdhd header.  See
                        # http://wiki.multimedia.cx/index.php?title=QuickTime_container#mdhd
                        mdhd = None
                        ver = atomdata[pos + 8:pos + 9]
                        if ver == '\x00':
                            mdhd = self._unpack('>IIIIIhh', atomdata, pos + 8)
                        elif ver == '\x01':
                            mdhd = self._unpack('>IQQIQhh', atomdata, pos + 8)

                        if mdhd:
                            # duration / time scale; skipped when the time
                            # scale is 0 to avoid a ZeroDivisionError
                            if mdhd[3]:
                                length = mdhd[4] // mdhd[3]
                                trackinfo['length'] = length
                                self.length = max(self.length, length)
                            if mdhd[5] in QTLANGUAGES:
                                trackinfo['language'] = QTLANGUAGES[mdhd[5]]
                            # mdhd[6] == quality
                    elif mdia[1] in ('minf', 'stbl'):
                        # minf and stbl have only atoms inside: undo all but
                        # the header of the advance below to step into them
                        pos -= (mdia[0] - 8)
                        datasize += (mdia[0] - 8)
                    elif mdia[1] == 'hdlr':
                        hdlr = self._unpack('>I4s4s', atomdata, pos + 8)
                        if hdlr is not None and hdlr[1] == 'mhlr':
                            if hdlr[2] == 'vide':
                                tracktype = 'video'
                            if hdlr[2] == 'soun':
                                tracktype = 'audio'
                    elif mdia[1] == 'stsd':
                        stsd = self._unpack('>2I', atomdata, pos + 8)
                        if stsd is not None and stsd[1] > 0:
                            codec = self._unpack('>I4s', atomdata, pos + 16)
                            if codec is not None:
                                trackinfo['codec'] = codec[1]
                                if codec[1] == 'jpeg':
                                    tracktype = 'image'
                    elif mdia[1] == 'dinf':
                        # FIXME: do something with the data references; they
                        # are currently ignored.
                        log.debug('  --> %r, %r (useless)' % mdia)
                    elif mdia[1].startswith('st'):
                        log.debug('  --> %r, %r (sample)' % mdia)
                    elif mdia[1] == 'vmhd' and not tracktype:
                        # indicates that this track is video
                        tracktype = 'video'
                    elif mdia[1] == 'smhd' and not tracktype:
                        # indicates that this track is audio
                        tracktype = 'audio'
                    else:
                        log.debug('  --> %r, %r (unknown)' % mdia)

                    pos += mdia[0]
                    datasize -= mdia[0]

            elif datatype == 'udta':
                # The unpacked header is a tuple, so it has to be wrapped
                # to be formatted by a single %r.
                log.debug('udta: %r' % (struct.unpack('>I4s', atomdata[:8]),))
            elif datatype == 'edts':
                log.debug('--> %r [%d] (edit list)' % (datatype, datasize))
            else:
                log.debug('--> %r [%d] (unknown)' % (datatype, datasize))
            pos += datasize

        # Tracks of any other type (e.g. 'image') are not registered.
        info = None
        if tracktype == 'video':
            info = core.VideoStream()
            self.video.append(info)
        if tracktype == 'audio':
            info = core.AudioStream()
            self.audio.append(info)
        if info:
            for key, value in trackinfo.items():
                setattr(info, key, value)

    def _parsecmov(self, file):
        """
        Handle a compressed movie ('cmov') atom.

        Expects a 'dcom' atom naming the compression method followed by a
        'cmvd' atom with the data.  zlib data is decompressed and parsed
        as a sequence of atoms; anything else is skipped.
        """
        header = self._unpack('>I4s', file.read(8))
        if header is None or header[1] != 'dcom' or header[0] < 8:
            return
        method = file.read(header[0] - 8)[:4]

        header = self._unpack('>I4s', file.read(8))
        if header is None or header[1] != 'cmvd' or header[0] < 8:
            return
        datasize = header[0]

        if method != 'zlib':
            log.info('unknown compression %r' % method)
            # unknown compression method
            file.seek(datasize - 8, 1)
            return

        data = file.read(datasize - 8)
        try:
            decompressed = zlib.decompress(data)
        except zlib.error:
            try:
                # Retry without the first 4 bytes (presumably the
                # uncompressed-size field that precedes the stream).
                decompressed = zlib.decompress(data[4:])
            except zlib.error:
                log.exception('There was a proble decompressiong atom')
                return

        decompressedIO = StringIO.StringIO(decompressed)
        while self._readatom(decompressedIO):
            pass

    def _parsermda(self, atomdata, datalen):
        """Parse a reference ('rmda') payload into self._references."""
        url = ''
        quality = 0
        datarate = 0
        end = min(datalen, len(atomdata))
        pos = 0
        while pos + 8 <= end:
            datasize, datatype = struct.unpack('>I4s', atomdata[pos:pos + 8])
            if datasize < 8:
                # A child smaller than its header would never advance pos.
                break
            if datatype == 'rdrf':
                rdrf = self._unpack('>I4sI', atomdata, pos + 8)
                if rdrf is not None and rdrf[1] == 'url ':
                    url = atomdata[pos + 20:pos + 20 + rdrf[2]]
                    # cut at the first NUL byte (unless it is the first one)
                    if url.find('\0') > 0:
                        url = url[:url.find('\0')]
            elif datatype == 'rmqu':
                rmqu = self._unpack('>I', atomdata, pos + 8)
                if rmqu is not None:
                    quality = rmqu[0]
            elif datatype == 'rmdr':
                rmdr = self._unpack('>I', atomdata, pos + 12)
                if rmdr is not None:
                    datarate = rmdr[0]
            pos += datasize

        if url:
            self._references.append((url, quality, datarate))


# Name exported through __all__.
Parser = MPEG4
476