1# -*- coding: utf-8 -*- 2# enzyme - Video metadata parser 3# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> 4# Copyright 2003-2007 Thomas Schueppel <stain@acm.org> 5# Copyright 2003-2007 Dirk Meyer <dischi@freevo.org> 6# 7# This file is part of enzyme. 8# 9# enzyme is free software; you can redistribute it and/or modify it under 10# the terms of the GNU General Public License as published by 11# the Free Software Foundation; either version 3 of the License, or 12# (at your option) any later version. 13# 14# enzyme is distributed in the hope that it will be useful, 15# but WITHOUT ANY WARRANTY; without even the implied warranty of 16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17# GNU General Public License for more details. 18# 19# You should have received a copy of the GNU General Public License 20# along with enzyme. If not, see <http://www.gnu.org/licenses/>. 21from __future__ import absolute_import 22__all__ = ['Parser'] 23 24import zlib 25import logging 26import StringIO 27import struct 28from .exceptions import ParseError 29from . import core 30 31# get logging object 32log = logging.getLogger(__name__) 33 34 35# http://developer.apple.com/documentation/QuickTime/QTFF/index.html 36# http://developer.apple.com/documentation/QuickTime/QTFF/QTFFChap4/\ 37# chapter_5_section_2.html#//apple_ref/doc/uid/TP40000939-CH206-BBCBIICE 38# Note: May need to define custom log level to work like ATOM_DEBUG did here 39 40QTUDTA = { 41 'nam': 'title', 42 'aut': 'artist', 43 'cpy': 'copyright' 44} 45 46QTLANGUAGES = { 47 0: "en", 48 1: "fr", 49 2: "de", 50 3: "it", 51 4: "nl", 52 5: "sv", 53 6: "es", 54 7: "da", 55 8: "pt", 56 9: "no", 57 10: "he", 58 11: "ja", 59 12: "ar", 60 13: "fi", 61 14: "el", 62 15: "is", 63 16: "mt", 64 17: "tr", 65 18: "hr", 66 19: "Traditional Chinese", 67 20: "ur", 68 21: "hi", 69 22: "th", 70 23: "ko", 71 24: "lt", 72 25: "pl", 73 26: "hu", 74 27: "et", 75 28: "lv", 76 29: "Lappish", 77 30: "fo", 78 31: "Farsi", 79 32: "ru", 80 33: "Simplified Chinese", 81 34: "Flemish", 82 35: "ga", 83 36: "sq", 84 37: "ro", 85 38: "cs", 86 39: "sk", 87 40: "sl", 88 41: "yi", 89 42: "sr", 90 43: "mk", 91 44: "bg", 92 45: "uk", 93 46: "be", 94 47: "uz", 95 48: "kk", 96 49: "az", 97 50: "AzerbaijanAr", 98 51: "hy", 99 52: "ka", 100 53: "mo", 101 54: "ky", 102 55: "tg", 103 56: "tk", 104 57: "mn", 105 58: "MongolianCyr", 106 59: "ps", 107 60: "ku", 108 61: "ks", 109 62: "sd", 110 63: "bo", 111 64: "ne", 112 65: "sa", 113 66: "mr", 114 67: "bn", 115 68: "as", 116 69: "gu", 117 70: "pa", 118 71: "or", 119 72: "ml", 120 73: "kn", 121 74: "ta", 122 75: "te", 123 76: "si", 124 77: "my", 125 78: "Khmer", 126 79: "lo", 127 80: "vi", 128 81: "id", 129 82: "tl", 130 83: "MalayRoman", 131 84: "MalayArabic", 132 85: "am", 133 86: "ti", 134 87: "om", 135 88: "so", 136 89: "sw", 137 90: "Ruanda", 138 91: "Rundi", 139 92: "Chewa", 140 93: "mg", 141 94: "eo", 142 128: "cy", 143 129: "eu", 144 130: "ca", 145 131: "la", 146 132: "qu", 147 133: "gn", 148 134: "ay", 149 135: "tt", 150 136: "ug", 151 137: "Dzongkha", 152 138: "JavaneseRom", 153} 154 155class MPEG4(core.AVContainer): 156 """ 157 Parser for the MP4 container format. This format is mostly 158 identical to Apple Quicktime and 3GP files. It maps to mp4, mov, 159 qt and some other extensions. 160 """ 161 table_mapping = {'QTUDTA': QTUDTA} 162 163 def __init__(self, file): 164 core.AVContainer.__init__(self) 165 self._references = [] 166 167 self.mime = 'video/quicktime' 168 self.type = 'Quicktime Video' 169 h = file.read(8) 170 try: 171 (size, type) = struct.unpack('>I4s', h) 172 except struct.error: 173 # EOF. 174 raise ParseError() 175 176 if type == 'ftyp': 177 # file type information 178 if size >= 12: 179 # this should always happen 180 if file.read(4) != 'qt ': 181 # not a quicktime movie, it is a mpeg4 container 182 self.mime = 'video/mp4' 183 self.type = 'MPEG-4 Video' 184 size -= 4 185 file.seek(size - 8, 1) 186 h = file.read(8) 187 (size, type) = struct.unpack('>I4s', h) 188 189 while type in ['mdat', 'skip']: 190 # movie data at the beginning, skip 191 file.seek(size - 8, 1) 192 h = file.read(8) 193 (size, type) = struct.unpack('>I4s', h) 194 195 if not type in ['moov', 'wide', 'free']: 196 log.debug('invalid header: %r' % type) 197 raise ParseError() 198 199 # Extended size 200 if size == 1: 201 size = struct.unpack('>Q', file.read(8)) 202 203 # Back over the atom header we just read, since _readatom expects the 204 # file position to be at the start of an atom. 205 file.seek(-8, 1) 206 while self._readatom(file): 207 pass 208 209 if self._references: 210 self._set('references', self._references) 211 212 213 def _readatom(self, file): 214 s = file.read(8) 215 if len(s) < 8: 216 return 0 217 218 atomsize, atomtype = struct.unpack('>I4s', s) 219 if not str(atomtype).decode('latin1').isalnum(): 220 # stop at nonsense data 221 return 0 222 223 log.debug('%r [%X]' % (atomtype, atomsize)) 224 225 if atomtype == 'udta': 226 # Userdata (Metadata) 227 pos = 0 228 tabl = {} 229 i18ntabl = {} 230 atomdata = file.read(atomsize - 8) 231 while pos < atomsize - 12: 232 (datasize, datatype) = struct.unpack('>I4s', atomdata[pos:pos + 8]) 233 if ord(datatype[0]) == 169: 234 # i18n Metadata... 235 mypos = 8 + pos 236 while mypos + 4 < datasize + pos: 237 # first 4 Bytes are i18n header 238 (tlen, lang) = struct.unpack('>HH', atomdata[mypos:mypos + 4]) 239 i18ntabl[lang] = i18ntabl.get(lang, {}) 240 l = atomdata[mypos + 4:mypos + tlen + 4] 241 i18ntabl[lang][datatype[1:]] = l 242 mypos += tlen + 4 243 elif datatype == 'WLOC': 244 # Drop Window Location 245 pass 246 else: 247 if ord(atomdata[pos + 8:pos + datasize][0]) > 1: 248 tabl[datatype] = atomdata[pos + 8:pos + datasize] 249 pos += datasize 250 if len(i18ntabl.keys()) > 0: 251 for k in i18ntabl.keys(): 252 if k in QTLANGUAGES and QTLANGUAGES[k] == 'en': 253 self._appendtable('QTUDTA', i18ntabl[k]) 254 self._appendtable('QTUDTA', tabl) 255 else: 256 log.debug('NO i18') 257 self._appendtable('QTUDTA', tabl) 258 259 elif atomtype == 'trak': 260 atomdata = file.read(atomsize - 8) 261 pos = 0 262 trackinfo = {} 263 tracktype = None 264 while pos < atomsize - 8: 265 (datasize, datatype) = struct.unpack('>I4s', atomdata[pos:pos + 8]) 266 267 if datatype == 'tkhd': 268 tkhd = struct.unpack('>6I8x4H36xII', atomdata[pos + 8:pos + datasize]) 269 trackinfo['width'] = tkhd[10] >> 16 270 trackinfo['height'] = tkhd[11] >> 16 271 trackinfo['id'] = tkhd[3] 272 273 try: 274 # XXX Timestamp of Seconds is since January 1st 1904! 275 # XXX 2082844800 is the difference between Unix and 276 # XXX Apple time. FIXME to work on Apple, too 277 self.timestamp = int(tkhd[1]) - 2082844800 278 except Exception as e: 279 log.exception('There was trouble extracting timestamp') 280 281 elif datatype == 'mdia': 282 pos += 8 283 datasize -= 8 284 log.debug('--> mdia information') 285 286 while datasize: 287 mdia = struct.unpack('>I4s', atomdata[pos:pos + 8]) 288 if mdia[1] == 'mdhd': 289 # Parse based on version of mdhd header. See 290 # http://wiki.multimedia.cx/index.php?title=QuickTime_container#mdhd 291 ver = ord(atomdata[pos + 8]) 292 if ver == 0: 293 mdhd = struct.unpack('>IIIIIhh', atomdata[pos + 8:pos + 8 + 24]) 294 elif ver == 1: 295 mdhd = struct.unpack('>IQQIQhh', atomdata[pos + 8:pos + 8 + 36]) 296 else: 297 mdhd = None 298 299 if mdhd: 300 # duration / time scale 301 trackinfo['length'] = mdhd[4] / mdhd[3] 302 if mdhd[5] in QTLANGUAGES: 303 trackinfo['language'] = QTLANGUAGES[mdhd[5]] 304 # mdhd[6] == quality 305 self.length = max(self.length, mdhd[4] / mdhd[3]) 306 elif mdia[1] == 'minf': 307 # minf has only atoms inside 308 pos -= (mdia[0] - 8) 309 datasize += (mdia[0] - 8) 310 elif mdia[1] == 'stbl': 311 # stbl has only atoms inside 312 pos -= (mdia[0] - 8) 313 datasize += (mdia[0] - 8) 314 elif mdia[1] == 'hdlr': 315 hdlr = struct.unpack('>I4s4s', atomdata[pos + 8:pos + 8 + 12]) 316 if hdlr[1] == 'mhlr': 317 if hdlr[2] == 'vide': 318 tracktype = 'video' 319 if hdlr[2] == 'soun': 320 tracktype = 'audio' 321 elif mdia[1] == 'stsd': 322 stsd = struct.unpack('>2I', atomdata[pos + 8:pos + 8 + 8]) 323 if stsd[1] > 0: 324 codec = atomdata[pos + 16:pos + 16 + 8] 325 codec = struct.unpack('>I4s', codec) 326 trackinfo['codec'] = codec[1] 327 if codec[1] == 'jpeg': 328 tracktype = 'image' 329 elif mdia[1] == 'dinf': 330 dref = struct.unpack('>I4s', atomdata[pos + 8:pos + 8 + 8]) 331 log.debug(' --> %r, %r (useless)' % mdia) 332 if dref[1] == 'dref': 333 num = struct.unpack('>I', atomdata[pos + 20:pos + 20 + 4])[0] 334 rpos = pos + 20 + 4 335 for ref in range(num): 336 # FIXME: do somthing if this references 337 ref = struct.unpack('>I3s', atomdata[rpos:rpos + 7]) 338 data = atomdata[rpos + 7:rpos + ref[0]] 339 rpos += ref[0] 340 else: 341 if mdia[1].startswith('st'): 342 log.debug(' --> %r, %r (sample)' % mdia) 343 elif mdia[1] == 'vmhd' and not tracktype: 344 # indicates that this track is video 345 tracktype = 'video' 346 elif mdia[1] in ['vmhd', 'smhd'] and not tracktype: 347 # indicates that this track is audio 348 tracktype = 'audio' 349 else: 350 log.debug(' --> %r, %r (unknown)' % mdia) 351 352 pos += mdia[0] 353 datasize -= mdia[0] 354 355 elif datatype == 'udta': 356 log.debug('udta: %r' % struct.unpack('>I4s', atomdata[:8])) 357 else: 358 if datatype == 'edts': 359 log.debug('--> %r [%d] (edit list)' % \ 360 (datatype, datasize)) 361 else: 362 log.debug('--> %r [%d] (unknown)' % \ 363 (datatype, datasize)) 364 pos += datasize 365 366 info = None 367 if tracktype == 'video': 368 info = core.VideoStream() 369 self.video.append(info) 370 if tracktype == 'audio': 371 info = core.AudioStream() 372 self.audio.append(info) 373 if info: 374 for key, value in trackinfo.items(): 375 setattr(info, key, value) 376 377 elif atomtype == 'mvhd': 378 # movie header 379 mvhd = struct.unpack('>6I2h', file.read(28)) 380 self.length = max(self.length, mvhd[4] / mvhd[3]) 381 self.volume = mvhd[6] 382 file.seek(atomsize - 8 - 28, 1) 383 384 385 elif atomtype == 'cmov': 386 # compressed movie 387 datasize, atomtype = struct.unpack('>I4s', file.read(8)) 388 if not atomtype == 'dcom': 389 return atomsize 390 391 method = struct.unpack('>4s', file.read(datasize - 8))[0] 392 393 datasize, atomtype = struct.unpack('>I4s', file.read(8)) 394 if not atomtype == 'cmvd': 395 return atomsize 396 397 if method == 'zlib': 398 data = file.read(datasize - 8) 399 try: 400 decompressed = zlib.decompress(data) 401 except Exception as e: 402 try: 403 decompressed = zlib.decompress(data[4:]) 404 except Exception as e: 405 log.exception('There was a proble decompressiong atom') 406 return atomsize 407 408 decompressedIO = StringIO.StringIO(decompressed) 409 while self._readatom(decompressedIO): 410 pass 411 412 else: 413 log.info('unknown compression %r' % method) 414 # unknown compression method 415 file.seek(datasize - 8, 1) 416 417 elif atomtype == 'moov': 418 # decompressed movie info 419 while self._readatom(file): 420 pass 421 422 elif atomtype == 'mdat': 423 pos = file.tell() + atomsize - 8 424 # maybe there is data inside the mdat 425 log.info('parsing mdat') 426 while self._readatom(file): 427 pass 428 log.info('end of mdat') 429 file.seek(pos, 0) 430 431 432 elif atomtype == 'rmra': 433 # reference list 434 while self._readatom(file): 435 pass 436 437 elif atomtype == 'rmda': 438 # reference 439 atomdata = file.read(atomsize - 8) 440 pos = 0 441 url = '' 442 quality = 0 443 datarate = 0 444 while pos < atomsize - 8: 445 (datasize, datatype) = struct.unpack('>I4s', atomdata[pos:pos + 8]) 446 if datatype == 'rdrf': 447 rflags, rtype, rlen = struct.unpack('>I4sI', atomdata[pos + 8:pos + 20]) 448 if rtype == 'url ': 449 url = atomdata[pos + 20:pos + 20 + rlen] 450 if url.find('\0') > 0: 451 url = url[:url.find('\0')] 452 elif datatype == 'rmqu': 453 quality = struct.unpack('>I', atomdata[pos + 8:pos + 12])[0] 454 455 elif datatype == 'rmdr': 456 datarate = struct.unpack('>I', atomdata[pos + 12:pos + 16])[0] 457 458 pos += datasize 459 if url: 460 self._references.append((url, quality, datarate)) 461 462 else: 463 if not atomtype in ['wide', 'free']: 464 log.info('unhandled base atom %r' % atomtype) 465 466 # Skip unknown atoms 467 try: 468 file.seek(atomsize - 8, 1) 469 except IOError: 470 return 0 471 472 return atomsize 473 474 475Parser = MPEG4 476