1# -*- coding: utf-8 -*- 2# enzyme - Video metadata parser 3# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com> 4# Copyright 2003-2006 Thomas Schueppel <stain@acm.org> 5# Copyright 2003-2006 Dirk Meyer <dischi@freevo.org> 6# 7# This file is part of enzyme. 8# 9# enzyme is free software; you can redistribute it and/or modify it under 10# the terms of the GNU General Public License as published by 11# the Free Software Foundation; either version 3 of the License, or 12# (at your option) any later version. 13# 14# enzyme is distributed in the hope that it will be useful, 15# but WITHOUT ANY WARRANTY; without even the implied warranty of 16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17# GNU General Public License for more details. 18# 19# You should have received a copy of the GNU General Public License 20# along with enzyme. If not, see <http://www.gnu.org/licenses/>. 21from __future__ import absolute_import 22import re 23import logging 24from . import fourcc 25from . import language 26from .strutils import str_to_unicode, unicode_to_str 27 28UNPRINTABLE_KEYS = ['thumbnail', 'url', 'codec_private'] 29MEDIACORE = ['title', 'caption', 'comment', 'size', 'type', 'subtype', 'timestamp', 30 'keywords', 'country', 'language', 'langcode', 'url', 'artist', 31 'mime', 'datetime', 'tags', 'hash'] 32AUDIOCORE = ['channels', 'samplerate', 'length', 'encoder', 'codec', 'format', 33 'samplebits', 'bitrate', 'fourcc', 'trackno', 'id', 'userdate', 34 'enabled', 'default', 'codec_private'] 35MUSICCORE = ['trackof', 'album', 'genre', 'discs', 'thumbnail'] 36VIDEOCORE = ['length', 'encoder', 'bitrate', 'samplerate', 'codec', 'format', 37 'samplebits', 'width', 'height', 'fps', 'aspect', 'trackno', 38 'fourcc', 'id', 'enabled', 'default', 'codec_private'] 39AVCORE = ['length', 'encoder', 'trackno', 'trackof', 'copyright', 'product', 40 'genre', 'writer', 'producer', 'studio', 'rating', 'actors', 'thumbnail', 41 'delay', 'image', 'video', 'audio', 'subtitles', 'chapters', 'software', 42 'summary', 'synopsis', 'season', 'episode', 'series'] 43 44# get logging object 45log = logging.getLogger(__name__) 46 47 48class Media(object): 49 """ 50 Media is the base class to all Media Metadata Containers. It defines 51 the basic structures that handle metadata. Media and its derivates 52 contain a common set of metadata attributes that is listed in keys. 53 Specific derivates contain additional keys to the dublin core set that is 54 defined in Media. 55 """ 56 media = None 57 _keys = MEDIACORE 58 table_mapping = {} 59 60 def __init__(self, hash=None): 61 if hash is not None: 62 # create Media based on dict 63 for key, value in hash.items(): 64 if isinstance(value, list) and value and isinstance(value[0], dict): 65 value = [Media(x) for x in value] 66 self._set(key, value) 67 return 68 69 self._keys = self._keys[:] 70 self.tables = {} 71 # Tags, unlike tables, are more well-defined dicts whose values are 72 # either Tag objects, other dicts (for nested tags), or lists of either 73 # (for multiple instances of the tag, e.g. actor). Where possible, 74 # parsers should transform tag names to conform to the Official 75 # Matroska tags defined at http://www.matroska.org/technical/specs/tagging/index.html 76 # All tag names will be lower-cased. 77 self.tags = Tags() 78 for key in set(self._keys) - set(['media', 'tags']): 79 setattr(self, key, None) 80 81 # 82 # unicode and string convertion for debugging 83 # 84 #TODO: Fix that mess 85 def __unicode__(self): 86 result = '' 87 88 # print normal attributes 89 lists = [] 90 for key in self._keys: 91 value = getattr(self, key, None) 92 if value == None or key == 'url': 93 continue 94 if isinstance(value, list): 95 if not value: 96 continue 97 elif isinstance(value[0], basestring): 98 # Just a list of strings (keywords?), so don't treat it specially. 99 value = ', '.join(value) 100 else: 101 lists.append((key, value)) 102 continue 103 elif isinstance(value, dict): 104 # Tables or tags treated separately. 105 continue 106 if key in UNPRINTABLE_KEYS: 107 value = '<unprintable data, size=%d>' % len(value) 108 result += '| %10s: %s\n' % (unicode(key), unicode(value)) 109 110 # print tags (recursively, to support nested tags). 111 def print_tags(tags, suffix, show_label): 112 result = '' 113 for n, (name, tag) in enumerate(tags.items()): 114 result += '| %12s%s%s = ' % ('tags: ' if n == 0 and show_label else '', suffix, name) 115 if isinstance(tag, list): 116 # TODO: doesn't support lists/dicts within lists. 117 result += '%s\n' % ', '.join(subtag.value for subtag in tag) 118 else: 119 result += '%s\n' % (tag.value or '') 120 if isinstance(tag, dict): 121 result += print_tags(tag, ' ', False) 122 return result 123 result += print_tags(self.tags, '', True) 124 125 # print lists 126 for key, l in lists: 127 for n, item in enumerate(l): 128 label = '+-- ' + key.rstrip('s').capitalize() 129 if key not in ['tracks', 'subtitles', 'chapters']: 130 label += ' Track' 131 result += '%s #%d\n' % (label, n + 1) 132 result += '| ' + re.sub(r'\n(.)', r'\n| \1', unicode(item)) 133 134 # print tables 135 #FIXME: WTH? 136# if log.level >= 10: 137# for name, table in self.tables.items(): 138# result += '+-- Table %s\n' % str(name) 139# for key, value in table.items(): 140# try: 141# value = unicode(value) 142# if len(value) > 50: 143# value = '<unprintable data, size=%d>' % len(value) 144# except (UnicodeDecodeError, TypeError): 145# try: 146# value = '<unprintable data, size=%d>' % len(value) 147# except AttributeError: 148# value = '<unprintable data>' 149# result += '| | %s: %s\n' % (unicode(key), value) 150 return result 151 152 def __str__(self): 153 return unicode(self).encode() 154 155 def __repr__(self): 156 if hasattr(self, 'url'): 157 return '<%s %s>' % (str(self.__class__)[8:-2], self.url) 158 else: 159 return '<%s>' % (str(self.__class__)[8:-2]) 160 161 # 162 # internal functions 163 # 164 def _appendtable(self, name, hashmap): 165 """ 166 Appends a tables of additional metadata to the Object. 167 If such a table already exists, the given tables items are 168 added to the existing one. 169 """ 170 if name not in self.tables: 171 self.tables[name] = hashmap 172 else: 173 # Append to the already existing table 174 for k in hashmap.keys(): 175 self.tables[name][k] = hashmap[k] 176 177 def _set(self, key, value): 178 """ 179 Set key to value and add the key to the internal keys list if 180 missing. 181 """ 182 if value is None and getattr(self, key, None) is None: 183 return 184 if isinstance(value, str): 185 value = str_to_unicode(value) 186 setattr(self, key, value) 187 if not key in self._keys: 188 self._keys.append(key) 189 190 def _set_url(self, url): 191 """ 192 Set the URL of the source 193 """ 194 self.url = url 195 196 def _finalize(self): 197 """ 198 Correct same data based on specific rules 199 """ 200 # make sure all strings are unicode 201 for key in self._keys: 202 if key in UNPRINTABLE_KEYS: 203 continue 204 value = getattr(self, key) 205 if value is None: 206 continue 207 if key == 'image': 208 if isinstance(value, unicode): 209 setattr(self, key, unicode_to_str(value)) 210 continue 211 if isinstance(value, str): 212 setattr(self, key, str_to_unicode(value)) 213 if isinstance(value, unicode): 214 setattr(self, key, value.strip().rstrip().replace('\0', '')) 215 if isinstance(value, list) and value and isinstance(value[0], Media): 216 for submenu in value: 217 submenu._finalize() 218 219 # copy needed tags from tables 220 for name, table in self.tables.items(): 221 mapping = self.table_mapping.get(name, {}) 222 for tag, attr in mapping.items(): 223 if self.get(attr): 224 continue 225 value = table.get(tag, None) 226 if value is not None: 227 if not isinstance(value, (str, unicode)): 228 value = str_to_unicode(str(value)) 229 elif isinstance(value, str): 230 value = str_to_unicode(value) 231 value = value.strip().rstrip().replace('\0', '') 232 setattr(self, attr, value) 233 234 if 'fourcc' in self._keys and 'codec' in self._keys and self.codec is not None: 235 # Codec may be a fourcc, in which case we resolve it to its actual 236 # name and set the fourcc attribute. 237 self.fourcc, self.codec = fourcc.resolve(self.codec) 238 if 'language' in self._keys: 239 self.langcode, self.language = language.resolve(self.language) 240 241 # 242 # data access 243 # 244 def __contains__(self, key): 245 """ 246 Test if key exists in the dict 247 """ 248 return hasattr(self, key) 249 250 def get(self, attr, default=None): 251 """ 252 Returns the given attribute. If the attribute is not set by 253 the parser return 'default'. 254 """ 255 return getattr(self, attr, default) 256 257 def __getitem__(self, attr): 258 """ 259 Get the value of the given attribute 260 """ 261 return getattr(self, attr, None) 262 263 def __setitem__(self, key, value): 264 """ 265 Set the value of 'key' to 'value' 266 """ 267 setattr(self, key, value) 268 269 def has_key(self, key): 270 """ 271 Check if the object has an attribute 'key' 272 """ 273 return hasattr(self, key) 274 275 def convert(self): 276 """ 277 Convert Media to dict. 278 """ 279 result = {} 280 for k in self._keys: 281 value = getattr(self, k, None) 282 if isinstance(value, list) and value and isinstance(value[0], Media): 283 value = [x.convert() for x in value] 284 result[k] = value 285 return result 286 287 def keys(self): 288 """ 289 Return all keys for the attributes set by the parser. 290 """ 291 return self._keys 292 293 294class Collection(Media): 295 """ 296 Collection of Digial Media like CD, DVD, Directory, Playlist 297 """ 298 _keys = Media._keys + ['id', 'tracks'] 299 300 def __init__(self): 301 Media.__init__(self) 302 self.tracks = [] 303 304 305class Tag(object): 306 """ 307 An individual tag, which will be a value stored in a Tags object. 308 309 Tag values are strings (for binary data), unicode objects, or datetime 310 objects for tags that represent dates or times. 311 """ 312 def __init__(self, value=None, langcode='und', binary=False): 313 super(Tag, self).__init__() 314 self.value = value 315 self.langcode = langcode 316 self.binary = binary 317 318 def __unicode__(self): 319 return unicode(self.value) 320 321 def __str__(self): 322 return str(self.value) 323 324 def __repr__(self): 325 if not self.binary: 326 return '<Tag object: %s>' % repr(self.value) 327 else: 328 return '<Binary Tag object: size=%d>' % len(self.value) 329 330 @property 331 def langcode(self): 332 return self._langcode 333 334 @langcode.setter 335 def langcode(self, code): 336 self._langcode, self.language = language.resolve(code) 337 338 339class Tags(dict, Tag): 340 """ 341 A dictionary containing Tag objects. Values can be other Tags objects 342 (for nested tags), lists, or Tag objects. 343 344 A Tags object is more or less a dictionary but it also contains a value. 345 This is necessary in order to represent this kind of tag specification 346 (e.g. for Matroska):: 347 348 <Simple> 349 <Name>LAW_RATING</Name> 350 <String>PG</String> 351 <Simple> 352 <Name>COUNTRY</Name> 353 <String>US</String> 354 </Simple> 355 </Simple> 356 357 The attribute RATING has a value (PG), but it also has a child tag 358 COUNTRY that specifies the country code the rating belongs to. 359 """ 360 def __init__(self, value=None, langcode='und', binary=False): 361 super(Tags, self).__init__() 362 self.value = value 363 self.langcode = langcode 364 self.binary = False 365 366 367class AudioStream(Media): 368 """ 369 Audio Tracks in a Multiplexed Container. 370 """ 371 _keys = Media._keys + AUDIOCORE 372 373 374class Music(AudioStream): 375 """ 376 Digital Music. 377 """ 378 _keys = AudioStream._keys + MUSICCORE 379 380 def _finalize(self): 381 """ 382 Correct same data based on specific rules 383 """ 384 AudioStream._finalize(self) 385 if self.trackof: 386 try: 387 # XXX Why is this needed anyway? 388 if int(self.trackno) < 10: 389 self.trackno = '0%s' % int(self.trackno) 390 except (AttributeError, ValueError): 391 pass 392 393 394class VideoStream(Media): 395 """ 396 Video Tracks in a Multiplexed Container. 397 """ 398 _keys = Media._keys + VIDEOCORE 399 400 401class Chapter(Media): 402 """ 403 Chapter in a Multiplexed Container. 404 """ 405 _keys = ['enabled', 'name', 'pos', 'id'] 406 407 def __init__(self, name=None, pos=0): 408 Media.__init__(self) 409 self.name = name 410 self.pos = pos 411 self.enabled = True 412 413 414class Subtitle(Media): 415 """ 416 Subtitle Tracks in a Multiplexed Container. 417 """ 418 _keys = ['enabled', 'default', 'langcode', 'language', 'trackno', 'title', 419 'id', 'codec'] 420 421 def __init__(self, language=None): 422 Media.__init__(self) 423 self.language = language 424 425 426class AVContainer(Media): 427 """ 428 Container for Audio and Video streams. This is the Container Type for 429 all media, that contain more than one stream. 430 """ 431 _keys = Media._keys + AVCORE 432 433 def __init__(self): 434 Media.__init__(self) 435 self.audio = [] 436 self.video = [] 437 self.subtitles = [] 438 self.chapters = [] 439 440 def _finalize(self): 441 """ 442 Correct same data based on specific rules 443 """ 444 Media._finalize(self) 445 if not self.length and len(self.video) and self.video[0].length: 446 self.length = 0 447 # Length not specified for container, so use the largest length 448 # of its tracks as container length. 449 for track in self.video + self.audio: 450 if track.length: 451 self.length = max(self.length, track.length) 452