1# -*- coding: utf-8 -*-
2# enzyme - Video metadata parser
3# Copyright 2011-2012 Antoine Bertin <diaoulael@gmail.com>
4# Copyright 2003-2006 Thomas Schueppel <stain@acm.org>
5# Copyright 2003-2006 Dirk Meyer <dischi@freevo.org>
6#
7# This file is part of enzyme.
8#
9# enzyme is free software; you can redistribute it and/or modify it under
10# the terms of the GNU General Public License as published by
11# the Free Software Foundation; either version 3 of the License, or
12# (at your option) any later version.
13#
14# enzyme is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with enzyme.  If not, see <http://www.gnu.org/licenses/>.
21from __future__ import absolute_import
22import re
23import logging
24from . import fourcc
25from . import language
26from .strutils import str_to_unicode, unicode_to_str
27
# Attributes holding raw or bulky data: Media._finalize() leaves them
# untouched and Media.__unicode__() never prints their content.
UNPRINTABLE_KEYS = ['thumbnail', 'url', 'codec_private']

# Attribute names of the base Media class.
MEDIACORE = [
    'title', 'caption', 'comment', 'size', 'type', 'subtype', 'timestamp',
    'keywords', 'country', 'language', 'langcode', 'url', 'artist', 'mime',
    'datetime', 'tags', 'hash',
]

# Attribute names added by AudioStream.
AUDIOCORE = [
    'channels', 'samplerate', 'length', 'encoder', 'codec', 'format',
    'samplebits', 'bitrate', 'fourcc', 'trackno', 'id', 'userdate', 'enabled',
    'default', 'codec_private',
]

# Attribute names added by Music on top of AudioStream.
MUSICCORE = ['trackof', 'album', 'genre', 'discs', 'thumbnail']

# Attribute names added by VideoStream.
VIDEOCORE = [
    'length', 'encoder', 'bitrate', 'samplerate', 'codec', 'format',
    'samplebits', 'width', 'height', 'fps', 'aspect', 'trackno', 'fourcc',
    'id', 'enabled', 'default', 'codec_private',
]

# Attribute names added by AVContainer.
AVCORE = [
    'length', 'encoder', 'trackno', 'trackof', 'copyright', 'product',
    'genre', 'writer', 'producer', 'studio', 'rating', 'actors', 'thumbnail',
    'delay', 'image', 'video', 'audio', 'subtitles', 'chapters', 'software',
    'summary', 'synopsis', 'season', 'episode', 'series',
]

# Module-level logger.
log = logging.getLogger(__name__)
46
47
class Media(object):
    """
    Media is the base class to all Media Metadata Containers. It defines
    the basic structures that handle metadata. Media and its derivatives
    contain a common set of metadata attributes that is listed in keys.
    Specific derivatives contain additional keys to the dublin core set that
    is defined in Media.

    Besides the attributes named in ``_keys`` every instance carries:

    * ``tables`` -- dict of raw metadata tables, filled by _appendtable()
    * ``tags`` -- a Tags container
    """
    media = None
    # Class-level default key list; every instance works on its own copy.
    _keys = MEDIACORE
    # Maps a table name to a {table key: attribute name} dict. _finalize()
    # uses it to fill attributes that are still unset from self.tables.
    table_mapping = {}

    def __init__(self, hash=None):
        """
        Create an empty Media object, or one populated from a dict.

        :param hash: optional dict mapping attribute names to values. A
            non-empty list whose first item is a dict is turned into a list
            of Media objects. Entries are applied through _set().
        """
        # Always build the per-instance state first. The key list must be a
        # private copy: _set() appends to it, and appending to the class
        # attribute would leak keys into every other instance.
        self._keys = self._keys[:]
        self.tables = {}
        # Tags, unlike tables, are more well-defined dicts whose values are
        # either Tag objects, other dicts (for nested tags), or lists of either
        # (for multiple instances of the tag, e.g. actor).  Where possible,
        # parsers should transform tag names to conform to the Official
        # Matroska tags defined at http://www.matroska.org/technical/specs/tagging/index.html
        # All tag names will be lower-cased.
        self.tags = Tags()
        for key in set(self._keys) - set(['media', 'tags']):
            setattr(self, key, None)

        if hash is not None:
            # create Media based on dict
            for key, value in hash.items():
                if isinstance(value, list) and value and isinstance(value[0], dict):
                    value = [Media(x) for x in value]
                self._set(key, value)

    #
    # unicode and string conversion for debugging
    #
    #TODO: Fix that mess
    def __unicode__(self):
        """
        Return a multi-line, human readable dump of the metadata.

        Plain attributes come first, then the tags, then every attribute
        that holds a list of non-string items (one section per item).
        """
        parts = []

        # print normal attributes
        lists = []
        for key in self._keys:
            value = getattr(self, key, None)
            if value is None or key == 'url':
                continue
            if isinstance(value, list):
                if not value:
                    continue
                if not isinstance(value[0], basestring):
                    # Rendered after the tags, one section per item.
                    lists.append((key, value))
                    continue
                # Just a list of strings (keywords?), so don't treat it specially.
                value = ', '.join(value)
            elif isinstance(value, dict):
                # Tables or tags treated separately.
                continue
            if key in UNPRINTABLE_KEYS:
                value = '<unprintable data, size=%d>' % len(value)
            parts.append('| %10s: %s\n' % (unicode(key), unicode(value)))

        # print tags (recursively, to support nested tags).
        def print_tags(tags, suffix, show_label):
            lines = []
            for n, (name, tag) in enumerate(tags.items()):
                label = 'tags: ' if n == 0 and show_label else ''
                if isinstance(tag, list):
                    # TODO: doesn't support lists/dicts within lists.
                    # Items without a usable value are shown as empty text
                    # instead of breaking the whole dump.
                    text = ', '.join('%s' % (getattr(subtag, 'value', None) or '')
                                     for subtag in tag)
                else:
                    text = getattr(tag, 'value', None) or ''
                lines.append('| %12s%s%s = %s\n' % (label, suffix, name, text))
                if isinstance(tag, dict):
                    lines.append(print_tags(tag, '    ', False))
            return ''.join(lines)

        tags = getattr(self, 'tags', None)
        if tags:
            parts.append(print_tags(tags, '', True))

        # print lists
        for key, items in lists:
            label = '+-- ' + key.rstrip('s').capitalize()
            if key not in ['tracks', 'subtitles', 'chapters']:
                label += ' Track'
            for n, item in enumerate(items):
                parts.append('%s #%d\n' % (label, n + 1))
                # Indent every continuation line of the nested dump.
                parts.append('|    ' + re.sub(r'\n(.)', r'\n|    \1', unicode(item)))

        # FIXME: printing of self.tables is disabled; the previous
        # implementation only ran when ``log.level >= 10``.
        return ''.join(parts)

    def __str__(self):
        """
        Return the dump produced by __unicode__ as a UTF-8 byte string.
        """
        # An explicit codec is required: the default one is ASCII and raises
        # on any non-ASCII metadata.
        return unicode(self).encode('utf-8')

    def __repr__(self):
        """
        Return ``<module.Class url>``, or ``<module.Class>`` when the object
        has no url attribute.
        """
        cls = self.__class__
        name = '%s.%s' % (cls.__module__, cls.__name__)
        if hasattr(self, 'url'):
            return '<%s %s>' % (name, self.url)
        return '<%s>' % name

    #
    # internal functions
    #
    @staticmethod
    def _clean_text(value):
        """
        Return value without NUL characters and surrounding whitespace.
        """
        # NULs go first so that whitespace sitting in front of trailing NUL
        # padding is stripped as well.
        return value.replace('\0', '').strip()

    def _appendtable(self, name, hashmap):
        """
        Appends a table of additional metadata to the object.
        If such a table already exists, the items of the given table are
        added to the existing one (overwriting entries with the same key).
        """
        if name not in self.tables:
            self.tables[name] = hashmap
        else:
            # Append to the already existing table
            self.tables[name].update(hashmap)

    def _set(self, key, value):
        """
        Set key to value and add the key to the internal keys list if
        missing. Nothing happens when both the new and the current value
        are None. Byte strings are passed through str_to_unicode() first.
        """
        if value is None and getattr(self, key, None) is None:
            return
        if isinstance(value, str):
            value = str_to_unicode(value)
        setattr(self, key, value)
        if key not in self._keys:
            self._keys.append(key)

    def _set_url(self, url):
        """
        Set the URL of the source
        """
        self.url = url

    def _finalize(self):
        """
        Correct some data based on specific rules:

        * string attributes become unicode without NULs and surrounding
          whitespace ('image' is passed through unicode_to_str() instead,
          keys listed in UNPRINTABLE_KEYS are left alone);
        * lists of Media objects are finalized recursively;
        * attributes that are still unset are filled from self.tables
          according to table_mapping;
        * codec/fourcc and language/langcode are resolved through the
          fourcc and language modules.
        """
        # make sure all strings are unicode
        for key in self._keys:
            if key in UNPRINTABLE_KEYS:
                continue
            value = getattr(self, key, None)
            if value is None:
                continue
            if key == 'image':
                if isinstance(value, unicode):
                    setattr(self, key, unicode_to_str(value))
                continue
            if isinstance(value, str):
                # Rebind the local too, so the converted text is cleaned
                # by the unicode branch below.
                value = str_to_unicode(value)
                setattr(self, key, value)
            if isinstance(value, unicode):
                setattr(self, key, self._clean_text(value))
            elif isinstance(value, list) and value and isinstance(value[0], Media):
                for child in value:
                    child._finalize()

        # copy needed tags from tables
        for name, table in self.tables.items():
            mapping = self.table_mapping.get(name, {})
            for tag, attr in mapping.items():
                if self.get(attr):
                    # Never overwrite a value that is already set.
                    continue
                value = table.get(tag, None)
                if value is None:
                    continue
                if not isinstance(value, (str, unicode)):
                    value = str(value)
                if isinstance(value, str):
                    value = str_to_unicode(value)
                setattr(self, attr, self._clean_text(value))

        if 'fourcc' in self._keys and 'codec' in self._keys and self.codec is not None:
            # Codec may be a fourcc, in which case we resolve it to its actual
            # name and set the fourcc attribute.
            self.fourcc, self.codec = fourcc.resolve(self.codec)
        if 'language' in self._keys:
            self.langcode, self.language = language.resolve(self.language)

    #
    # data access
    #
    def __contains__(self, key):
        """
        Test if the object has an attribute 'key'. This is a plain
        hasattr() check, so it is also true for attributes set to None.
        """
        return hasattr(self, key)

    def get(self, attr, default=None):
        """
        Returns the given attribute. If the attribute does not exist
        return 'default'.
        """
        return getattr(self, attr, default)

    def __getitem__(self, attr):
        """
        Get the value of the given attribute, or None if it does not exist
        """
        return getattr(self, attr, None)

    def __setitem__(self, key, value):
        """
        Set the value of 'key' to 'value'
        """
        setattr(self, key, value)

    def has_key(self, key):
        """
        Check if the object has an attribute 'key'
        """
        return key in self

    def convert(self):
        """
        Convert Media to dict. Lists of Media objects are converted to lists
        of dicts.
        """
        result = {}
        for k in self._keys:
            value = getattr(self, k, None)
            if isinstance(value, list) and value and isinstance(value[0], Media):
                value = [x.convert() for x in value]
            result[k] = value
        return result

    def keys(self):
        """
        Return all keys for the attributes set by the parser. The returned
        list is the object's internal key list, not a copy.
        """
        return self._keys
292
293
class Collection(Media):
    """
    Collection of digital media such as a CD, DVD, directory or playlist.

    Adds the 'id' and 'tracks' keys to those of Media; 'tracks' starts out
    as an empty list.
    """
    _keys = Media._keys + ['id', 'tracks']

    def __init__(self):
        super(Collection, self).__init__()
        self.tracks = []
303
304
class Tag(object):
    """
    An individual tag, which will be a value stored in a Tags object.

    Tag values are strings (for binary data), unicode objects, or datetime
    objects for tags that represent dates or times.

    Assigning ``langcode`` passes the code through language.resolve() and
    stores the two results as ``langcode`` and ``language``.
    """
    def __init__(self, value=None, langcode='und', binary=False):
        super(Tag, self).__init__()
        self.value = value
        self.langcode = langcode
        self.binary = binary

    def __unicode__(self):
        return unicode(self.value)

    def __str__(self):
        return str(self.value)

    def __repr__(self):
        if not self.binary:
            return '<Tag object: %r>' % (self.value,)
        # A binary tag may not have a value yet (value defaults to None);
        # report it as empty instead of failing on len(None).
        size = 0 if self.value is None else len(self.value)
        return '<Binary Tag object: size=%d>' % size

    @property
    def langcode(self):
        return self._langcode

    @langcode.setter
    def langcode(self, code):
        # Keeps langcode and language in sync.
        self._langcode, self.language = language.resolve(code)
337
338
class Tags(dict, Tag):
    """
    A dictionary containing Tag objects.  Values can be other Tags objects
    (for nested tags), lists, or Tag objects.

    A Tags object is more or less a dictionary but it also contains a value.
    This is necessary in order to represent this kind of tag specification
    (e.g. for Matroska)::

        <Simple>
          <Name>LAW_RATING</Name>
          <String>PG</String>
            <Simple>
              <Name>COUNTRY</Name>
              <String>US</String>
            </Simple>
        </Simple>

    The tag LAW_RATING has a value (PG), but it also has a child tag
    COUNTRY that specifies the country code the rating belongs to.
    """
    def __init__(self, value=None, langcode='und', binary=False):
        # dict comes first in the MRO and its __init__ does not chain to
        # Tag.__init__, so the Tag attributes are set here explicitly.
        super(Tags, self).__init__()
        self.value = value
        self.langcode = langcode
        # Honour the caller's flag (it used to be forced to False).
        self.binary = binary
365
366
class AudioStream(Media):
    """
    An audio track of a multiplexed container.

    Extends the keys of Media with those listed in AUDIOCORE.
    """
    _keys = Media._keys + AUDIOCORE
372
373
class Music(AudioStream):
    """
    Digital Music.

    Extends the keys of AudioStream with those listed in MUSICCORE.
    """
    _keys = AudioStream._keys + MUSICCORE

    def _finalize(self):
        """
        Correct some data based on specific rules.

        On top of AudioStream._finalize(): when 'trackof' is set and
        'trackno' is a single-digit number, 'trackno' is replaced by its
        zero-padded two-character form (e.g. '07').
        """
        AudioStream._finalize(self)
        if not self.trackof:
            return
        try:
            # XXX Why is this needed anyway?
            number = int(self.trackno)
        except (AttributeError, TypeError, ValueError):
            # trackno is missing, None or not numeric: leave it untouched.
            return
        # Negative numbers are left alone; padding them gives '0-1'.
        if 0 <= number < 10:
            self.trackno = '0%s' % number
392
393
class VideoStream(Media):
    """
    A video track of a multiplexed container.

    Extends the keys of Media with those listed in VIDEOCORE.
    """
    _keys = Media._keys + VIDEOCORE
399
400
class Chapter(Media):
    """
    A chapter of a multiplexed container.

    Uses its own short key list instead of extending the one of Media.
    A new chapter is enabled by default.
    """
    _keys = ['enabled', 'name', 'pos', 'id']

    def __init__(self, name=None, pos=0):
        super(Chapter, self).__init__()
        self.enabled = True
        self.name = name
        self.pos = pos
412
413
class Subtitle(Media):
    """
    A subtitle track of a multiplexed container.

    Uses its own key list instead of extending the one of Media.
    """
    _keys = [
        'enabled', 'default', 'langcode', 'language', 'trackno', 'title', 'id',
        'codec',
    ]

    def __init__(self, language=None):
        super(Subtitle, self).__init__()
        self.language = language
424
425
class AVContainer(Media):
    """
    Container for audio and video streams. This is the container type for
    all media that contain more than one stream.

    The 'audio', 'video', 'subtitles' and 'chapters' attributes start out as
    empty lists.
    """
    _keys = Media._keys + AVCORE

    def __init__(self):
        super(AVContainer, self).__init__()
        self.audio = []
        self.video = []
        self.subtitles = []
        self.chapters = []

    def _finalize(self):
        """
        Correct some data based on specific rules.

        After the generic Media clean-up, a container without a length takes
        the largest length found among its video and audio tracks. This only
        happens when the first video track has a length.
        """
        super(AVContainer, self)._finalize()
        if self.length or not self.video or not self.video[0].length:
            return
        # Length not specified for container, so use the largest length
        # of its tracks as container length.
        longest = 0
        for stream in self.video + self.audio:
            if stream.length:
                longest = max(longest, stream.length)
        self.length = longest
452