1# -*- coding: utf-8 -*-
2from future.utils import with_metaclass
3from future.utils import iteritems, itervalues
4import re
5import sys
6from collections import defaultdict
7from itertools import chain
8from copy import deepcopy
9
10
# Platform identifier that Type.is_platform matches system patterns against.
PLATFORM = sys.platform

# All patterns are raw strings so that sequences such as \w or \d reach the
# regex engine unchanged instead of being treated as (invalid) string escapes.
UNREG_RE = re.compile(r'[Xx]-', re.DOTALL)
ENCODING_RE = re.compile(r'(?:base64|7bit|8bit|quoted\-printable)', re.DOTALL)
PLATFORM_RE = re.compile(sys.platform, re.DOTALL)
# Group 1 is the media type, group 2 the (possibly empty) sub-type.
MEDIA_TYPE_RE = re.compile(r'([-\w.+]+)\/([-\w.+]*)', re.DOTALL)

# Simplified content types that Type.is_signature reports as signatures.
SIGNATURES = ('application/pgp-keys',
              'application/pgp',
              'application/pgp-signature',
              'application/pkcs10',
              'application/pkcs7-mime',
              'application/pkcs7-signature',
              'text/vcard')

# %-templates used by Type.urls to expand the special URL tokens.
RFC_URL = "http://rfc-editor.org/rfc/rfc%s.txt"
IANA_URL = "http://www.iana.org/assignments/media-types/%s/%s"
LTSW_URL = "http://www.ltsw.se/knbase/internet/%s.htp"
DRAFT_URL = "http://datatracker.ietf.org/public/idindex.cgi?command=id_details&filename=%s"
CONTACT_URL = "http://www.iana.org/assignments/contact-people.htm#%s"
# Maps a pattern with one capture group to the template that group fills.
REGEX_URLS = {r'^RFC(\d+)$': RFC_URL, r'^DRAFT:(.+)$': DRAFT_URL, r'^\[([^\]]+)\]': CONTACT_URL}
31
if sys.version_info[0] >= 3:
    # Python 3 dropped both names; recreate them so the rest of the module
    # can keep using the Python 2 spellings.
    basestring = str

    def cmp(x, y):
        """
        Three-way comparison standing in for the builtin removed in Python 3.

        Returns a negative number, zero or a positive number when x sorts
        before, equal to or after y. If either operand is a Type, its
        __cmp__ method decides; the sign is flipped when the Type is the
        right-hand operand.
        """
        if isinstance(x, Type):
            return x.__cmp__(y)
        if isinstance(y, Type):
            return y.__cmp__(x) * -1
        return 0 if x == y else (1 if x > y else -1)
38
def flatten(l):
    """
    Return a flat list of the leaves of an arbitrarily nested list/tuple.

    Anything that is not a list or tuple counts as a single leaf, so a bare
    value comes back wrapped in a one-element list. Leaves keep their
    original left-to-right order.
    """
    if not isinstance(l, (list, tuple)):
        return [l]
    leaves = []
    for item in l:
        leaves.extend(flatten(item))
    return leaves
43
44
class InvalidContentType(RuntimeError):
    """Raised when a value cannot be used as a MIME content type."""
47
48
class Type(object):
    """
    Describes a single MIME content type, e.g. 'text/plain' or
    'application/x-eruby'.

    An instance is built from the content-type string alone. Extensions,
    transfer encoding, platform restriction, documentation and reference
    URLs are attached afterwards through the properties below or through
    the alternate constructors from_array, from_hash and from_mime_type.
    """

    # Patterns for the labelled entries accepted in `url` and for the
    # "use-instead:<type>" markers in `docs`; compiled once for the class
    # instead of on every property access.
    _LABELLED_URL_RE = re.compile(r'^\{([^=]+)=([^\}]+)\}')
    _LABELLED_CONTACT_RE = re.compile(r'^\[([^=]+)=([^\]]+)\]')
    _USE_INSTEAD_RE = re.compile(r'use-instead:([-\w.+]+)\/([-\w.+]*)')

    def __init__(self, content_type):
        """
        Parse content_type and initialise every other field to its default.

        Raises InvalidContentType when content_type is None, does not start
        with a '<media>/<sub>' pair, or has an empty media type once the
        'x-' markers are stripped.
        """
        if content_type is None:
            raise InvalidContentType('Invalid Content-Type provided "(%s)"' % content_type)

        matchdata = MEDIA_TYPE_RE.match(content_type)
        if matchdata is None:
            raise InvalidContentType('Invalid Content-Type provided "(%s)"' % content_type)

        # content_type
        #   The whole MIME content-type string, exactly as given.
        #   text/plain        => text/plain
        #   x-chemical/x-pdb  => x-chemical/x-pdb
        self.content_type = content_type

        # raw_media_type / raw_sub_type
        #   The two halves of the unmodified MIME type.
        #   text/plain        => text       / plain
        #   x-chemical/x-pdb  => x-chemical / x-pdb
        (self.raw_media_type, self.raw_sub_type) = matchdata.group(1, 2)

        # simplified
        #   Lower-cased content type with the 'x-' (unregistered) markers
        #   removed; see simplify().
        #   text/plain        => text/plain
        #   x-chemical/x-pdb  => chemical/pdb
        self.simplified = self.simplify(self.content_type)

        # media_type / sub_type
        #   The two halves of the simplified MIME type.
        #   text/plain        => text     / plain
        #   x-chemical/x-pdb  => chemical / pdb
        simplified_match = MEDIA_TYPE_RE.match(self.simplified)
        if simplified_match is None:
            # e.g. 'x-/foo' simplifies to '/foo'; report it as an invalid
            # content type rather than failing with an AttributeError.
            raise InvalidContentType('Invalid Content-Type provided "(%s)"' % content_type)
        (self.media_type, self.sub_type) = simplified_match.group(1, 2)

        # Backing fields for the extensions, encoding and system properties.
        self._extensions = []
        self._encoding = 'default'
        self._system = None
        self.registered = True

        # The raw (encoded) URL list; the urls property decodes it.
        self.url = None
        self.is_obsolete = False
        self._docs = ''
        self._use_instead = None

    def __repr__(self):
        return '<MIME::Type %s>' % self.content_type

    def __str__(self):
        return self.content_type

    def __cmp__(self, other):
        """
        Three-way comparison against another type or a string.

        Objects exposing `content_type` are compared by lower-cased content
        type. Strings are simplified first and compared with this type's
        simplified form. Anything else is compared through its lower()
        result against the lower-cased content type.
        """
        if hasattr(other, 'content_type'):
            return cmp(self.content_type.lower(), other.content_type.lower())
        elif isinstance(other, basestring):
            return cmp(self.simplified, self.simplify(str(other)))
        else:
            return cmp(self.content_type.lower(), other.lower())

    # Rich comparisons all delegate to __cmp__ so the ordering rules live in
    # one place (Python 3 never calls __cmp__ on its own).
    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0

    def __eq__(self, other):
        """
        Return True only if other is an instance of this class and the
        lower-cased content types match. Strings never compare equal; use
        is_like() for that.
        """
        return isinstance(other, self.__class__) and self.__cmp__(other) == 0

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        # Defining __eq__ removes the inherited hash on Python 3. Hash the
        # same value __eq__ compares so that equal types hash alike.
        return hash(self.content_type.lower())

    def is_like(self, other):
        """
        Return True if the simplified type matches that of other, which may
        be another type (anything with `simplified`) or a content-type
        string.
        """
        if hasattr(other, 'simplified'):
            return self.simplified == other.simplified
        else:
            return self.simplified == self.simplify(other)

    def priority_compare(self, other):
        """
        Three-way comparison that ranks definitions of the same simplified
        type by reliability. The checks, in order:

        1. self.simplified versus other.simplified; different types are
           ordered by that alone.
        2. Registered definitions < unregistered definitions.
        3. Generic definitions < definitions for the current platform.
        4. Complete definitions < incomplete definitions.
        5. Current definitions < obsolete definitions.
        6. If still tied and self is obsolete with a different use-instead
           value: a missing use-instead on self sorts first, a missing one
           on other sorts self last, otherwise the use-instead lists are
           compared.

        NOTE(review): the upstream prose says "obsolete with use-instead <
        obsolete without", the reverse of what step 6 does; the code's
        behaviour is kept as it was.
        """
        pc = cmp(self.simplified, other.simplified)
        if pc == 0:
            if self.is_registered != other.is_registered:
                # registered < unregistered
                pc = -1 if self.is_registered else 1
            elif bool(self.is_platform) != bool(other.is_platform):
                # generic < platform; is_platform may yield a match object,
                # so compare truth values rather than the objects.
                pc = 1 if self.is_platform else -1
            elif self.is_complete != other.is_complete:
                # complete < incomplete
                pc = -1 if self.is_complete else 1
            elif self.is_obsolete != other.is_obsolete:
                # current < obsolete
                pc = 1 if self.is_obsolete else -1
            if pc == 0 and self.is_obsolete and (self.use_instead != other.use_instead):
                if self.use_instead is None:
                    pc = -1
                elif other.use_instead is None:
                    pc = 1
                else:
                    pc = cmp(self.use_instead, other.use_instead)
        return pc

    @property
    def extensions(self):
        """The list of file extensions known for this type."""
        return self._extensions

    @extensions.setter
    def extensions(self, value):
        # None clears the list; nested lists/tuples are flattened and None
        # entries dropped, as the original description of this field states.
        if value is None:
            self._extensions = []
        else:
            self._extensions = [ext for ext in flatten(value) if ext is not None]

    @property
    def default_encoding(self):
        """'quoted-printable' for text/* types, 'base64' for everything else."""
        return 'quoted-printable' if self.media_type == 'text' else 'base64'

    @property
    def use_instead(self):
        """
        The replacement content types parsed from docs, or None when the
        type is not obsolete or names no replacement.
        """
        if not self.is_obsolete:
            return None
        return self._use_instead

    @property
    def is_registered(self):
        """
        False when the raw media type or raw sub-type starts with 'x-' or
        'X-'; otherwise the value of the `registered` attribute.
        """
        if UNREG_RE.match(self.raw_media_type) or UNREG_RE.match(self.raw_sub_type):
            return False
        return self.registered

    @property
    def docs(self):
        """Free-form documentation text for this type."""
        return self._docs

    @docs.setter
    def docs(self, d):
        # Every "use-instead:<media>/<sub>" marker becomes one entry of the
        # use-instead list. A real list is stored (a lazy map object cannot
        # be compared or re-read), and the list is reset when the new docs
        # carry no marker so a stale value never outlives its text.
        found = self._USE_INSTEAD_RE.findall(d) if d else []
        self._use_instead = ['%s/%s' % pair for pair in found] or None
        self._docs = d

    def _decode_url(self, el):
        """Expand one raw `url` entry; see the urls property for the rules."""
        if el == 'IANA':
            return IANA_URL % (self.media_type, self.sub_type)
        if el == 'LTSW':
            return LTSW_URL % self.media_type
        match = self._LABELLED_URL_RE.match(el)
        if match:
            return match.group(1, 2)
        match = self._LABELLED_CONTACT_RE.match(el)
        if match:
            return [match.group(1), CONTACT_URL % match.group(2)]
        for regex, template in REGEX_URLS.items():
            match = re.match(regex, el)
            if match:
                return template % match.group(1)
        return el

    @property
    def urls(self):
        """
        The decoded URL list for this type, always a list (empty when no
        URLs are set). Each entry of `url` is translated as follows:

          IANA           => IANA_URL filled with media type and sub-type
          LTSW           => LTSW_URL filled with the media type
          {label=url}    => the tuple (label, url)
          [label=token]  => the list [label, CONTACT_URL filled with token]
          RFC###         => RFC_URL filled with the number
          DRAFT:name     => DRAFT_URL filled with the name
          [token]        => CONTACT_URL filled with the token

        Anything else is passed through unchanged.
        """
        if self.url is None:
            return []
        return [self._decode_url(el) for el in self.url]

    @property
    def encoding(self):
        """
        The transfer encoding; falls back to default_encoding while the
        stored value is None or 'default'.
        """
        enc = self._encoding
        if enc is None or enc == 'default':
            return self.default_encoding
        return self._encoding

    @encoding.setter
    def encoding(self, enc):
        # A leading ':' (symbol-style spelling) is tolerated; all colons are
        # removed in that case.
        if isinstance(enc, basestring) and enc.startswith(':'):
            enc = enc.replace(':', '')

        if enc is None or enc == 'default':
            self._encoding = self.default_encoding
        elif ENCODING_RE.match(enc):
            self._encoding = enc
        else:
            raise TypeError('The encoding must be None, default, '
                            'base64, 7bit, 8bit, or quoted-printable.')

    @property
    def system(self):
        """Pattern naming the platform this type is specific to, or None."""
        return self._system

    @system.setter
    def system(self, pattern):
        # None and ready-made pattern objects (anything with `match`) are
        # stored as they are; other values are compiled as a regex.
        if pattern is None or hasattr(pattern, 'match'):
            self._system = pattern
        else:
            self._system = re.compile(pattern)

    @property
    def is_binary(self):
        # True when the effective transfer encoding is base64.
        return self.encoding == 'base64'

    @property
    def is_ascii(self):
        # True when the effective transfer encoding is anything but base64.
        return not self.is_binary

    @property
    def is_signature(self):
        # True when the simplified MIME type is in SIGNATURES.
        return self.simplified.lower() in SIGNATURES

    @property
    def is_system(self):
        # True if a system pattern is set.
        return self.system is not None

    @property
    def is_platform(self):
        # Truthy if a system pattern is set and it matches PLATFORM. The
        # value is the match result itself, not necessarily a bool.
        return self.is_system and self.system.match(PLATFORM)

    @property
    def is_complete(self):
        # True if at least one extension is known for this type.
        return bool(self.extensions)

    @property
    def to_s(self):
        # The MIME type as a string.
        return self.content_type

    @property
    def to_str(self):
        # The MIME type as a string (same value as to_s).
        return self.content_type

    @property
    def to_a(self):
        # The fields as a list, in the argument order of from_array.
        return [self.content_type, self.extensions, self.encoding, self.system,
                self.is_obsolete, self.docs, self.url, self.is_registered]

    @property
    def to_hash(self):
        # The fields as a dict that from_hash accepts.
        return {'Content-Type': self.content_type,
                'Content-Transfer-Encoding': self.encoding,
                'Extensions': self.extensions,
                'System': self.system,
                'Obsolete': self.is_obsolete,
                'Docs': self.docs,
                'URL': self.url,
                'Registered': self.is_registered}

    @classmethod
    def simplify(cls, content_type):
        """
        Return content_type lower-cased with the 'x-' markers removed, or
        None if it does not start with a '<media>/<sub>' pair.

          x-chemical/x-pdb  => chemical/pdb

        NOTE(review): every 'x-' in either label is removed, not only a
        leading one (UNREG_RE is unanchored). Lookups and registration both
        go through this function, so the behaviour is kept as it was.
        """
        matchdata = MEDIA_TYPE_RE.match(content_type)
        if matchdata is None:
            return None
        (media_type, subtype) = matchdata.groups()
        return '%s/%s' % (UNREG_RE.sub('', media_type.lower()),
                          UNREG_RE.sub('', subtype.lower()))

    @classmethod
    def from_array(cls, content_type,
                   extensions=None, encoding=None, system=None,
                   is_obsolete=False, docs=None, url=None, is_registered=False):
        """
        Create a type from positional fields, in the order produced by to_a:

          Type.from_array("application/x-ruby", ['rb'], '8bit')

        is equivalent to:

          type = Type('application/x-ruby')
          type.extensions = ['rb']
          type.encoding = '8bit'

        Everything after content_type is optional. Omitting extensions (or
        passing None) gives an empty extension list.
        """
        mt = cls(content_type)
        mt.extensions = extensions
        mt.encoding = encoding
        mt.system = system
        mt.is_obsolete = is_obsolete
        mt.docs = docs
        mt.url = url
        mt.registered = is_registered
        return mt

    @classmethod
    def from_hash(cls, hash):
        """
        Create a type from a dict. Keys are case-insensitive and dashes may
        be written as underscores, so Content-Type can also be given as
        content-type, Content_Type or content_type.

        Known keys are Content-Type (required), Content-Transfer-Encoding,
        Extensions, System, Obsolete, Docs, URL and Registered, i.e. the
        keys to_hash produces. The older spellings encoding, is_obsolete and
        is_registered are still accepted and win if both forms are present.

          Type.from_hash({'Content-Type': 'text/x-yaml',
                          'Content-Transfer-Encoding': '8bit',
                          'System': 'linux',
                          'Extensions': ['yaml', 'yml']})

        is equivalent to:

          t = Type('text/x-yaml')
          t.encoding = '8bit'
          t.system = 'linux'
          t.extensions = ['yaml', 'yml']
        """
        type_hash = dict((key.lower().replace('-', '_'), value)
                         for key, value in hash.items())

        def first_of(keys, default):
            # Value of the first key present in type_hash, else default.
            for key in keys:
                if key in type_hash:
                    return type_hash[key]
            return default

        mt = cls(type_hash['content_type'])
        mt.extensions = type_hash.get('extensions', [])
        mt.encoding = first_of(('encoding', 'content_transfer_encoding'), 'default')
        mt.system = type_hash.get('system')
        mt.is_obsolete = first_of(('is_obsolete', 'obsolete'), False)
        mt.docs = type_hash.get('docs')
        mt.url = type_hash.get('url')
        mt.registered = first_of(('is_registered', 'registered'), False)
        return mt

    @classmethod
    def from_mime_type(cls, mime_type):
        """
        Essentially a copy constructor: build a new type carrying copies of
        the content type, extensions, URLs, system, encoding and docs of
        mime_type, plus its obsolete and registered flags.
        """
        mt = cls(deepcopy(mime_type.content_type))
        # Real lists are required here: a lazy map object would be stored by
        # the extensions setter as a single bogus extension.
        mt.extensions = [deepcopy(ext) for ext in mime_type.extensions]
        mt.url = [deepcopy(u) for u in mime_type.url] if mime_type.url else None
        try:
            mt.system = deepcopy(mime_type.system)
        except TypeError:
            # Older interpreters refuse to deepcopy compiled patterns;
            # sharing the pattern object is the fallback.
            mt.system = mime_type.system
        mt.encoding = deepcopy(mime_type.encoding)
        mt.docs = deepcopy(mime_type.docs)

        mt.is_obsolete = mime_type.is_obsolete
        mt.registered = mime_type.is_registered
        return mt
477
478
class ItemMeta(type):
    """
    Metaclass that makes a registry class subscriptable.

    The class using it must provide a `type_variants` mapping keyed by
    simplified content type and a `match(regex)` class method.
    """

    # re._pattern_type no longer exists on Python 3.7+, so the type of
    # compiled patterns is taken from an actual compiled pattern instead.
    _PATTERN_TYPE = type(re.compile(''))

    def __getitem__(cls, type_id):
        """
        Look up registered types by key.

        A compiled regex returns cls.match(regex). A Type returns the
        variants stored under its simplified name. Anything else is treated
        as a content-type string and simplified first. The two mapping
        lookups return None when nothing is registered under the key.
        """
        # The pattern check comes first so that regex lookups never depend
        # on anything but the registry class itself.
        if isinstance(type_id, ItemMeta._PATTERN_TYPE):
            return cls.match(type_id)
        if isinstance(type_id, Type):
            return cls.type_variants.get(type_id.simplified)
        return cls.type_variants.get(Type.simplify(type_id))
487
488
class Types(with_metaclass(ItemMeta, object)):
    """
    = MIME::Types
    MIME types are used in MIME-compliant communications, as in e-mail or
    HTTP traffic, to indicate the type of content which is transmitted.
    MIME::Types provides the ability for detailed information about MIME
    entities (provided as a set of MIME::Type objects) to be determined and
    used programmatically. There are many types defined by RFCs and vendors,
    so the list is long but not complete; don't hesitate to ask to add
    additional information. This library follows the IANA collection of MIME
    types (see below for reference).
    == Description
    MIME types are used in MIME entities, as in email or HTTP traffic. It is
    useful at times to have information available about MIME types (or,
    inversely, about files). A MIME::Type stores the known information about
    one MIME type.

    The registry itself lives in the class attributes type_variants and
    extension_index and is accessed through class methods, so it is shared
    by the class and all of its instances.
    == Usage
     from mime import Type, Types
     plaintext = Types['text/plain']
     text = plaintext[0]
     print(text.media_type)            # => 'text'
     print(text.sub_type)              # => 'plain'
     print(" ".join(text.extensions))  # => 'asc txt c cc h hh cpp'
     print(text.encoding)              # => 8bit
     print(text.is_binary)             # => False
     print(text.is_ascii)              # => True
     print(text.is_obsolete)           # => False
     print(text.is_registered)         # => True
     print(text.is_like('text/plain')) # => True
     print(Type.simplify('x-appl/x-zip')) # => 'appl/zip'
    == About
    This module is built to conform to the MIME types of RFCs 2045 and 2231.
    It follows the official IANA registry at
    http://www.iana.org/assignments/media-types/ and
    ftp://ftp.iana.org/assignments/media-types with some unofficial types
    added from the collection at
    http://www.ltsw.se/knbase/internet/mime.htp
    This is originally based on Perl MIME::Types by Mark Overmeer.
    This is a Python clone of https://github.com/halostatue/mime-types
    See Also:
        http://www.iana.org/assignments/media-types/
        http://www.ltsw.se/knbase/internet/mime.htp
    """

    # simplified content type -> list of Type variants registered under it
    type_variants = defaultdict(list)
    # file extension -> list of Types that declare that extension
    extension_index = defaultdict(list)

    __metaclass__ = ItemMeta

    def __init__(self, data_version=None):
        self.data_version = data_version

    def __repr__(self):
        return '<MIME::Types version:%s>' % self.data_version

    @classmethod
    def m(cls, type_id, flags=None):
        """
        Look up type_id (string, Type or compiled regex, as for cls[...])
        and filter the result with prune_matches. Returns a list, empty
        when nothing is registered under type_id.
        """
        return cls.prune_matches(cls[type_id], flags)

    @classmethod
    def match(cls, regex):
        """Return all types whose simplified name the compiled regex finds."""
        return flatten([variants
                        for name, variants in cls.type_variants.items()
                        if regex.search(name)])

    @classmethod
    def prune_matches(cls, matches, flags):
        """
        Return matches as a list, keeping only complete types when
        flags['complete'] is truthy and only types for the current platform
        when flags['platform'] is truthy. None is accepted for either
        argument and means "no matches" / "no flags".
        """
        matches = [] if matches is None else matches
        flags = flags or {}
        if flags.get('complete'):
            matches = [mt for mt in matches if mt.is_complete]
        if flags.get('platform'):
            matches = [mt for mt in matches if mt.is_platform]
        return list(matches)

    @classmethod
    def add_type_variant(cls, mime_type):
        """Record mime_type under its simplified name."""
        cls.type_variants[mime_type.simplified].append(mime_type)

    @classmethod
    def index_extensions(cls, mime_type):
        """Record mime_type under each of its extensions."""
        for ext in mime_type.extensions:
            cls.extension_index[ext].append(mime_type)

    @classmethod
    def any(cls, block):
        """
        Return True if block(mt) is truthy for at least one type in the
        extension index, else False.

        NOTE(review): only types reachable through extension_index are
        visited, so types without extensions are never tested - confirm
        whether defined_types() was intended.
        """
        # Inside a method body `any` is the builtin, not this class method.
        return any(block(mt)
                   for mt in flatten(list(cls.extension_index.values())))

    @classmethod
    def all(cls, block):
        """
        Return True if block(mt) is truthy for every type in the extension
        index. The block is called for every entry (no short-circuit).
        """
        # The values view must be turned into a list first; flatten treats
        # anything that is not a list/tuple as a single leaf.
        return all([block(mt)
                    for mt in flatten(list(cls.extension_index.values()))])

    @classmethod
    def defined_types(cls):
        """Return an iterator over every registered type variant."""
        return chain(*cls.type_variants.values())

    @classmethod
    def count(cls):
        """Return the number of registered type variants."""
        return sum(len(variants) for variants in cls.type_variants.values())

    @classmethod
    def each(cls, block):
        """
        Call block on every registered type and return the results as a
        list (built eagerly, so the calls happen even if the result is
        ignored).
        """
        return [block(mt) for mt in cls.defined_types()]

    @classmethod
    def type_for(cls, filename, platform=False):
        """
        Return the list of types registered for the extension of filename,
        i.e. the lower-cased text after the last '.' (the whole name if it
        has no dot). With platform=True only types for the current platform
        are kept.
        """
        ext = filename.split('.')[-1].lower()
        type_list = cls.extension_index.get(ext, [])
        if platform:
            type_list = [mt for mt in type_list if mt.is_platform]
        return list(type_list)

    of = type_for

    @classmethod
    def add(cls, *types):
        """
        Register each given Type. A Types instance may be passed as well, in
        which case all of its defined types are added. A warning is issued
        when a type equal to an already registered variant is added; the
        type is registered regardless.
        """
        import warnings

        for mime_type in types:
            if isinstance(mime_type, Types):
                cls.add(*mime_type.defined_types())
            else:
                mts = cls.type_variants.get(mime_type.simplified)
                if mts and mime_type in mts:
                    warnings.warn('Type %s already registered as a variant of %s.'
                                  % (mime_type, mime_type.simplified))
                cls.add_type_variant(mime_type)
                cls.index_extensions(mime_type)
614