# -*- coding: utf-8 -*-
"""
MIME type definitions (``Type``) and a class-level registry (``Types``).

This is a Python clone of the Ruby ``mime-types`` library; see the
``Types`` docstring for background and usage.
"""
import re
import sys
import warnings
from collections import defaultdict
from copy import deepcopy
from itertools import chain

try:
    from future.utils import with_metaclass
    from future.utils import iteritems, itervalues
except ImportError:
    # Minimal stand-ins so that the module still imports when the
    # ``future`` compatibility package is not installed.
    def with_metaclass(meta, *bases):
        """Create a base class with metaclass ``meta`` (Python 2 and 3)."""
        class _TemporaryMeta(type):
            def __new__(mcs, name, this_bases, namespace):
                # Replace the temporary class by the real one, built
                # directly with the requested metaclass and bases.
                return meta(name, bases, namespace)
        return type.__new__(_TemporaryMeta, 'temporary_class', (), {})

    def iteritems(obj):
        """Return an iterator over the ``(key, value)`` pairs of ``obj``."""
        func = getattr(obj, 'iteritems', None) or obj.items
        return iter(func())

    def itervalues(obj):
        """Return an iterator over the values of ``obj``."""
        func = getattr(obj, 'itervalues', None) or obj.values
        return iter(func())


PLATFORM = sys.platform
UNREG_RE = re.compile(r'[Xx]-', re.DOTALL)
ENCODING_RE = re.compile(r'(?:base64|7bit|8bit|quoted\-printable)', re.DOTALL)
PLATFORM_RE = re.compile(sys.platform, re.DOTALL)
MEDIA_TYPE_RE = re.compile(r'([-\w.+]+)\/([-\w.+]*)', re.DOTALL)

SIGNATURES = ('application/pgp-keys',
              'application/pgp',
              'application/pgp-signature',
              'application/pkcs10',
              'application/pkcs7-mime',
              'application/pkcs7-signature',
              'text/vcard')

RFC_URL = "http://rfc-editor.org/rfc/rfc%s.txt"
IANA_URL = "http://www.iana.org/assignments/media-types/%s/%s"
LTSW_URL = "http://www.ltsw.se/knbase/internet/%s.htp"
DRAFT_URL = "http://datatracker.ietf.org/public/idindex.cgi?command=id_details&filename=%s"
CONTACT_URL = "http://www.iana.org/assignments/contact-people.htm#%s"
REGEX_URLS = {r'^RFC(\d+)$': RFC_URL,
              r'^DRAFT:(.+)$': DRAFT_URL,
              r'^\[([^\]]+)\]': CONTACT_URL}

# Patterns used by Type.docs and Type.urls, compiled once at import time.
_USE_INSTEAD_RE = re.compile(r'use-instead:([-\w.+]+)\/([-\w.+]*)')
_NAMED_URL_RE = re.compile(r'^\{([^=]+)=([^\}]+)\}')
_NAMED_CONTACT_RE = re.compile(r'^\[([^=]+)=([^\]]+)\]')

# Type of a compiled regular expression. ``re._pattern_type`` no longer
# exists on recent Python versions, so derive it from a compiled pattern.
_PATTERN_TYPE = type(UNREG_RE)

if sys.version_info[0] >= 3:
    basestring = str

    def cmp(x, y):
        """Three-way comparison (Python 3 has no builtin ``cmp``)."""
        if isinstance(x, Type):
            return x.__cmp__(y)
        if isinstance(y, Type):
            return y.__cmp__(x) * -1
        return 0 if x == y else (1 if x > y else -1)


def flatten(l):
    """
    Flatten arbitrarily nested lists/tuples into a single flat list.
    Any other value (including a string) is returned wrapped in a list.
    """
    if isinstance(l, (list, tuple)):
        return [e for i in l for e in flatten(i)]
    return [l]


class InvalidContentType(RuntimeError):
    """Raised when a content type string cannot be parsed."""
    pass


class Type(object):
    """
    Builds a MIME::Type object from the provided MIME Content Type value
    (e.g., 'text/plain' or 'application/x-eruby'). Additional information
    such as extensions and encoding is configured through the attributes
    and properties of the constructed object.
    """
    def __init__(self, content_type):
        """
        Parse ``content_type`` and initialise the type with default values.

        Raises InvalidContentType when ``content_type`` is None or does not
        start with a ``media/subtype`` pair.
        """
        if content_type is None:
            raise InvalidContentType('Invalid Content-Type provided "(%s)"' % content_type)

        matchdata = MEDIA_TYPE_RE.match(content_type)
        if matchdata is None:
            raise InvalidContentType('Invalid Content-Type provided "(%s)"' % content_type)

        # content_type
        # Returns the whole MIME content-type string.
        #   text/plain        => text/plain
        #   x-chemical/x-pdb  => x-chemical/x-pdb
        self.content_type = content_type

        # raw_media_type
        # Returns the media type of the unmodified MIME type.
        #   text/plain        => text
        #   x-chemical/x-pdb  => x-chemical
        #
        # raw_sub_type
        # Returns the sub-type of the unmodified MIME type.
        #   text/plain        => plain
        #   x-chemical/x-pdb  => x-pdb
        (self.raw_media_type, self.raw_sub_type) = matchdata.group(1, 2)

        # simplified
        # The MIME types main- and sub-label can both start with <tt>x-</tt>,
        # which indicates that it is a non-registered name. Of course, after
        # registration this flag can disappear, which adds to the confusing
        # proliferation of MIME types. The simplified string has the
        # <tt>x-</tt> removed and is translated to lowercase.
        #   text/plain        => text/plain
        #   x-chemical/x-pdb  => chemical/pdb
        self.simplified = self.simplify(self.content_type)

        # media_type
        # Returns the media type of the simplified MIME type.
        #   text/plain        => text
        #   x-chemical/x-pdb  => chemical
        #
        # sub_type
        # Returns the sub-type of the simplified MIME type.
        #   text/plain        => plain
        #   x-chemical/x-pdb  => pdb
        (self.media_type, self.sub_type) = MEDIA_TYPE_RE.match(self.simplified).group(1, 2)

        # The list of extensions which are known to be used for this
        # MIME::Type. Values assigned through the ``extensions`` property
        # are flattened into a single list; None becomes an empty list.
        self._extensions = []
        self._encoding = 'default'
        self._system = None
        self.registered = True

        # The encoded URL list for this MIME::Type. See ``urls`` for the
        # decoded form.
        self.url = None
        self.is_obsolete = False
        self._docs = ''
        self._use_instead = None

    def __repr__(self):
        return '<MIME::Type %s>' % self.content_type

    def __str__(self):
        return self.content_type

    def __cmp__(self, other):
        """
        Compares the MIME::Type against the exact content type or the
        simplified type (the simplified type will be used if comparing
        against something that can be treated as a String). In comparisons,
        this is done against the lowercase version of the MIME::Type.

        Returns a negative number, zero or a positive number.
        """
        if hasattr(other, 'content_type'):
            return cmp(self.content_type.lower(), other.content_type.lower())
        elif isinstance(other, basestring):
            return cmp(self.simplified, self.simplify(str(other)))
        else:
            return cmp(self.content_type.lower(), other.lower())

    # Python 3 ignores __cmp__, so the rich comparisons are spelled out
    # and all delegate to it.
    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0

    def __eq__(self, other):
        """
        Returns True if the other object is a MIME::Type and the content
        types match (case-insensitively). Strings never compare equal; use
        ``is_like`` to compare against a string.
        """
        return isinstance(other, self.__class__) and cmp(self, other) == 0

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        # Defining __eq__ removes the default hash on Python 3; hash on the
        # same value that __eq__ compares so equal types hash equally.
        return hash(self.content_type.lower())

    def is_like(self, other):
        """
        Returns True if the simplified type matches that of ``other``, which
        may be another MIME::Type or a content type string.
        """
        if hasattr(other, 'simplified'):
            return self.simplified == other.simplified
        else:
            return self.simplified == self.simplify(other)

    def priority_compare(self, other):
        """
        Compares the MIME::Type based on how reliable it is before doing a
        normal <=> comparison. Used by MIME::Types#[] to sort types. The
        comparisons involved are:
        1. self.simplified <=> other.simplified (ensures that we
           don't try to compare different types)
        2. IANA-registered definitions < other definitions.
        3. Generic definitions < platform definitions.
        4. Complete definitions < incomplete definitions.
        5. Current definitions < obsolete definitions.
        6. Obsolete with use-instead references < obsolete without.
        7. Obsolete use-instead definitions are compared.

        Returns a negative number, zero or a positive number.
        """
        pc = cmp(self.simplified, other.simplified)
        if pc != 0:
            return pc

        if self.is_registered != other.is_registered:
            # registered < unregistered
            pc = -1 if self.is_registered else 1
        elif self.is_platform != other.is_platform:
            # generic < platform
            pc = 1 if self.is_platform else -1
        elif self.is_complete != other.is_complete:
            # complete < incomplete
            pc = -1 if self.is_complete else 1
        elif self.is_obsolete != other.is_obsolete:
            # current < obsolete
            pc = 1 if self.is_obsolete else -1

        if pc == 0 and self.is_obsolete and (self.use_instead != other.use_instead):
            if self.use_instead is None:
                pc = -1
            elif other.use_instead is None:
                pc = 1
            else:
                pc = cmp(self.use_instead, other.use_instead)
        return pc

    @property
    def extensions(self):
        """The list of file extensions known for this type."""
        return self._extensions

    @extensions.setter
    def extensions(self, value):
        self._extensions = [] if value is None else flatten(value)

    @property
    def default_encoding(self):
        """'quoted-printable' for text/* types, 'base64' otherwise."""
        return 'quoted-printable' if self.media_type == 'text' else 'base64'

    @property
    def use_instead(self):
        """
        The list of replacement types parsed from ``docs``; None unless the
        type is obsolete and the docs name a replacement.
        """
        if not self.is_obsolete:
            return None
        return self._use_instead

    @property
    def is_registered(self):
        """
        False when the raw media type or sub-type starts with ``x-``;
        otherwise the value of the ``registered`` attribute.
        """
        if UNREG_RE.match(self.raw_media_type) or UNREG_RE.match(self.raw_sub_type):
            return False
        return self.registered

    @property
    def docs(self):
        """The documentation string for this type."""
        return self._docs

    @docs.setter
    def docs(self, d):
        if d:
            # Every "use-instead:<media>/<sub>" marker names a replacement.
            rs = _USE_INSTEAD_RE.findall(d)
            if rs:
                # A real list (not a lazy map) so it can be read repeatedly.
                self._use_instead = ["%s/%s" % e for e in rs]
            else:
                self._use_instead = None
        self._docs = d

    @property
    def urls(self):
        """
        The decoded URL list for this MIME::Type.
        The special URL value IANA will be translated into:
          http://www.iana.org/assignments/media-types/<mediatype>/<subtype>
        The special URL value RFC### will be translated into:
          http://rfc-editor.org/rfc/rfc###.txt
        The special URL value DRAFT:name will be translated into:
          http://datatracker.ietf.org/public/idindex.cgi?
              command=id_details&filename=<name>
        The special URL value LTSW will be translated into:
          http://www.ltsw.se/knbase/internet/<mediatype>.htp
        The special URL value [token] will be translated into:
          http://www.iana.org/assignments/contact-people.htm#<token>
        A value of the form {name=url} becomes the pair (name, url) and a
        value of the form [name=token] becomes [name, <contact url>].
        Any other value is returned unchanged.

        Always returns a list; it is empty when no URLs are set.
        """
        def _url(el):
            if el == 'IANA':
                return IANA_URL % (self.media_type, self.sub_type)
            elif el == 'LTSW':
                return LTSW_URL % self.media_type
            match = _NAMED_URL_RE.match(el)
            if match:
                return match.group(1, 2)
            match = _NAMED_CONTACT_RE.match(el)
            if match:
                return [match.group(1), CONTACT_URL % match.group(2)]
            for regex in REGEX_URLS:
                match = re.compile(regex).match(el)
                if match:
                    return REGEX_URLS[regex] % match.group(1)
            return el

        if self.url is None:
            return []
        return [_url(el) for el in self.url]

    @property
    def encoding(self):
        """
        The content transfer encoding; falls back to ``default_encoding``
        while no explicit encoding has been set.
        """
        enc = self._encoding
        if enc is None or enc == 'default':
            return self.default_encoding
        return self._encoding

    @encoding.setter
    def encoding(self, enc):
        # Accept Ruby-symbol style values such as ':base64'.
        if isinstance(enc, basestring) and enc.startswith(':'):
            enc = enc.replace(':', '')

        if enc is None or enc == 'default':
            self._encoding = self.default_encoding
        elif ENCODING_RE.match(enc):
            self._encoding = enc
        else:
            raise TypeError('The encoding must be None, default, '
                            'base64, 7bit, 8bit, or quoted-printable.')

    @property
    def system(self):
        """The compiled pattern describing the operating system, or None."""
        return self._system

    @system.setter
    def system(self, value):
        # Anything exposing ``match`` is taken to be a compiled pattern
        # already; everything else is compiled here.
        if value is None or hasattr(value, 'match'):
            self._system = value
        else:
            self._system = re.compile(value)

    @property
    def is_binary(self):
        # MIME types can be specified to be sent across a network in
        # particular formats. This returns True when the MIME type
        # encoding is set to <tt>base64</tt>.
        return self.encoding == 'base64'

    @property
    def is_ascii(self):
        # Returns True when the MIME type encoding is anything other than
        # <tt>base64</tt>, i.e. the opposite of is_binary.
        return not self.is_binary

    @property
    def is_signature(self):
        # Returns True when the simplified MIME type is in the list of
        # known digital signatures.
        return self.simplified.lower() in SIGNATURES

    @property
    def is_system(self):
        # Returns True if the MIME::Type is specific to an operating system.
        return self.system is not None

    @property
    def is_platform(self):
        # Returns True if the MIME::Type is specific to the current
        # operating system as represented by sys.platform. Coerced to bool
        # so that two platform-specific types compare equal on this flag.
        return bool(self.is_system and self.system.match(PLATFORM))

    @property
    def is_complete(self):
        # Returns True if the MIME::Type specifies an extension list,
        # indicating that it is a complete MIME::Type.
        return bool(self.extensions)

    @property
    def to_s(self):
        # Returns the MIME type as a string.
        return self.content_type

    @property
    def to_str(self):
        # Returns the MIME type as a string for implicit conversions.
        return self.content_type

    @property
    def to_a(self):
        # Returns the MIME type as a list suitable for use with
        # Type.from_array.
        return [self.content_type, self.extensions, self.encoding, self.system,
                self.is_obsolete, self.docs, self.url, self.is_registered]

    @property
    def to_hash(self):
        # Returns the MIME type as a dict suitable for use with
        # Type.from_hash.
        return {'Content-Type': self.content_type,
                'Content-Transfer-Encoding': self.encoding,
                'Extensions': self.extensions,
                'System': self.system,
                'Obsolete': self.is_obsolete,
                'Docs': self.docs,
                'URL': self.url,
                'Registered': self.is_registered}

    @classmethod
    def simplify(cls, content_type):
        """
        The MIME types main- and sub-label can both start with <tt>x-</tt>,
        which indicates that it is a non-registered name. Of course, after
        registration this flag can disappear, which adds to the confusing
        proliferation of MIME types. The simplified string has the
        <tt>x-</tt> removed and is translated to lowercase.

        Note that every occurrence of ``x-`` in a label is removed, not only
        a leading one. Returns None when ``content_type`` cannot be parsed.
        """
        matchdata = MEDIA_TYPE_RE.match(content_type)
        if matchdata is None:
            return None
        (media_type, subtype) = matchdata.groups()
        return '%s/%s' % (UNREG_RE.sub('', media_type.lower()),
                          UNREG_RE.sub('', subtype.lower()))

    @classmethod
    def from_array(cls, content_type,
                   extensions=None, encoding=None, system=None,
                   is_obsolete=False, docs=None, url=None, is_registered=False):
        """
        Creates a MIME::Type from positional values in the form of:
          type-name, [extensions], encoding, system, ...
        Everything except the type name is optional.
          Type.from_array("application/x-ruby", ['rb'], '8bit')
        This is equivalent to:
          type = Type('application/x-ruby')
          type.extensions = ['rb']
          type.encoding = '8bit'
        """
        mt = cls(content_type)
        mt.extensions = extensions
        mt.encoding = encoding
        mt.system = system
        mt.is_obsolete = is_obsolete
        mt.docs = docs
        mt.url = url
        mt.registered = is_registered
        return mt

    @classmethod
    def from_hash(cls, hash):
        """
        Creates a MIME::Type from a dict. Keys are case-insensitive and
        dashes may be replaced with underscores. That is, Content-Type can
        be provided as content-type, Content_Type or content_type.
        Known keys are <tt>Content-Type</tt>,
        <tt>Content-Transfer-Encoding</tt> (or <tt>encoding</tt>),
        <tt>Extensions</tt>, <tt>System</tt>, <tt>Obsolete</tt> (or
        <tt>is_obsolete</tt>), <tt>Docs</tt>, <tt>URL</tt> and
        <tt>Registered</tt> (or <tt>is_registered</tt>), so the result of
        ``to_hash`` can be passed back in.
          Type.from_hash({'Content-Type': 'text/x-yaml',
                          'Content-Transfer-Encoding': '8bit',
                          'System': 'linux',
                          'Extensions': ['yaml', 'yml']})
        This is equivalent to:
          t = Type('text/x-yaml')
          t.encoding = '8bit'
          t.system = 'linux'
          t.extensions = ['yaml', 'yml']

        Raises KeyError when no content type key is present.
        """
        type_hash = dict((k.lower().replace('-', '_'), v)
                         for k, v in hash.items())

        def pick(default, *keys):
            # First key that is present wins; legacy spellings come first.
            for key in keys:
                if key in type_hash:
                    return type_hash[key]
            return default

        mt = cls(type_hash['content_type'])
        mt.extensions = type_hash.get('extensions', [])
        mt.encoding = pick('default', 'encoding', 'content_transfer_encoding')
        mt.system = type_hash.get('system')
        mt.is_obsolete = pick(False, 'is_obsolete', 'obsolete')
        mt.docs = type_hash.get('docs')
        mt.url = type_hash.get('url')
        mt.registered = pick(False, 'is_registered', 'registered')
        return mt

    @classmethod
    def from_mime_type(cls, mime_type):
        """
        Essentially a copy constructor.
          Type.from_mime_type(plaintext)
        is equivalent to building a new Type from copies of the content
        type, extensions, url, system, encoding and docs of ``plaintext``.
        """
        mt = cls(deepcopy(mime_type.content_type))
        # Real lists are required here: a lazy map object would be stored
        # as a single bogus extension by the ``extensions`` setter.
        mt.extensions = [deepcopy(ext) for ext in mime_type.extensions]
        mt.url = [deepcopy(u) for u in mime_type.url] if mime_type.url else None
        mt.system = deepcopy(mime_type.system)
        mt.encoding = deepcopy(mime_type.encoding)
        mt.docs = deepcopy(mime_type.docs)

        mt.is_obsolete = mime_type.is_obsolete
        mt.registered = mime_type.is_registered
        return mt


class ItemMeta(type):
    """Metaclass that makes the registry class itself subscriptable."""

    def __getitem__(cls, type_id):
        """
        Look up registered types by MIME::Type (same simplified name), by
        compiled regular expression (searched against the simplified names)
        or by content type string. Returns a list of types, or None when a
        Type/string lookup finds nothing.
        """
        if isinstance(type_id, Type):
            return cls.type_variants.get(type_id.simplified)
        elif isinstance(type_id, _PATTERN_TYPE):
            return cls.match(type_id)
        else:
            return cls.type_variants.get(Type.simplify(type_id))


class Types(with_metaclass(ItemMeta, object)):
    """
    = MIME::Types
    MIME types are used in MIME-compliant communications, as in e-mail or
    HTTP traffic, to indicate the type of content which is transmitted.
    MIME::Types provides the ability for detailed information about MIME
    entities (provided as a set of MIME::Type objects) to be determined and
    used programmatically. There are many types defined by RFCs and vendors,
    so the list is long but not complete; don't hesitate to ask to add
    additional information. This library follows the IANA collection of MIME
    types (see below for reference).
    == Description
    MIME types are used in MIME entities, as in email or HTTP traffic. It is
    useful at times to have information available about MIME types (or,
    inversely, about files). A MIME::Type stores the known information about
    one MIME type.
    == Usage
     from mime import Type, Types
     plaintext = Types['text/plain']
     text = plaintext[0]
     print(text.media_type)            # => 'text'
     print(text.sub_type)              # => 'plain'
     print(" ".join(text.extensions))  # => 'asc txt c cc h hh cpp'
     print(text.encoding)              # => 8bit
     print(text.is_binary)             # => False
     print(text.is_ascii)              # => True
     print(text.is_obsolete)           # => False
     print(text.is_registered)         # => True
     print(text.is_like('text/plain')) # => True
     print(Type.simplify('x-appl/x-zip'))  # => 'appl/zip'
    == About
    This module is built to conform to the MIME types of RFCs 2045 and 2231.
    It follows the official IANA registry at
    http://www.iana.org/assignments/media-types/ and
    ftp://ftp.iana.org/assignments/media-types with some unofficial types
    added from the collection at
    http://www.ltsw.se/knbase/internet/mime.htp
    This is originally based on Perl MIME::Types by Mark Overmeer.
    This is a Python clone of https://github.com/halostatue/mime-types
    See Also:
     http://www.iana.org/assignments/media-types/
     http://www.ltsw.se/knbase/internet/mime.htp
    """

    # Both indexes live on the class, so they are shared by all instances.
    # simplified content type -> list of Type variants
    type_variants = defaultdict(list)
    # file extension -> list of Types using that extension
    extension_index = defaultdict(list)

    __metaclass__ = ItemMeta

    def __init__(self, data_version=None):
        self.data_version = data_version

    def __repr__(self):
        return '<MIME::Types version:%s>' % self.data_version

    @classmethod
    def m(cls, type_id, flags=None):
        """
        Look up ``type_id`` (see ``Types[...]``) and filter the result with
        ``prune_matches``. Returns an empty list for unknown types.
        """
        return cls.prune_matches(cls[type_id], flags)

    @classmethod
    def match(cls, regex):
        """Return all types whose simplified name is matched by ``regex``."""
        return flatten([v for k, v in iteritems(cls.type_variants)
                        if regex.search(k)])

    @classmethod
    def prune_matches(cls, matches, flags):
        """
        Filter ``matches`` according to ``flags``: a truthy 'complete' keeps
        only complete types, a truthy 'platform' keeps only types specific
        to the current platform. Always returns a new list.
        """
        if matches is None:
            return []
        flags = flags or {}
        if flags.get('complete'):
            matches = [e for e in matches if e.is_complete]
        if flags.get('platform'):
            matches = [e for e in matches if e.is_platform]
        return list(matches)

    @classmethod
    def add_type_variant(cls, mime_type):
        """Register ``mime_type`` under its simplified name."""
        cls.type_variants[mime_type.simplified].append(mime_type)

    @classmethod
    def index_extensions(cls, mime_type):
        """Register ``mime_type`` under each of its extensions."""
        for ext in mime_type.extensions:
            cls.extension_index[ext].append(mime_type)

    @classmethod
    def any(cls, block):
        """True if ``block`` is truthy for any extension-indexed type."""
        return any(block(mt)
                   for mt in flatten(list(itervalues(cls.extension_index))))

    @classmethod
    def all(cls, block):
        """True if ``block`` is truthy for every extension-indexed type."""
        # The values must be materialised as a list: flatten() only
        # descends into lists and tuples, not into dict views.
        return all([block(mt)
                    for mt in flatten(list(itervalues(cls.extension_index)))])

    @classmethod
    def defined_types(cls):
        """Iterate over every registered type variant."""
        return chain(*cls.type_variants.values())

    @classmethod
    def count(cls):
        """The number of registered type variants."""
        return len(list(cls.defined_types()))

    @classmethod
    def each(cls, block):
        """Call ``block`` on every registered type; returns the results."""
        return [block(mt) for mt in cls.defined_types()]

    @classmethod
    def type_for(cls, filename, platform=False):
        """
        Return the list of types registered for the extension of
        ``filename`` (the text after the last dot, lowercased). With
        ``platform`` set, only types for the current platform are returned.
        """
        ext = filename.split('.')[-1].lower()
        type_list = cls.extension_index.get(ext, [])
        if platform:
            type_list = [t for t in type_list if t.is_platform]
        return list(type_list)

    of = type_for

    @classmethod
    def add(cls, *types):
        """
        Add one or more MIME::Type objects (or the types of a Types
        instance) to the registry. A warning is issued when a type is
        already registered; the type is added regardless.
        """
        for mime_type in types:
            if isinstance(mime_type, Types):
                cls.add(*mime_type.defined_types())
            else:
                mts = cls.type_variants.get(mime_type.simplified)
                if mts and mime_type in mts:
                    warnings.warn(
                        'Type %s already registered as a variant of %s.'
                        % (mime_type, mime_type.simplified))
                cls.add_type_variant(mime_type)
                cls.index_extensions(mime_type)