1#!/usr/bin/env python
2# Copyright 2014 Joe Gregorio
3#
4# Licensed under the MIT License
5
6"""MIME-Type Parser
7
8This module provides basic functions for handling mime-types. It can handle
9matching mime-types against a list of media-ranges. See section 14.1 of the
10HTTP specification [RFC 2616] for a complete explanation.
11
12   http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1
13
14Contents:
15 - parse_mime_type():   Parses a mime-type into its component parts.
16 - parse_media_range(): Media-ranges are mime-types with wild-cards and a 'q'
17                          quality parameter.
18 - quality():           Determines the quality ('q') of a mime-type when
19                          compared against a list of media-ranges.
20 - quality_parsed():    Just like quality() except the second parameter must be
21                          pre-parsed.
22 - best_match():        Choose the mime-type with the highest quality ('q')
23                          from a list of candidates.
24"""
25from __future__ import absolute_import
26from functools import reduce
27import six
28
29__version__ = '0.1.3'
30
31__email__ = 'joe@bitworking.org'
32__license__ = 'MIT License'
33__credits__ = ''
34
35
def parse_mime_type(mime_type):
    """Parses a mime-type into its component parts.

    Carves up a mime-type and returns a tuple of the (type, subtype, params)
    where 'params' is a dictionary of all the parameters for the media range.
    For example, the media range 'application/xhtml;q=0.5' would get parsed
    into:

       ('application', 'xhtml', {'q': '0.5'})

    Parameter names and values are stripped of surrounding whitespace. A
    parameter segment that contains no '=' (including the empty segment left
    behind by a trailing ';') is ignored. If a parameter name is repeated,
    the last occurrence wins.

    Args:
      mime_type: string, the mime-type or media range to parse.

    Returns:
      A (type, subtype, params) tuple.

    Raises:
      ValueError: if the part before the first ';' is not of the form
        'type/subtype'. A lone '*' is accepted and read as '*/*'.
    """
    parts = mime_type.split(';')
    params = {}
    for param in parts[1:]:
        key, separator, value = param.partition('=')
        if not separator:
            # No '=' in this segment, e.g. the empty string that follows a
            # trailing ';'. There is no key/value pair to record.
            continue
        params[key.strip()] = value.strip()
    full_type = parts[0].strip()
    # Java URLConnection class sends an Accept header that includes a
    # single '*'. Turn it into a legal wildcard.
    if full_type == '*':
        full_type = '*/*'
    type_parts = full_type.split('/')
    if len(type_parts) != 2:
        raise ValueError('Can\'t parse type "%s"' % full_type)
    main_type, subtype = type_parts

    return (main_type.strip(), subtype.strip(), params)
58
59
def parse_media_range(range):
    """Parse a media-range into its component parts.

    Carves up a media range and returns a tuple of the (type, subtype,
    params) where 'params' is a dictionary of all the parameters for the media
    range.  For example, the media range 'application/*;q=0.5' would get parsed
    into:

       ('application', '*', {'q': '0.5'})

    In addition this function also guarantees that there is a usable value
    for 'q' in the params dictionary. The value is replaced by the default
    '1' when 'q' is missing, empty, not a number, or outside the closed
    interval [0, 1]. An explicit 'q=0' is kept as given, so a range can mark
    a type as not acceptable.

    Args:
      range: string, the media range to parse.

    Returns:
      A (type, subtype, params) tuple in which params['q'] is always set.

    Raises:
      ValueError: propagated from parse_mime_type() when the type part of the
        range cannot be parsed.
    """
    (main_type, subtype, params) = parse_mime_type(range)
    try:
        q = float(params['q'])
    except (KeyError, ValueError):
        # 'q' is missing, empty or not a number.
        q_is_valid = False
    else:
        # A chained comparison is False for NaN, so NaN is rejected as well.
        q_is_valid = 0 <= q <= 1
    if not q_is_valid:
        params['q'] = '1'

    return (main_type, subtype, params)
81
82
def fitness_and_quality_parsed(mime_type, parsed_ranges):
    """Find the best match for a mime-type amongst parsed media-ranges.

    Find the best match for a given mime-type against a list of media_ranges
    that have already been parsed by parse_media_range(). Returns a tuple of
    the fitness value and the value of the 'q' quality parameter of the best
    match, or (-1, 0) if no match was found. Just as for quality_parsed(),
    'parsed_ranges' must be a list of parsed media ranges.

    The fitness of a matching range is the sum of:
      100 if the type matches without relying on a wildcard,
       10 if the subtype matches without relying on a wildcard,
        1 for every parameter other than 'q' that has the same value in both
          the mime-type and the range.

    When several ranges reach the same fitness, the first one in
    'parsed_ranges' is the one whose 'q' is reported.

    Args:
      mime_type: string, the mime-type to look up.
      parsed_ranges: list of (type, subtype, params) tuples as returned by
        parse_media_range().

    Returns:
      A (fitness, q) tuple, where q is a float.
    """
    best_fitness = -1
    best_fit_q = 0
    (target_type, target_subtype, target_params) = \
        parse_media_range(mime_type)
    for (range_type, range_subtype, range_params) in parsed_ranges:
        type_match = (range_type in (target_type, '*') or
                      target_type == '*')
        subtype_match = (range_subtype in (target_subtype, '*') or
                         target_subtype == '*')
        if not (type_match and subtype_match):
            continue

        fitness = 100 if range_type == target_type else 0
        fitness += 10 if range_subtype == target_subtype else 0
        # One bonus point per parameter (other than 'q') on which the
        # mime-type and the range agree.
        fitness += sum(
            1 for (key, value) in target_params.items()
            if key != 'q' and key in range_params and
            value == range_params[key])

        # Strict comparison: an earlier range keeps its place on a tie.
        if fitness > best_fitness:
            best_fitness = fitness
            best_fit_q = range_params['q']

    return best_fitness, float(best_fit_q)
115
116
def quality_parsed(mime_type, parsed_ranges):
    """Return the 'q' of the best match for a mime-type in parsed ranges.

    Looks up the given mime-type in a list of media ranges that have already
    been parsed by parse_media_range() and returns the 'q' quality parameter
    of the best matching range, or 0 if no range matches. This function
    behaves the same as quality() except that 'parsed_ranges' must be a list
    of parsed media ranges rather than a header string.
    """
    _fitness, best_q = fitness_and_quality_parsed(mime_type, parsed_ranges)

    return best_q
128
129
def quality(mime_type, ranges):
    """Return the quality ('q') of a mime-type against a list of media-ranges.

    Returns the quality 'q' of a mime-type when compared against the
    media-ranges in ranges. For example:

    >>> quality('text/html', 'text/*;q=0.3, text/html;q=0.7, '
    ...         'text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5')
    0.7

    Blank entries in 'ranges' (such as the one produced by a trailing comma)
    are skipped, the same way best_match() treats its header.

    Args:
      mime_type: string, the mime-type to look up.
      ranges: string, comma-separated media ranges in the format of the HTTP
        Accept: header.

    Returns:
      The 'q' of the best matching range as a float, or 0 if nothing matches.
    """
    parsed_ranges = [parse_media_range(r)
                     for r in _filter_blank(ranges.split(','))]

    return quality_parsed(mime_type, parsed_ranges)
144
145
def best_match(supported, header):
    """Return mime-type with the highest quality ('q') from list of candidates.

    Takes a list of supported mime-types and finds the best match for all the
    media-ranges listed in header. The value of header must be a string that
    conforms to the format of the HTTP Accept: header. The value of 'supported'
    is a list of mime-types. The list of supported mime-types should be sorted
    in order of increasing desirability, in case of a situation where there is
    a tie.

    Candidates are ranked by the quality the header assigns them; ties are
    broken first by how specifically the header matched (fitness) and then by
    position in 'supported', later entries winning.

    >>> best_match(['application/xbel+xml', 'text/xml'],
    ...            'text/*;q=0.5,*/*; q=0.1')
    'text/xml'

    Args:
      supported: list of mime-type strings, least desirable first.
      header: string, the value of an HTTP Accept: header.

    Returns:
      The chosen mime-type, or '' if 'supported' is empty or the best
      candidate has a quality of 0 (no range matches it).
    """
    supported = list(supported)
    if not supported:
        # Nothing to choose from.
        return ''

    parsed_header = [parse_media_range(r)
                     for r in _filter_blank(header.split(','))]
    ranked = []
    for pos, mime_type in enumerate(supported):
        fitness, q = fitness_and_quality_parsed(mime_type, parsed_header)
        # 'pos' is unique, so the comparison never reaches 'mime_type'.
        ranked.append((q, fitness, pos, mime_type))
    best_q, _fitness, _pos, best_type = max(ranked)

    return best_type if best_q else ''
171
172
173def _filter_blank(i):
174    for s in i:
175        if s.strip():
176            yield s
177