1"""
2Tagging utilities - from user tag input parsing to tag cloud
3calculation.
4"""
5import math
6
7from django.db.models.query import QuerySet
8from django.utils.encoding import force_str
9from django.utils.translation import gettext as _
10
# Identifiers for the font size distribution algorithms understood by
# ``calculate_cloud``.
LOGARITHMIC = 1
LINEAR = 2
13
14
def parse_tag_input(input):
    """
    Turns a user-supplied string of tags into a list of tag names.

    Text enclosed in a pair of double quotes is always taken as a single
    tag name, so quoted names may contain commas and spaces. The
    remaining, unquoted text is split on commas if it contains at least
    one comma, otherwise on spaces. A double quote which is never closed
    does not start a quoted name; the text following it is handled like
    any other unquoted text.

    Falsy input yields an empty list. Otherwise the result is a sorted
    list of unique, stripped, non-empty tag names.
    """
    if not input:
        return []

    text = force_str(input)

    # Fast path - without commas or double quotes the only possible
    # delimiter is the space character.
    if ',' not in text and '"' not in text:
        return sorted(set(split_strip(text, ' ')))

    quoted_names = []
    # Unquoted stretches of text. They can only be split once the whole
    # input has been scanned, because a single unquoted comma anywhere
    # switches the delimiter for all of them.
    loose_chunks = []
    cursor = 0
    while True:
        opening = text.find('"', cursor)
        if opening == -1:
            # No further quotes: the rest of the input is unquoted.
            loose_chunks.append(text[cursor:])
            break
        loose_chunks.append(text[cursor:opening])
        closing = text.find('"', opening + 1)
        if closing == -1:
            # The quote was never closed, so whatever follows it is
            # treated as unquoted text.
            loose_chunks.append(text[opening + 1:])
            break
        name = text[opening + 1:closing].strip()
        if name:
            quoted_names.append(name)
        cursor = closing + 1

    if any(',' in chunk for chunk in loose_chunks):
        delimiter = ','
    else:
        delimiter = ' '

    names = set(quoted_names)
    for chunk in loose_chunks:
        # Empty chunks simply contribute no names.
        names.update(split_strip(chunk, delimiter))
    return sorted(names)
84
85
def split_strip(input, delimiter=','):
    """
    Breaks ``input`` apart at every occurrence of ``delimiter`` and
    strips surrounding whitespace from each piece.

    Pieces which are empty after stripping are dropped; the remaining
    pieces are returned as a list, in their original order.
    """
    pieces = []
    for raw in input.split(delimiter):
        piece = raw.strip()
        if piece:
            pieces.append(piece)
    return pieces
93
94
def edit_string_for_tags(tags):
    """
    Builds an editable string from the ``name`` attribute of each item
    in ``tags``, intended to yield the same tags again when it is
    submitted back unchanged.

    A name containing a comma is wrapped in double quotes. If any name
    which is not quoted for that reason contains a space, the names are
    joined with ``', '``; otherwise they are joined with a single
    space.
    """
    pieces = []
    needs_commas = False
    for tag in tags:
        label = tag.name
        if ',' in label:
            pieces.append('"%s"' % label)
        else:
            if ' ' in label:
                needs_commas = True
            pieces.append(label)

    separator = ', ' if needs_commas else ' '
    joined = separator.join(pieces)

    # A lone name containing spaces has no comma to mark it as a single
    # tag, so it is quoted to stop it being read back as several tags.
    if needs_commas and len(pieces) == 1:
        return '"' + joined + '"'
    return joined
132
133
def get_queryset_and_model(queryset_or_model):
    """
    Normalises a ``QuerySet`` or a ``Model`` into a two-tuple of
    (queryset, model).

    An argument with a ``model`` attribute is treated as the queryset
    and returned alongside that attribute. Anything else is treated as
    the model, and the queryset is obtained from its default manager.
    """
    try:
        model = queryset_or_model.model
    except AttributeError:
        # No ``model`` attribute, so this must be the model itself.
        queryset = queryset_or_model._default_manager.all()
        model = queryset_or_model
    else:
        queryset = queryset_or_model
    return queryset, model
146
147
def get_tag_list(tags):
    """
    Utility function for accepting tag input in a flexible manner.

    Returned as a single-item list:

       * A ``Tag`` object.

    Returned as-is:

       * A ``Tag`` ``QuerySet``.
       * A list or tuple of ``Tag`` objects.
       * An empty list or tuple.

    Used to build a ``Tag`` ``QuerySet``:

       * A string, which may contain multiple tag names.
       * A list or tuple of strings corresponding to tag names.
       * A list or tuple of integers corresponding to tag ids.

    Raises ``ValueError`` for any other kind of input, and for a list
    or tuple whose recognised items are not all of one kind. Items
    which are neither strings, ``Tag`` objects nor integers are not
    taken into account when determining that kind.
    """
    from tagging.models import Tag

    if isinstance(tags, Tag):
        return [tags]
    if isinstance(tags, QuerySet) and tags.model is Tag:
        return tags
    if isinstance(tags, str):
        return Tag.objects.filter(name__in=parse_tag_input(tags))
    if not isinstance(tags, (list, tuple)):
        raise ValueError(_('The tag input given was invalid.'))
    if len(tags) == 0:
        return tags

    # Work out which kinds of item the sequence holds. The first
    # matching type wins, in the order listed here.
    recognised = ((str, 'string'), (Tag, 'tag'), (int, 'int'))
    kinds = set()
    for item in tags:
        for item_type, label in recognised:
            if isinstance(item, item_type):
                kinds.add(label)
                break

    if len(kinds) != 1:
        raise ValueError(
            _('If a list or tuple of tags is provided, '
              'they must all be tag names, Tag objects or Tag ids.'))

    (kind,) = kinds
    if kind == 'string':
        return Tag.objects.filter(
            name__in=[force_str(tag) for tag in tags])
    if kind == 'tag':
        return tags
    return Tag.objects.filter(id__in=tags)
200
201
def get_tag(tag):
    """
    Utility function for accepting single tag input in a flexible
    manner.

    A ``Tag`` object is handed back unchanged. A string is looked up
    as a tag name and an integer as a tag id.

    ``None`` is returned when the lookup finds no matching tag, and
    also when the input is of any other type.
    """
    from tagging.models import Tag

    if isinstance(tag, Tag):
        return tag

    if isinstance(tag, str):
        lookup = {'name': tag}
    elif isinstance(tag, int):
        lookup = {'id': tag}
    else:
        return None

    try:
        return Tag.objects.get(**lookup)
    except Tag.DoesNotExist:
        return None
226
227
def _calculate_thresholds(min_weight, max_weight, steps):
    """
    Returns a list of ``steps`` evenly spaced upper bounds covering
    the range from ``min_weight`` to ``max_weight``.

    The first bound lies one step above ``min_weight`` and each
    following bound lies one step further on.
    """
    step_size = (max_weight - min_weight) / float(steps)
    thresholds = []
    for index in range(1, steps + 1):
        thresholds.append(min_weight + index * step_size)
    return thresholds
231
232
def _calculate_tag_weight(weight, max_weight, distribution):
    """
    Returns the weight to compare against the font size thresholds.

    ``weight`` is returned unchanged for the ``LINEAR`` distribution,
    and also whenever ``max_weight`` is 1. For the ``LOGARITHMIC``
    distribution the weight is rescaled logarithmically and capped at
    ``max_weight``. Any other ``distribution`` raises ``ValueError``.

    Logarithmic tag weight calculation is based on code from the
    *Tag Cloud* plugin for Mephisto, by Sven Fuchs.

    http://www.artweb-design.de/projects/mephisto-plugin-tag-cloud
    """
    # A maximum of 1 would put log(1) == 0 in the divisor below, so
    # that case is answered before the distribution is even looked at.
    if max_weight == 1 or distribution == LINEAR:
        return weight
    if distribution != LOGARITHMIC:
        raise ValueError(
            _('Invalid distribution algorithm specified: %s.') % distribution)
    scaled = math.log(weight) * max_weight / math.log(max_weight)
    return min(scaled, max_weight)
248
249
def calculate_cloud(tags, steps=4, distribution=LOGARITHMIC):
    """
    Sets a ``font_size`` attribute on each tag, derived from how its
    ``count`` attribute compares with the counts of the other tags.

    ``steps`` is the number of distinct font sizes; an assigned
    ``font_size`` is an integer between 1 and ``steps`` (inclusive).

    ``distribution`` selects the font size distribution algorithm and
    must be one of ``tagging.utils.LOGARITHMIC`` or
    ``tagging.utils.LINEAR``.

    The ``tags`` which were passed in are returned; an empty
    collection is returned untouched.
    """
    if len(tags) == 0:
        return tags

    usage = [tag.count for tag in tags]
    lowest = float(min(usage))
    highest = float(max(usage))
    limits = _calculate_thresholds(lowest, highest, steps)

    for tag in tags:
        weight = _calculate_tag_weight(tag.count, highest, distribution)
        # The font size is the 1-based position of the first threshold
        # which the tag's weight does not exceed.
        for size, limit in enumerate(limits, 1):
            if weight <= limit:
                tag.font_size = size
                break
    return tags
277