"""
Tagging utilities - from user tag input parsing to tag cloud
calculation.
"""
import math

from django.db.models.query import QuerySet
from django.utils.encoding import force_str
from django.utils.translation import gettext as _

# Font size distribution algorithms
LOGARITHMIC, LINEAR = 1, 2


def parse_tag_input(input):
    """
    Parses tag input, with multiple word input being activated and
    delineated by commas and double quotes. Quotes take precedence, so
    they may contain commas.

    Unquoted sections are split on commas if at least one unquoted
    comma was seen anywhere in the input, otherwise on spaces. A quote
    which is opened but never closed is treated as unquoted text.

    Returns a sorted list of unique tag names.
    """
    if not input:
        return []

    input = force_str(input)

    # Special case - without commas or double quotes in the input,
    # the only possible delimiter is a space.
    if ',' not in input and '"' not in input:
        return sorted(set(split_strip(input, ' ')))

    words = []
    buffer = []
    # Defer splitting of non-quoted sections until we know if there are
    # any unquoted commas.
    to_be_split = []
    saw_loose_comma = False
    open_quote = False
    chars = iter(input)
    try:
        while True:
            c = next(chars)
            if c == '"':
                # Anything buffered so far was unquoted text.
                if buffer:
                    to_be_split.append(''.join(buffer))
                    buffer = []
                # Find the matching quote; running out of input here
                # raises StopIteration with ``open_quote`` still set.
                open_quote = True
                c = next(chars)
                while c != '"':
                    buffer.append(c)
                    c = next(chars)
                if buffer:
                    word = ''.join(buffer).strip()
                    if word:
                        words.append(word)
                    buffer = []
                open_quote = False
            else:
                if not saw_loose_comma and c == ',':
                    saw_loose_comma = True
                buffer.append(c)
    except StopIteration:
        # If we were parsing an open quote which was never closed treat
        # the buffer as unquoted.
        if buffer:
            if open_quote and ',' in buffer:
                saw_loose_comma = True
            to_be_split.append(''.join(buffer))

    if to_be_split:
        delimiter = ',' if saw_loose_comma else ' '
        for chunk in to_be_split:
            words.extend(split_strip(chunk, delimiter))
    return sorted(set(words))


def split_strip(input, delimiter=','):
    """
    Splits ``input`` on ``delimiter``, stripping each resulting string
    and returning a list of non-empty strings.
    """
    stripped = (piece.strip() for piece in input.split(delimiter))
    return [piece for piece in stripped if piece]


def edit_string_for_tags(tags):
    """
    Given list of ``Tag`` instances, creates a string representation of
    the list suitable for editing by the user, such that submitting the
    given string representation back without changing it will give the
    same list of tags.

    Tag names which contain commas will be double quoted.

    If any tag name which isn't being quoted contains whitespace, the
    resulting string of tag names will be comma-delimited, otherwise
    it will be space-delimited.
    """
    names = []
    use_commas = False
    for tag in tags:
        name = tag.name
        if ',' in name:
            # Quoted names never influence the choice of delimiter.
            names.append('"%s"' % name)
            continue
        if ' ' in name:
            use_commas = True
        names.append(name)

    glue = ', ' if use_commas else ' '
    result = glue.join(names)

    # If we only had one name, and it had spaces,
    # we need to enclose it in quotes.
    # Otherwise, it's interpreted as two tags.
    if len(names) == 1 and use_commas:
        result = '"' + result + '"'

    return result


def get_queryset_and_model(queryset_or_model):
    """
    Given a ``QuerySet`` or a ``Model``, returns a two-tuple of
    (queryset, model).

    If a ``Model`` is given, the ``QuerySet`` returned will be created
    using its default manager.
    """
    try:
        return queryset_or_model, queryset_or_model.model
    except AttributeError:
        # No ``model`` attribute, so treat the argument as a model class.
        return queryset_or_model._default_manager.all(), queryset_or_model


def get_tag_list(tags):
    """
    Utility function for accepting tag input in a flexible manner.

    If a ``Tag`` object is given, it will be returned in a list as
    its single occupant.

    If given, the tag names in the following will be used to create a
    ``Tag`` ``QuerySet``:

       * A string, which may contain multiple tag names.
       * A list or tuple of strings corresponding to tag names.
       * A list or tuple of integers corresponding to tag ids.

    If given, the following will be returned as-is:

       * A list or tuple of ``Tag`` objects.
       * A ``Tag`` ``QuerySet``.
       * An empty list or tuple.

    Raises ``ValueError`` if a list or tuple does not classify as
    exactly one of the kinds above, or if ``tags`` is of any other
    type. Items which are not strings, ``Tag`` objects or integers are
    ignored when the contents of a list or tuple are classified.
    """
    # Imported here rather than at module level.
    from tagging.models import Tag
    if isinstance(tags, Tag):
        return [tags]
    elif isinstance(tags, QuerySet) and tags.model is Tag:
        return tags
    elif isinstance(tags, str):
        return Tag.objects.filter(name__in=parse_tag_input(tags))
    elif isinstance(tags, (list, tuple)):
        if len(tags) == 0:
            return tags
        # Record which kinds of item are present.
        contents = set()
        for item in tags:
            if isinstance(item, str):
                contents.add('string')
            elif isinstance(item, Tag):
                contents.add('tag')
            elif isinstance(item, int):
                contents.add('int')
        if len(contents) == 1:
            if 'string' in contents:
                return Tag.objects.filter(name__in=[force_str(tag)
                                                    for tag in tags])
            elif 'tag' in contents:
                return tags
            elif 'int' in contents:
                return Tag.objects.filter(id__in=tags)
        else:
            raise ValueError(
                _('If a list or tuple of tags is provided, '
                  'they must all be tag names, Tag objects or Tag ids.'))
    else:
        raise ValueError(_('The tag input given was invalid.'))


def get_tag(tag):
    """
    Utility function for accepting single tag input in a flexible
    manner.

    If a ``Tag`` object is given it will be returned as-is; if a
    string or integer are given, they will be used to lookup the
    appropriate ``Tag`` by name or id respectively.

    If no matching tag can be found, or the input is of any other
    type, ``None`` will be returned.
    """
    # Imported here rather than at module level.
    from tagging.models import Tag
    if isinstance(tag, Tag):
        return tag

    try:
        if isinstance(tag, str):
            return Tag.objects.get(name=tag)
        elif isinstance(tag, int):
            return Tag.objects.get(id=tag)
    except Tag.DoesNotExist:
        pass

    return None


def _calculate_thresholds(min_weight, max_weight, steps):
    """
    Returns a list of ``steps`` evenly spaced upper bounds, the first
    one step above ``min_weight`` and the last at (approximately, due
    to floating-point rounding) ``max_weight``.
    """
    delta = (max_weight - min_weight) / float(steps)
    return [min_weight + i * delta for i in range(1, steps + 1)]


def _calculate_tag_weight(weight, max_weight, distribution):
    """
    Returns ``weight`` unchanged for the linear distribution (or when
    ``max_weight`` is 1), and a logarithmically scaled weight capped at
    ``max_weight`` for the logarithmic distribution.

    Raises ``ValueError`` for any other ``distribution``.

    Logarithmic tag weight calculation is based on code from the
    *Tag Cloud* plugin for Mephisto, by Sven Fuchs.

    http://www.artweb-design.de/projects/mephisto-plugin-tag-cloud
    """
    # ``max_weight == 1`` is short-circuited because log(1) == 0 would
    # otherwise be used as a divisor below.
    if distribution == LINEAR or max_weight == 1:
        return weight
    elif distribution == LOGARITHMIC:
        return min(
            math.log(weight) * max_weight / math.log(max_weight),
            max_weight)
    raise ValueError(
        _('Invalid distribution algorithm specified: %s.') % distribution)


def calculate_cloud(tags, steps=4, distribution=LOGARITHMIC):
    """
    Add a ``font_size`` attribute to each tag according to the
    frequency of its use, as indicated by its ``count``
    attribute.

    ``steps`` defines the range of font sizes - ``font_size`` will
    be an integer between 1 and ``steps`` (inclusive).

    ``distribution`` defines the type of font size distribution
    algorithm which will be used - logarithmic or linear. It must be
    one of ``tagging.utils.LOGARITHMIC`` or ``tagging.utils.LINEAR``.

    Returns ``tags``, whose items have been modified in place.
    """
    if len(tags) > 0:
        counts = [tag.count for tag in tags]
        min_weight = float(min(counts))
        max_weight = float(max(counts))
        thresholds = _calculate_thresholds(min_weight, max_weight, steps)
        for tag in tags:
            tag_weight = _calculate_tag_weight(
                tag.count, max_weight, distribution)
            # The font size is the position of the first threshold the
            # weight does not exceed.
            for size, threshold in enumerate(thresholds, start=1):
                if tag_weight <= threshold:
                    tag.font_size = size
                    break
            else:
                # Floating-point rounding can leave the top threshold
                # marginally below ``max_weight``; the heaviest tags
                # must still end up with the largest font size rather
                # than with no ``font_size`` at all.
                tag.font_size = steps
    return tags