1import functools
2import inspect
3import re
4import unicodedata
5
6from anyascii import anyascii
7from django.apps import apps
8from django.conf import settings
9from django.conf.locale import LANG_INFO
10from django.core.exceptions import ImproperlyConfigured
11from django.core.signals import setting_changed
12from django.db.models import Model
13from django.dispatch import receiver
14from django.utils.encoding import force_str
15from django.utils.text import slugify
16from django.utils.translation import check_for_language, get_supported_language_variant
17
18
# Read once, at import time, from the Django settings; falls back to True when the
# project does not define WAGTAIL_APPEND_SLASH.
# NOTE(review): presumably controls whether generated URLs end in a slash - confirm against callers.
WAGTAIL_APPEND_SLASH = getattr(settings, 'WAGTAIL_APPEND_SLASH', True)
20
21
def camelcase_to_underscore(str):
    """
    Convert a CamelCase identifier into lower-case, underscore-separated form.

    An underscore is inserted before every capital letter that either follows a
    lower-case ASCII letter, or is not itself followed by another capital (or by
    the end of the string). The result is lower-cased and any leading or trailing
    underscores are removed.
    """
    # Pattern taken from https://djangosnippets.org/snippets/585/
    word_boundary = '(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))'
    underscored = re.sub(word_boundary, '_\\1', str)
    return underscored.lower().strip('_')
25
26
def string_to_ascii(value):
    """
    Return an ASCII rendering of ``value``.

    The conversion itself is delegated to ``anyascii``; its result is passed
    through ``str()`` before being returned.
    """
    transliterated = anyascii(value)
    return str(transliterated)
33
34
def get_model_string(model):
    """
    Build the string used to identify the given model class.

    The result has the form `app_label.ModelName`, where the app label is read
    from the model's `_meta` and the name is the class's `__name__`.

    This can be reversed with the `resolve_model_string` function.
    """
    return '.'.join((model._meta.app_label, model.__name__))
44
45
def resolve_model_string(model_string, default_app=None):
    """
    Turn a model reference into the model class it refers to.

    `model_string` may be a string of the form 'app_label.model_name', a string
    that does not split into exactly two dot-separated parts (looked up as a
    model name within `default_app`, when one is given), or a model class,
    which is returned unchanged.

    Raises a LookupError if a model can not be found, or ValueError if passed
    something that is neither a model class nor a resolvable string.
    """
    if not isinstance(model_string, str):
        # Not a string: the only other accepted input is a model class itself
        if isinstance(model_string, type) and issubclass(model_string, Model):
            return model_string
        raise ValueError("Can not resolve {0!r} into a model".format(model_string), model_string)

    pieces = model_string.split(".")
    if len(pieces) == 2:
        app_label, model_name = pieces
    elif default_app is not None:
        # If we can't split into app label and model name, assume a model in the default app
        app_label, model_name = default_app, model_string
    else:
        message = (
            "Can not resolve {0!r} into a model. Model names "
            "should be in the form app_label.model_name"
        ).format(model_string)
        raise ValueError(message, model_string)

    return apps.get_model(app_label, model_name)
74
75
SCRIPT_RE = re.compile(r'<(-*)/script>')


def escape_script(text):
    """
    Make 'text' safe to embed inside a `<script>` block by escaping any `</script>`
    closing tags it contains, so that they cannot end the block prematurely.

    Each pass inserts one extra '-' after the opening '<', so text that has already
    been escaped is escaped again rather than left ambiguous:
    `</script>` becomes `<-/script>`, `<-/script>` becomes `<--/script>`, etc.
    """
    # Re-emit the matched tag with one more dash than it already carried
    with_extra_dash = r'<-\1/script>'
    return SCRIPT_RE.sub(with_extra_dash, text)
86
87
SLUGIFY_RE = re.compile(r'[^\w\s-]', re.UNICODE)


def cautious_slugify(value):
    """
    Slugify a string to ASCII the way Django's slugify does, except that non-ASCII
    alphanumeric characters which cannot be reduced to ASCII by Unicode normalisation
    are kept as escape codes such as 'u0421' rather than being dropped.

    As a result, slugifying e.g. Cyrillic text does not produce an empty string, so the
    output remains usable as an identifier (although not a human-readable one).
    """
    # Decompose first (NFKD): an accented Latin letter becomes 'base character' +
    # 'accent modifier', and the modifier is then removed by the regexp below,
    # leaving a plain ASCII letter that needs no escaping
    decomposed = unicodedata.normalize('NFKD', force_str(value))

    # Drop everything that is not letterlike, whitespace, an underscore or a hyphen,
    # so that non-ASCII non-letters (accent modifiers, fancy punctuation) are
    # stripped rather than escaped
    letterlike = SLUGIFY_RE.sub('', decomposed)

    # Round-trip through ASCII with 'backslashreplace' so the remaining non-ASCII
    # characters turn into escape sequences, giving slugify a plain string again
    escaped = letterlike.encode('ascii', 'backslashreplace').decode('ascii')

    # slugify does the final clean-up (e.g. whitespace handling) and also removes
    # the backslashes introduced by the escaping step
    return slugify(escaped)
121
122
def safe_snake_case(value):
    """
    Convert a string to ASCII snake_case, with the same cautious handling of
    non-ASCII alphanumeric characters as `cautious_slugify`.

    The value is slugified with `cautious_slugify` and every hyphen in the result
    is then replaced by an underscore, so that the output is safe for Django
    template or filename usage.
    """
    return cautious_slugify(value).replace("-", "_")
137
138
def accepts_kwarg(func, kwarg):
    """
    Report whether the callable `func` can be called with a keyword argument named `kwarg`.

    Returns True when the signature of `func` can bind `kwarg` as a keyword
    (either through a parameter of that name or through `**kwargs`), False otherwise.
    """
    # Deliberately outside the try block: errors raised while inspecting `func`
    # itself must propagate instead of being reported as "not accepted"
    params = inspect.signature(func)
    probe = {kwarg: None}
    try:
        params.bind_partial(**probe)
    except TypeError:
        return False
    return True
149
150
class InvokeViaAttributeShortcut:
    """
    Used to create a shortcut that allows an object's named
    single-argument method to be invoked using a simple
    attribute reference syntax. For example, adding the
    following to an object:

    obj.page_url = InvokeViaAttributeShortcut(obj, 'get_page_url')

    Would allow you to invoke get_page_url() like so:

    obj.page_url.terms_and_conditions

    As well as the usual:

    obj.get_page_url('terms_and_conditions')

    Instances can be copied and pickled: the state is handled explicitly by
    `__getstate__` / `__setstate__` rather than being left to the catch-all
    `__getattr__`.
    """

    __slots__ = 'obj', 'method_name'

    def __init__(self, obj, method_name):
        self.obj = obj
        self.method_name = method_name

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails, i.e. for any name
        # other than the two slots and the methods defined on this class
        method = getattr(self.obj, self.method_name)
        return method(name)

    def __getstate__(self):
        # Defined explicitly so that copy/pickle never look this name up through
        # __getattr__, which would forward it to the wrapped method
        return {'obj': self.obj, 'method_name': self.method_name}

    def __setstate__(self, state):
        # copy/pickle rebuild the instance without calling __init__, so the slots
        # are still unset here. Without this method, the lookup of '__setstate__'
        # falls through to __getattr__, which reads the unset `self.obj` and
        # recurses until RecursionError.
        self.obj = state['obj']
        self.method_name = state['method_name']
178
179
def find_available_slug(parent, requested_slug, ignore_page_id=None):
    """
    Return a slug that is not yet used by any child of the specified parent.

    The requested slug is returned unchanged when it is free. Otherwise a numeric
    suffix is appended, counting up from 1 until an unused slug is found:

     - 'requested-slug'
     - 'requested-slug-1'
     - 'requested-slug-2'

    When updating an existing page, pass its id as `ignore_page_id` so that the
    page's own current slug is not counted as being taken by another page.
    """
    # Only slugs starting with the requested one can ever collide with a candidate
    siblings = parent.get_children().filter(slug__startswith=requested_slug)
    if ignore_page_id:
        siblings = siblings.exclude(id=ignore_page_id)
    taken = set(siblings.values_list("slug", flat=True))

    if requested_slug not in taken:
        return requested_slug

    suffix = 1
    while True:
        candidate = requested_slug + "-" + str(suffix)
        if candidate not in taken:
            return candidate
        suffix += 1
210
211
@functools.lru_cache()
def get_content_languages():
    """
    Return settings.WAGTAIL_CONTENT_LANGUAGES as a dictionary mapping language code
    to display name, for easy lookups by key. The result is cached.

    When WAGTAIL_CONTENT_LANGUAGES is not set, a single language derived from
    settings.LANGUAGE_CODE is used instead.

    Raises ImproperlyConfigured if a content language is not listed in settings.LANGUAGES.
    """
    content_languages = getattr(settings, 'WAGTAIL_CONTENT_LANGUAGES', None)
    languages = dict(settings.LANGUAGES)

    if content_languages is None:
        # Default to a single language based on LANGUAGE_CODE
        code = get_supported_language_variant(settings.LANGUAGE_CODE)
        if code not in languages:
            # get_supported_language_variant on the 'null' translation backend (used for
            # USE_I18N=False) returns settings.LANGUAGE_CODE unchanged without accounting for
            # language variants (en-us versus en), so retry with the generic version.
            code = code.split("-")[0]
            if code not in languages:
                # No display name is available, so LANGUAGE_CODE itself is shown instead.
                # Registering it in the languages dict also lets it pass the check below.
                languages[code] = settings.LANGUAGE_CODE

        content_languages = [
            (code, languages[code]),
        ]

    # Every content language must also be present in LANGUAGES
    for language_code, _name in content_languages:
        if language_code in languages:
            continue
        raise ImproperlyConfigured(
            "The language {} is specified in WAGTAIL_CONTENT_LANGUAGES but not LANGUAGES. "
            "WAGTAIL_CONTENT_LANGUAGES must be a subset of LANGUAGES.".format(language_code)
        )

    return dict(content_languages)
251
252
@functools.lru_cache(maxsize=1000)
def get_supported_content_language_variant(lang_code, strict=False):
    """
    Return the language code that's listed in the supported content languages,
    possibly selecting a more generic variant. Raise LookupError if nothing is
    found (including when `lang_code` is empty).

    If `strict` is False (the default), look for a country-specific variant
    when neither the language code nor its generic variant is found.

    lru_cache should have a maxsize to prevent memory exhaustion attacks,
    as the provided language codes are taken from the HTTP request. See also
    <https://www.djangoproject.com/weblog/2007/oct/26/security-fix/>.

    This is equivalent to Django's `django.utils.translation.get_supported_language_variant`
    but reads the `WAGTAIL_CONTENT_LANGUAGES` setting instead.
    """
    if not lang_code:
        raise LookupError(lang_code)

    # If 'fr-ca' is not supported, try special fallback or language-only 'fr'.
    try:
        fallbacks = LANG_INFO[lang_code]["fallback"]
    except KeyError:
        fallbacks = []
    generic_lang_code = lang_code.split("-")[0]
    candidates = [lang_code, *fallbacks, generic_lang_code]

    supported_lang_codes = get_content_languages()

    for candidate in candidates:
        if candidate in supported_lang_codes and check_for_language(candidate):
            return candidate

    if not strict:
        # if fr-fr is not supported, try fr-ca.
        variant_prefix = generic_lang_code + "-"
        for supported_code in supported_lang_codes:
            if supported_code.startswith(variant_prefix):
                return supported_code

    raise LookupError(lang_code)
287
288
@receiver(setting_changed)
def reset_cache(**kwargs):
    """
    Receiver for the `setting_changed` signal: drop the cached content-language
    lookups whenever one of the global WAGTAIL_CONTENT_LANGUAGES / LANGUAGES /
    LANGUAGE_CODE settings is changed.
    """
    language_settings = ("WAGTAIL_CONTENT_LANGUAGES", "LANGUAGES", "LANGUAGE_CODE")
    if kwargs["setting"] not in language_settings:
        return

    for cached_lookup in (get_content_languages, get_supported_content_language_variant):
        cached_lookup.cache_clear()
297