1"""
2Transliterate the given text to the latin script.
3
4This attempts to convert a given text to latin script using the
5closest match of characters vis a vis the original script.
6
7Transliteration requires an extensive unicode mapping. Since all
8Python implementations are either GPL-licensed (and thus more
9restrictive than this library) or come with a massive C code
dependency, this module requires neither but will use PyICU if it
is installed, falling back to text-unidecode otherwise.
12"""
13import warnings
14from typing import Optional
15
16from normality.cleaning import compose_nfkc, is_text
17
# Compound transliteration rule used when ASCII output is requested.
# The individual steps are listed in order and joined with '; '.
ASCII_SCRIPT = '; '.join((
    'Any-Latin',                   # transform to latin script
    'NFKD',                        # decompose
    '[:Symbol:] Remove',           # remove symbols
    '[:Nonspacing Mark:] Remove',  # remove the separated accent marks
    'NFKC',                        # compose
    'Accents-Any',
    'Latin-ASCII',                 # push to ASCII
))
21
22
class ICUWarning(UnicodeWarning):
    """Warning category for messages about the optional ICU dependency."""
25
26
def latinize_text(text: Optional[str], ascii=False) -> Optional[str]:
    """Transliterate the given text to the latin script.

    Characters are replaced with their closest match in the latin
    script. With ``ascii`` set, the longer ``ASCII_SCRIPT`` rule is
    applied instead of plain ``Any-Latin``.

    ``None``, values rejected by ``is_text`` and empty strings are
    handed back unchanged.
    """
    if text is None:
        return text
    if not is_text(text) or len(text) == 0:
        return text

    # One transliterator per mode is built on first use and then kept
    # as an attribute of this function object for subsequent calls.
    if ascii:
        slot, script = '_ascii', ASCII_SCRIPT
    else:
        slot, script = '_tr', 'Any-Latin'

    if not hasattr(latinize_text, slot):
        setattr(latinize_text, slot, make_trans(script))
    return getattr(latinize_text, slot)(text)
44
45
def ascii_text(text: Optional[str]) -> Optional[str]:
    """Transliterate the text and strip whatever is still not ASCII.

    Returns ``None`` when the transliterated value is ``None`` or is
    rejected by ``is_text``.
    """
    latinized = latinize_text(text, ascii=True)
    if latinized is not None and is_text(latinized):
        # 'ignore' drops any character that has no ASCII encoding.
        return latinized.encode('ascii', 'ignore').decode('ascii')
    return None
52
53
def make_trans(script):
    """Return a callable that transliterates a string.

    When the ``icu`` module can be imported, the callable is the
    ``transliterate`` method of a ``Transliterator`` created for
    ``script``. Otherwise an ``ICUWarning`` is issued and a fallback is
    returned that applies ``compose_nfkc`` followed by ``unidecode``;
    the fallback does not make use of ``script``.
    """
    try:
        from icu import Transliterator  # type: ignore
        return Transliterator.createInstance(script).transliterate
    except ImportError:
        from text_unidecode import unidecode  # type: ignore

        def transliterate(text):
            return unidecode(compose_nfkc(text))

        # stacklevel=4 attributes the warning to a frame further up the
        # call stack rather than to this helper.
        warnings.warn(
            "Install 'pyicu' for better text transliteration.",
            ICUWarning,
            stacklevel=4,
        )
        return transliterate
68