"""
Transliterate the given text to the latin script.

This attempts to convert a given text to latin script using the
closest match of characters vis a vis the original script.

Transliteration requires an extensive unicode mapping. Since all
Python implementations are either GPL-licensed (and thus more
restrictive than this library) or come with a massive C code
dependency, this module does not require PyICU but will use it
if it is installed, falling back to ``text_unidecode`` otherwise.
"""
import warnings
from typing import Callable, Optional

from normality.cleaning import compose_nfkc, is_text

# Transform to latin, separate accents, decompose, remove
# symbols, compose, push to ASCII
ASCII_SCRIPT = 'Any-Latin; NFKD; [:Symbol:] Remove; [:Nonspacing Mark:] Remove; NFKC; Accents-Any; Latin-ASCII'  # noqa


class ICUWarning(UnicodeWarning):
    """Warning issued when PyICU is unavailable and the fallback is used."""


def latinize_text(text: Optional[str], ascii: bool = False) -> Optional[str]:
    """Transliterate the given text to the latin script.

    This attempts to convert a given text to latin script using the
    closest match of characters vis a vis the original script.

    :param text: the value to transliterate. ``None``, empty values and
        anything rejected by ``is_text`` are returned unchanged.
    :param ascii: if true, apply the ``ASCII_SCRIPT`` transform chain
        instead of plain ``Any-Latin``.
    :returns: the transliterated text, or ``text`` itself when it was
        not processed.
    """
    if text is None or not is_text(text) or not text:
        return text

    # The transliterators are built lazily on first use and cached as
    # attributes of this function, so each one is constructed only once.
    if ascii:
        if not hasattr(latinize_text, '_ascii'):
            latinize_text._ascii = make_trans(ASCII_SCRIPT)  # type: ignore
        return latinize_text._ascii(text)  # type: ignore

    if not hasattr(latinize_text, '_tr'):
        latinize_text._tr = make_trans('Any-Latin')  # type: ignore
    return latinize_text._tr(text)  # type: ignore


def ascii_text(text: Optional[str]) -> Optional[str]:
    """Transliterate the given text and make sure it ends up as ASCII.

    :param text: the value to transliterate.
    :returns: ``None`` if the transliterated value is ``None`` or is
        rejected by ``is_text``; otherwise the text with every remaining
        non-ASCII character dropped.
    """
    text = latinize_text(text, ascii=True)
    if text is None or not is_text(text):
        return None
    # Anything the transliteration left outside ASCII is discarded.
    return text.encode('ascii', 'ignore').decode('ascii')


def make_trans(script: str) -> Callable[[str], str]:
    """Build a transliteration function for the given ICU transform.

    When PyICU can be imported, the returned callable is the
    ``transliterate`` method of an ICU ``Transliterator`` created for
    ``script``. Otherwise an ``ICUWarning`` is issued and a fallback based
    on ``text_unidecode`` is returned; the fallback ignores ``script``.

    :param script: an ICU transform specification such as ``'Any-Latin'``.
    :returns: a callable taking the text to transliterate.
    """
    # Only the optional import is guarded, so that nothing but a missing
    # PyICU installation selects the fallback.
    try:
        from icu import Transliterator  # type: ignore
    except ImportError:
        from text_unidecode import unidecode  # type: ignore

        # stacklevel=4 skips make_trans -> latinize_text -> ascii_text and
        # attributes the warning to the caller of ascii_text(). For direct
        # callers of latinize_text() it lands one frame further out.
        warnings.warn(
            "Install 'pyicu' for better text transliteration.",
            ICUWarning,
            stacklevel=4,
        )

        def transliterate(text):
            text = compose_nfkc(text)
            return unidecode(text)

        return transliterate

    inst = Transliterator.createInstance(script)
    return inst.transliterate