"""
Charamel: Truly Universal Encoding Detection in Python
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Licensed under Apache 2.0
"""
import encodings.aliases
import enum


@enum.unique
class Encoding(str, enum.Enum):
    """
    Python character encodings

    Each member's value is the codec name string listed below. Because the
    enum mixes in ``str``, members compare equal to their value.

    Lookup by value (``Encoding('UTF-8')``) is forgiving: when the value is
    not an exact member value, ``_missing_`` normalizes it with
    ``encodings.normalize_encoding``, lowercases it, and resolves it through
    ``encodings.aliases.aliases`` before trying again.
    """

    ASCII = 'ascii'
    BIG_5 = 'big5'
    BIG_5_HKSCS = 'big5hkscs'
    CP_037 = 'cp037'
    CP_273 = 'cp273'
    CP_424 = 'cp424'
    CP_437 = 'cp437'
    CP_500 = 'cp500'
    CP_720 = 'cp720'
    CP_737 = 'cp737'
    CP_775 = 'cp775'
    CP_850 = 'cp850'
    CP_852 = 'cp852'
    CP_855 = 'cp855'
    CP_856 = 'cp856'
    CP_857 = 'cp857'
    CP_858 = 'cp858'
    CP_860 = 'cp860'
    CP_861 = 'cp861'
    CP_862 = 'cp862'
    CP_863 = 'cp863'
    CP_864 = 'cp864'
    CP_865 = 'cp865'
    CP_866 = 'cp866'
    CP_869 = 'cp869'
    CP_874 = 'cp874'
    CP_875 = 'cp875'
    CP_932 = 'cp932'
    CP_949 = 'cp949'
    CP_950 = 'cp950'
    CP_1006 = 'cp1006'
    CP_1026 = 'cp1026'
    CP_1125 = 'cp1125'
    CP_1140 = 'cp1140'
    CP_1250 = 'cp1250'
    CP_1251 = 'cp1251'
    CP_1252 = 'cp1252'
    CP_1253 = 'cp1253'
    CP_1254 = 'cp1254'
    CP_1255 = 'cp1255'
    CP_1256 = 'cp1256'
    CP_1257 = 'cp1257'
    CP_1258 = 'cp1258'
    EUC_JP = 'euc_jp'
    EUC_JIS_2004 = 'euc_jis_2004'
    EUC_JIS_X_0213 = 'euc_jisx0213'
    EUC_KR = 'euc_kr'
    GB_2312 = 'gb2312'
    GB_K = 'gbk'
    GB_18030 = 'gb18030'
    HZ = 'hz'
    ISO_2022_JP = 'iso2022_jp'
    ISO_2022_JP_1 = 'iso2022_jp_1'
    ISO_2022_JP_2 = 'iso2022_jp_2'
    ISO_2022_JP_2004 = 'iso2022_jp_2004'
    ISO_2022_JP_3 = 'iso2022_jp_3'
    ISO_2022_JP_EXT = 'iso2022_jp_ext'
    ISO_2022_KR = 'iso2022_kr'
    LATIN_1 = 'latin_1'
    ISO_8859_2 = 'iso8859_2'
    ISO_8859_3 = 'iso8859_3'
    ISO_8859_4 = 'iso8859_4'
    ISO_8859_5 = 'iso8859_5'
    ISO_8859_6 = 'iso8859_6'
    ISO_8859_7 = 'iso8859_7'
    ISO_8859_8 = 'iso8859_8'
    ISO_8859_9 = 'iso8859_9'
    ISO_8859_10 = 'iso8859_10'
    ISO_8859_11 = 'iso8859_11'
    ISO_8859_13 = 'iso8859_13'
    ISO_8859_14 = 'iso8859_14'
    ISO_8859_15 = 'iso8859_15'
    ISO_8859_16 = 'iso8859_16'
    JOHAB = 'johab'
    KOI_8_R = 'koi8_r'
    KOI_8_T = 'koi8_t'
    KOI_8_U = 'koi8_u'
    KZ_1048 = 'kz1048'
    MAC_CYRILLIC = 'mac_cyrillic'
    MAC_GREEK = 'mac_greek'
    MAC_ICELAND = 'mac_iceland'
    MAC_LATIN_2 = 'mac_latin2'
    MAC_ROMAN = 'mac_roman'
    MAC_TURKISH = 'mac_turkish'
    PTCP_154 = 'ptcp154'
    SHIFT_JIS = 'shift_jis'
    SHIFT_JIS_2004 = 'shift_jis_2004'
    SHIFT_JIS_X_0213 = 'shift_jisx0213'
    TIS_620 = 'tis_620'
    UTF_32 = 'utf_32'
    UTF_32_BE = 'utf_32_be'
    UTF_32_LE = 'utf_32_le'
    UTF_16 = 'utf_16'
    UTF_16_BE = 'utf_16_be'
    UTF_16_LE = 'utf_16_le'
    UTF_7 = 'utf_7'
    UTF_8 = 'utf_8'
    UTF_8_SIG = 'utf_8_sig'

    @classmethod
    def _missing_(cls, value):
        """
        Resolve a value that is not an exact member value

        Called by the enum machinery when ``Encoding(value)`` finds no member
        whose value equals ``value``.

        :param value: the object passed to ``Encoding(...)``; ``str`` and
            ASCII ``bytes`` are normalized, anything else is rejected
        :return: the matching member, or the result of the base
            ``_missing_`` when nothing matches, so that the enum machinery
            raises ``ValueError`` mentioning the caller's original value
        """
        name = value
        if isinstance(name, bytes):
            try:
                name = name.decode('ascii')
            except UnicodeDecodeError:
                # Non-ASCII bytes cannot name a codec
                return super()._missing_(value)
        if not isinstance(name, str):
            # normalize_encoding() would fail with TypeError/AttributeError on
            # arbitrary objects; decline so the lookup ends in ValueError
            return super()._missing_(value)

        normalized = encodings.normalize_encoding(name).lower()
        normalized = encodings.aliases.aliases.get(normalized, normalized)

        # A str that is already in normalized form was looked up directly
        # before this hook ran, so retrying would only recurse pointlessly.
        # Bytes input is always retried with its str form.
        if isinstance(value, str) and value == normalized:
            return super()._missing_(value)

        try:
            return cls(normalized)
        except ValueError:
            # Swallow the error about the normalized spelling so the final
            # ValueError names what the caller actually passed
            return super()._missing_(value)