"""
Enum-like constant holders shared across the chardet package.

Each class below is a plain namespace of integer class attributes; none of
them is meant to be instantiated.

:author: Dan Blanchard (dan.blanchard@gmail.com)
"""


class InputState(object):
    """
    The states a universal detector can be in.
    """
    PURE_ASCII = 0
    ESC_ASCII = 1
    HIGH_BYTE = 2


class LanguageFilter(object):
    """
    The language filters that can be applied to a ``UniversalDetector``.

    Every basic filter occupies its own bit, so filters can be combined with
    ``|``; the composite members at the bottom are built exactly that way.
    """
    # One bit per basic filter.
    CHINESE_SIMPLIFIED = 1 << 0
    CHINESE_TRADITIONAL = 1 << 1
    JAPANESE = 1 << 2
    KOREAN = 1 << 3
    NON_CJK = 1 << 4
    # Composite masks, expressed as unions of the bits above.
    ALL = (CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL | JAPANESE | KOREAN
           | NON_CJK)
    CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
    CJK = CHINESE | JAPANESE | KOREAN


class ProbingState(object):
    """
    The states a prober can be in.
    """
    DETECTING = 0
    FOUND_IT = 1
    NOT_ME = 2


class MachineState(object):
    """
    The states a state machine can be in.
    """
    START = 0
    ERROR = 1
    ITS_ME = 2


class SequenceLikelihood(object):
    """
    How likely a character is to follow the previous one.
    """
    NEGATIVE = 0
    UNLIKELY = 1
    LIKELY = 2
    POSITIVE = 3

    @classmethod
    def get_num_categories(cls):
        """:returns: The number of likelihood categories in the enum."""
        # NEGATIVE, UNLIKELY, LIKELY and POSITIVE.
        return 4


class CharacterCategory(object):
    """
    The categories that language models for ``SingleByteCharsetProber``
    sort characters into.

    Anything less than CONTROL is considered a letter.
    """
    UNDEFINED = 255
    LINE_BREAK = 254
    SYMBOL = 253
    DIGIT = 252
    CONTROL = 251