1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- """
- All of the Enums that are used throughout the chardet package.
- :author: Dan Blanchard (dan.blanchard@gmail.com)
- """
- class InputState(object):
- """
- This enum represents the different states a universal detector can be in.
- """
- PURE_ASCII = 0
- ESC_ASCII = 1
- HIGH_BYTE = 2
- class LanguageFilter(object):
- """
- This enum represents the different language filters we can apply to a
- ``UniversalDetector``.
- """
- CHINESE_SIMPLIFIED = 0x01
- CHINESE_TRADITIONAL = 0x02
- JAPANESE = 0x04
- KOREAN = 0x08
- NON_CJK = 0x10
- ALL = 0x1F
- CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
- CJK = CHINESE | JAPANESE | KOREAN
- class ProbingState(object):
- """
- This enum represents the different states a prober can be in.
- """
- DETECTING = 0
- FOUND_IT = 1
- NOT_ME = 2
- class MachineState(object):
- """
- This enum represents the different states a state machine can be in.
- """
- START = 0
- ERROR = 1
- ITS_ME = 2
- class SequenceLikelihood(object):
- """
- This enum represents the likelihood of a character following the previous one.
- """
- NEGATIVE = 0
- UNLIKELY = 1
- LIKELY = 2
- POSITIVE = 3
- @classmethod
- def get_num_categories(cls):
- """:returns: The number of likelihood categories in the enum."""
- return 4
- class CharacterCategory(object):
- """
- This enum represents the different categories language models for
- ``SingleByteCharsetProber`` put characters into.
- Anything less than CONTROL is considered a letter.
- """
- UNDEFINED = 255
- LINE_BREAK = 254
- SYMBOL = 253
- DIGIT = 252
- CONTROL = 251
|