1 #ifndef TOKEN_TYPES_H 2 #define TOKEN_TYPES_H 3 4 #include "libpostal.h" 5 6 // Doing these as #defines so we can duplicate the values exactly in Python 7 8 9 #define END LIBPOSTAL_TOKEN_TYPE_END 10 11 #define WORD LIBPOSTAL_TOKEN_TYPE_WORD 12 #define ABBREVIATION LIBPOSTAL_TOKEN_TYPE_ABBREVIATION 13 #define IDEOGRAPHIC_CHAR LIBPOSTAL_TOKEN_TYPE_IDEOGRAPHIC_CHAR 14 #define HANGUL_SYLLABLE LIBPOSTAL_TOKEN_TYPE_HANGUL_SYLLABLE 15 #define ACRONYM LIBPOSTAL_TOKEN_TYPE_ACRONYM 16 #define PHRASE LIBPOSTAL_TOKEN_TYPE_PHRASE 17 18 #define EMAIL LIBPOSTAL_TOKEN_TYPE_EMAIL 19 #define URL LIBPOSTAL_TOKEN_TYPE_URL 20 #define US_PHONE LIBPOSTAL_TOKEN_TYPE_US_PHONE 21 #define INTL_PHONE LIBPOSTAL_TOKEN_TYPE_INTL_PHONE 22 23 #define NUMERIC LIBPOSTAL_TOKEN_TYPE_NUMERIC 24 #define ORDINAL LIBPOSTAL_TOKEN_TYPE_ORDINAL 25 #define ROMAN_NUMERAL LIBPOSTAL_TOKEN_TYPE_ROMAN_NUMERAL 26 #define IDEOGRAPHIC_NUMBER LIBPOSTAL_TOKEN_TYPE_IDEOGRAPHIC_NUMBER 27 28 #define PERIOD LIBPOSTAL_TOKEN_TYPE_PERIOD 29 #define EXCLAMATION LIBPOSTAL_TOKEN_TYPE_EXCLAMATION 30 #define QUESTION_MARK LIBPOSTAL_TOKEN_TYPE_QUESTION_MARK 31 #define COMMA LIBPOSTAL_TOKEN_TYPE_COMMA 32 #define COLON LIBPOSTAL_TOKEN_TYPE_COLON 33 #define SEMICOLON LIBPOSTAL_TOKEN_TYPE_SEMICOLON 34 #define PLUS LIBPOSTAL_TOKEN_TYPE_PLUS 35 #define AMPERSAND LIBPOSTAL_TOKEN_TYPE_AMPERSAND 36 #define AT_SIGN LIBPOSTAL_TOKEN_TYPE_AT_SIGN 37 #define POUND LIBPOSTAL_TOKEN_TYPE_POUND 38 #define ELLIPSIS LIBPOSTAL_TOKEN_TYPE_ELLIPSIS 39 #define DASH LIBPOSTAL_TOKEN_TYPE_DASH 40 #define BREAKING_DASH LIBPOSTAL_TOKEN_TYPE_BREAKING_DASH 41 #define HYPHEN LIBPOSTAL_TOKEN_TYPE_HYPHEN 42 #define PUNCT_OPEN LIBPOSTAL_TOKEN_TYPE_PUNCT_OPEN 43 #define PUNCT_CLOSE LIBPOSTAL_TOKEN_TYPE_PUNCT_CLOSE 44 #define DOUBLE_QUOTE LIBPOSTAL_TOKEN_TYPE_DOUBLE_QUOTE 45 #define SINGLE_QUOTE LIBPOSTAL_TOKEN_TYPE_SINGLE_QUOTE 46 #define OPEN_QUOTE LIBPOSTAL_TOKEN_TYPE_OPEN_QUOTE 47 #define CLOSE_QUOTE LIBPOSTAL_TOKEN_TYPE_CLOSE_QUOTE 48 #define SLASH LIBPOSTAL_TOKEN_TYPE_SLASH 49 #define BACKSLASH LIBPOSTAL_TOKEN_TYPE_BACKSLASH 50 #define GREATER_THAN LIBPOSTAL_TOKEN_TYPE_GREATER_THAN 51 #define LESS_THAN LIBPOSTAL_TOKEN_TYPE_LESS_THAN 52 53 #define OTHER LIBPOSTAL_TOKEN_TYPE_OTHER 54 #define WHITESPACE LIBPOSTAL_TOKEN_TYPE_WHITESPACE 55 #define NEWLINE LIBPOSTAL_TOKEN_TYPE_NEWLINE 56 57 #define INVALID_CHAR LIBPOSTAL_TOKEN_TYPE_INVALID_CHAR 58 59 60 #define is_word_token(type) ((type) == WORD || (type) == ABBREVIATION || (type) == ACRONYM || (type) == IDEOGRAPHIC_CHAR || (type) == HANGUL_SYLLABLE) 61 62 #define is_ideographic(type) ((type) == IDEOGRAPHIC_CHAR || (type) == HANGUL_SYLLABLE || (type) == IDEOGRAPHIC_NUMBER) 63 64 #define is_numeric_token(type) ((type) == NUMERIC || (type) == IDEOGRAPHIC_NUMBER) 65 66 #define is_punctuation(type) ((type) >= PERIOD && (type) < OTHER) 67 68 #define is_special_punctuation(type) ((type) == AMPERSAND || (type) == PLUS || (type) == POUND) 69 70 #define is_special_token(type) ((type) == EMAIL || (type) == URL || (type) == US_PHONE || (type) == INTL_PHONE) 71 72 #define is_whitespace(type) ((type) == WHITESPACE) 73 74 #endif 75