/***********************************************************************
*                                                                      *
*               This software is part of the ast package               *
*          Copyright (c) 1982-2013 AT&T Intellectual Property          *
*                      and is licensed under the                       *
*                 Eclipse Public License, Version 1.0                  *
*                    by AT&T Intellectual Property                     *
*                                                                      *
*                A copy of the License is available at                 *
*          http://www.eclipse.org/org/documents/epl-v10.html           *
*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
*                                                                      *
*              Information and Software Systems Research               *
*                            AT&T Research                             *
*                           Florham Park NJ                            *
*                                                                      *
*                    David Korn <dgkorn@gmail.com>                     *
*                                                                      *
***********************************************************************/
//
// Lexical analyzer state definitions: the state codes stored in the lexical
// state tables, the indices of those tables within `sh_lexstates`, the
// declarations of the lexer message strings, and small character
// classification helpers built on the tables.
//
#ifndef _LEXSTATES_H
#define _LEXSTATES_H 1

#include <stdbool.h>
#include <wchar.h>
#include <wctype.h>

#define S_NOP 0           // absence of a state change, do nothing
#define S_BREAK 1         // end of token
#define S_EOF 2           // end of buffer
#define S_NL 3            // new-line when not a token
#define S_RES 4           // first character of reserved word
#define S_NAME 5          // other identifier characters
#define S_REG 6           // non-special characters
#define S_TILDE 7         // first char is tilde
#define S_PUSH 8
#define S_POP 9
#define S_BRACT 10
#define S_LIT 11          // literal quote character
#define S_NLTOK 12        // new-line token
#define S_OP 13           // operator character
#define S_PAT 14          // pattern characters * and ?
#define S_EPAT 15         // pattern char when followed by (
#define S_EQ 16           // assignment character
#define S_COM 17          // comment character
#define S_MOD1 18         // ${...} modifier character - old quoting
#define S_MOD2 19         // ${...} modifier character - new quoting
#define S_ERR 20          // invalid character in ${...}
#define S_SPC1 21         // special prefix characters after $
#define S_SPC2 22         // special characters after $
#define S_DIG 23          // digit character after $
#define S_ALP 24          // alphabetic character after $
#define S_LBRA 25         // left brace after $
#define S_RBRA 26         // right brace after $
#define S_PAR 27          // set for $(
#define S_ENDCH 28        // macro expansion terminator
#define S_SLASH 29        // / character terminates ~ expansion
#define S_COLON 30        // for character :
#define S_LABEL 31        // for goto label
#define S_EDOL 32         // ends $identifier
#define S_BRACE 33        // left brace
#define S_DOT 34          // . char
#define S_META 35         // | & ; < > inside ${...} reserved for future use
#define S_SPACE S_BREAK   // IFS space characters
#define S_DELIM S_RES     // IFS delimiter characters
#define S_MBYTE S_NAME    // IFS first byte of multi-byte char
#define S_BLNK 36         // space or tab
// The following must be the highest numbered states.
#define S_QUOTE 37        // double quote character
#define S_GRAVE 38        // old comsub character
#define S_ESC 39          // escape character
#define S_DOL 40          // $ substitution character
#define S_ESC2 41         // escape character inside '...'

// These are the lexical state table names. Each one is an index into
// `sh_lexstates`; ST_NONE is the number of tables.
#define ST_BEGIN 0
#define ST_NAME 1
#define ST_NORM 2
#define ST_LIT 3
#define ST_QUOTE 4
#define ST_NESTED 5
#define ST_DOL 6
#define ST_BRACE 7
#define ST_DOLNAME 8
#define ST_MACRO 9
#define ST_QNEST 10
#define ST_NONE 11

#undef LEN
#define LEN _Fcin.fclen

// STATE(s, c): read the next input character into `c` and yield the entry of
// state table `s` selected by it.
//
// When mbwide() is true the character is read with fcmbget(&LEN); if LEN is
// greater than 1 afterwards the table is indexed with 'a' instead of the
// character itself (presumably so that multi-byte characters are classified
// like a letter -- confirm against fcmbget()). Otherwise the character is read
// with fcget().
//
// `c` must be a modifiable lvalue; it is assigned by the macro and may be
// evaluated more than once. Both arguments are parenthesized in the expansion
// so that expression arguments (e.g. `tab + 1`) bind as the caller intends.
#define STATE(s, c) \
    ((s)[mbwide() ? (((c) = fcmbget(&LEN)), LEN > 1 ? 'a' : (c)) : ((c) = fcget())])

extern const char *sh_lexstates[ST_NONE];
extern const char e_lexversion[];
extern const char e_lexspace[];
extern const char e_lexslash[];
extern const char e_lexlabignore[];
extern const char e_lexlabunknown[];
extern const char e_lexsyntax1[];
extern const char e_lexsyntax2[];
extern const char e_lexsyntax3[];
extern const char e_lexsyntax4[];
extern const char e_lexsyntax5[];
extern const char e_lexwarnvar[];
extern const char e_lexarithwarn[];
extern const char e_lexobsolete1[];
extern const char e_lexobsolete2[];
extern const char e_lexobsolete3[];
extern const char e_lexobsolete4[];
extern const char e_lexobsolete5[];
extern const char e_lexobsolete6[];
extern const char e_lexnonstandard[];
extern const char e_lexusebrace[];
extern const char e_lexusequote[];
extern const char e_lexescape[];
extern const char e_lexquote[];
extern const char e_lexnested[];
extern const char e_lexbadchar[];
extern const char e_lexlongquote[];
extern const char e_lexfuture[];
extern const char e_lexzerobyte[];
extern const char e_lexemptyfor[];
extern const char e_lextypeset[];
extern const char e_lexcharclass[];

// Returns true when `c` may appear in a name: negative values are rejected,
// values above 0x7F are classified with iswalpha(), and every other value is
// accepted when its ST_NAME table entry is S_NOP.
static inline bool isaname(int c) {
    if (c < 0) return false;
    if (c > 0x7F) return iswalpha(c);
    return sh_lexstates[ST_NAME][c] == S_NOP;
}

// Returns true when `c` is a letter: negative values are rejected, values
// above 0x7F are classified with iswalpha(), and every other value is accepted
// when its ST_DOL table entry is S_ALP and it is not '.'.
static inline bool isaletter(int c) {
    if (c < 0) return false;
    if (c > 0x7F) return iswalpha(c);
    return sh_lexstates[ST_DOL][c] == S_ALP && c != '.';
}

// Returns true when the ST_DOL table entry for `c` is S_DIG. Negative values
// are rejected.
// NOTE(review): `c` indexes the table with no upper bound check; the table
// size is not visible in this header -- confirm callers stay within range.
static inline bool isadigit(int c) {
    if (c < 0) return false;
    return sh_lexstates[ST_DOL][c] == S_DIG;
}

// Returns true when `c` is '@' or '*'.
static inline bool isastchar(int c) {
    if (c < 0) return false;
    return c == '@' || c == '*';
}

// Returns true when the ST_MACRO table entry for `c` is S_PAT, or `c` is '$'
// or '`'. Negative values are rejected.
// NOTE(review): `c` indexes the table with no upper bound check; the table
// size is not visible in this header -- confirm callers stay within range.
static inline bool isexp(int c) {
    if (c < 0) return false;
    return sh_lexstates[ST_MACRO][c] == S_PAT || (c) == '$' || (c) == '`';
}

// Returns true when the ST_NAME table entry for `c` is S_BREAK. Negative
// values are rejected.
// NOTE(review): `c` indexes the table with no upper bound check; the table
// size is not visible in this header -- confirm callers stay within range.
static inline bool ismeta(int c) {
    if (c < 0) return false;
    return sh_lexstates[ST_NAME][c] == S_BREAK;
}

#endif  // _LEXSTATES_H