1 /***********************************************************************
2  *                                                                      *
3  *               This software is part of the ast package               *
4  *          Copyright (c) 1982-2013 AT&T Intellectual Property          *
5  *                      and is licensed under the                       *
6  *                 Eclipse Public License, Version 1.0                  *
7  *                    by AT&T Intellectual Property                     *
8  *                                                                      *
9  *                A copy of the License is available at                 *
10  *          http://www.eclipse.org/org/documents/epl-v10.html           *
11  *         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12  *                                                                      *
13  *              Information and Software Systems Research               *
14  *                            AT&T Research                             *
15  *                           Florham Park NJ                            *
16  *                                                                      *
17  *                    David Korn <dgkorn@gmail.com>                     *
18  *                                                                      *
19  ***********************************************************************/
20 #ifndef _LEXSTATES_H
21 #define _LEXSTATES_H 1
22 
23 #include <stdbool.h>
24 #include <wchar.h>
25 #include <wctype.h>
26 
// Lexical state codes. Entries of the sh_lexstates[] tables are compared against these values
// by the is*() helpers later in this header.
#define S_NOP 0    // absence of a state change, do nothing
#define S_BREAK 1  // end of token
#define S_EOF 2    // end of buffer
#define S_NL 3     // new-line when not a token
#define S_RES 4    // first character of reserved word
#define S_NAME 5   // other identifier characters
#define S_REG 6    // non-special characters
#define S_TILDE 7  // first char is tilde
#define S_PUSH 8
#define S_POP 9
#define S_BRACT 10
#define S_LIT 11         // literal quote character
#define S_NLTOK 12       // new-line token
#define S_OP 13          // operator character
#define S_PAT 14         // pattern characters * and ?
#define S_EPAT 15        // pattern char when followed by (
#define S_EQ 16          // assignment character
#define S_COM 17         // comment character
#define S_MOD1 18        // ${...} modifier character - old quoting
#define S_MOD2 19        // ${...} modifier character - new quoting
#define S_ERR 20         // invalid character in ${...}
#define S_SPC1 21        // special prefix characters after $
#define S_SPC2 22        // special characters after $
#define S_DIG 23         // digit character after $
#define S_ALP 24         // alphabetic character after $
#define S_LBRA 25        // left brace after $
#define S_RBRA 26        // right brace after $
#define S_PAR 27         // set for $(
#define S_ENDCH 28       // macro expansion terminator
#define S_SLASH 29       // / character terminates ~ expansion
#define S_COLON 30       // for character :
#define S_LABEL 31       // for goto label
#define S_EDOL 32        // ends $identifier
#define S_BRACE 33       // left brace
#define S_DOT 34         // . char
#define S_META 35        // | & ; < > inside ${...} reserved for future use
// The next three are aliases: they reuse the numeric values of S_BREAK, S_RES and S_NAME.
#define S_SPACE S_BREAK  // IFS space characters
#define S_DELIM S_RES    // IFS delimiter characters
#define S_MBYTE S_NAME   // IFS first byte of multi-byte char
#define S_BLNK 36        // space or tab
// The following must be the highest numbered states.
#define S_QUOTE 37  // double quote character
#define S_GRAVE 38  // old comsub character
#define S_ESC 39    // escape character
#define S_DOL 40    // $ substitution character
#define S_ESC2 41   // escape character inside '...'
73 
// These are the lexical state table names. Each value is used as the first index into
// sh_lexstates[]; ST_NONE is one past the last table and is the declared length of that array.
#define ST_BEGIN 0
#define ST_NAME 1
#define ST_NORM 2
#define ST_LIT 3
#define ST_QUOTE 4
#define ST_NESTED 5
#define ST_DOL 6
#define ST_BRACE 7
#define ST_DOLNAME 8
#define ST_MACRO 9
#define ST_QNEST 10
#define ST_NONE 11
87 
// LEN names the `fclen` member of `_Fcin`; any earlier definition of LEN is discarded first.
// NOTE(review): presumably this is the byte length of the character most recently read by
// fcmbget() -- confirm against the fcin implementation.
#undef LEN
#define LEN _Fcin.fclen

// STATE(s, c): read the next input character into `c` and yield the entry of state table `s`
// for it. `c` must be a modifiable lvalue because it is assigned inside the macro.
//
// When mbwide() is true the character is read with fcmbget(&LEN); if LEN is then greater than
// one, the table is indexed with 'a' instead of the character itself (while `c` keeps the value
// that was read). Otherwise the character is read with fcget() and used directly as the index.
//
// Both parameters are parenthesized so that expression arguments (e.g. `tbl + 1`) bind as a unit.
#define STATE(s, c) \
    ((s)[mbwide() ? (((c) = fcmbget(&LEN)), LEN > 1 ? 'a' : (c)) : ((c) = fcget())])
91 
92 extern const char *sh_lexstates[ST_NONE];
93 extern const char e_lexversion[];
94 extern const char e_lexspace[];
95 extern const char e_lexslash[];
96 extern const char e_lexlabignore[];
97 extern const char e_lexlabunknown[];
98 extern const char e_lexsyntax1[];
99 extern const char e_lexsyntax2[];
100 extern const char e_lexsyntax3[];
101 extern const char e_lexsyntax4[];
102 extern const char e_lexsyntax5[];
103 extern const char e_lexwarnvar[];
104 extern const char e_lexarithwarn[];
105 extern const char e_lexobsolete1[];
106 extern const char e_lexobsolete2[];
107 extern const char e_lexobsolete3[];
108 extern const char e_lexobsolete4[];
109 extern const char e_lexobsolete5[];
110 extern const char e_lexobsolete6[];
111 extern const char e_lexnonstandard[];
112 extern const char e_lexusebrace[];
113 extern const char e_lexusequote[];
114 extern const char e_lexescape[];
115 extern const char e_lexquote[];
116 extern const char e_lexnested[];
117 extern const char e_lexbadchar[];
118 extern const char e_lexlongquote[];
119 extern const char e_lexfuture[];
120 extern const char e_lexzerobyte[];
121 extern const char e_lexemptyfor[];
122 extern const char e_lextypeset[];
123 extern const char e_lexcharclass[];
124 
isaname(int c)125 static inline bool isaname(int c) {
126     if (c < 0) return false;
127     if (c > 0x7F) return iswalpha(c);
128     return sh_lexstates[ST_NAME][c] == S_NOP;
129 }
130 
isaletter(int c)131 static inline bool isaletter(int c) {
132     if (c < 0) return false;
133     if (c > 0x7F) return iswalpha(c);
134     return sh_lexstates[ST_DOL][c] == S_ALP && c != '.';
135 }
136 
isadigit(int c)137 static inline bool isadigit(int c) {
138     if (c < 0) return false;
139     return sh_lexstates[ST_DOL][c] == S_DIG;
140 }
141 
// Report whether `c` is '@' or '*'. Negative values are rejected.
static inline bool isastchar(int c) {
    if (c < 0) return false;
    return c == '@' || c == '*';
}
146 
isexp(int c)147 static inline bool isexp(int c) {
148     if (c < 0) return false;
149     return sh_lexstates[ST_MACRO][c] == S_PAT || (c) == '$' || (c) == '`';
150 }
151 
ismeta(int c)152 static inline bool ismeta(int c) {
153     if (c < 0) return false;
154     return sh_lexstates[ST_NAME][c] == S_BREAK;
155 }
156 
157 #endif  // _LEXSTATES_H
158