1 /*
2   Copyright 2002-2004 John Plevyak, All Rights Reserved
3 */
4 
5 #ifndef _gram_H_
6 #define _gram_H_
7 
8 #define EOF_SENTINAL "\377"
9 #define NO_PROD 0xFFFFFFFF
10 
11 struct Production;
12 struct Rule;
13 struct Elem;
14 struct Term;
15 struct State;
16 struct ScanState;
17 struct ScanStateTransition;
18 struct D_ParserTables;
19 
20 typedef struct Elem Item;
21 
22 typedef struct Code {
23   char *code;
24   int line;
25 } Code;
26 
27 typedef struct Goto {
28   struct Elem *elem;
29   struct State *state;
30 } Goto;
31 typedef Vec(Goto *) VecGoto;
32 
33 typedef enum ActionKind { ACTION_ACCEPT, ACTION_SHIFT, ACTION_REDUCE, ACTION_SHIFT_TRAILING } ActionKind;
34 typedef struct Action {
35   ActionKind kind;
36   struct Term *term;
37   struct Rule *rule;
38   struct State *state;
39   uint index;
40   char *temp_string;
41 } Action;
42 typedef Vec(Action *) VecAction;
43 
44 typedef struct Hint {
45   uint depth;
46   struct State *state;
47   struct Rule *rule;
48 } Hint;
49 typedef Vec(Hint *) VecHint;
50 
51 typedef Vec(struct ScanStateTransition *) VecScanStateTransition;
52 typedef Vec(struct ScanState *) VecScanState;
53 
54 typedef struct Scanner {
55   VecScanState states;
56   VecScanStateTransition transitions;
57 } Scanner;
58 
59 typedef struct State {
60   uint index;
61   uint64 hash;
62   Vec(Item *) items;
63   Vec(Item *) items_hash;
64   VecGoto gotos;
65   VecAction shift_actions;
66   VecAction reduce_actions;
67   VecHint right_epsilon_hints;
68   VecHint error_recovery_hints;
69   Scanner scanner;
70   uint accept : 1;
71   uint scanner_code : 1;
72   uint goto_on_token : 1;
73   uint scan_kind : 3;
74   uint trailing_context : 1;
75   uint8 *goto_valid;
76   int goto_table_offset;
77   struct State *same_shifts;
78   struct State *reduces_to;
79   struct Rule *reduces_with;
80   struct Rule *reduces_to_then_with;
81 } State;
82 
83 #define ASSOC_LEFT 0x0001
84 #define ASSOC_RIGHT 0x0002
85 #define ASSOC_NARY 0x0004
86 #define ASSOC_UNARY 0x0008
87 #define ASSOC_BINARY 0x0010
88 
89 typedef enum AssocKind {
90   ASSOC_NONE = 0,
91   ASSOC_NARY_LEFT = (ASSOC_NARY | ASSOC_LEFT),
92   ASSOC_NARY_RIGHT = (ASSOC_NARY | ASSOC_RIGHT),
93   ASSOC_UNARY_LEFT = (ASSOC_UNARY | ASSOC_LEFT),
94   ASSOC_UNARY_RIGHT = (ASSOC_UNARY | ASSOC_RIGHT),
95   ASSOC_BINARY_LEFT = (ASSOC_BINARY | ASSOC_LEFT),
96   ASSOC_BINARY_RIGHT = (ASSOC_BINARY | ASSOC_RIGHT),
97   ASSOC_NO = 0x0020
98 } AssocKind;
99 #define IS_RIGHT_ASSOC(_x) ((_x)&ASSOC_RIGHT)
100 #define IS_LEFT_ASSOC(_x) ((_x)&ASSOC_LEFT)
101 #define IS_NARY_ASSOC(_x) ((_x)&ASSOC_NARY)
102 #define IS_BINARY_ASSOC(_x) ((_x)&ASSOC_BINARY)
103 #define IS_UNARY_ASSOC(_x) ((_x)&ASSOC_UNARY)
104 #define IS_UNARY_BINARY_ASSOC(_x) (IS_BINARY_ASSOC(_x) || IS_UNARY_ASSOC(_x))
105 #define IS_BINARY_NARY_ASSOC(_x) (IS_BINARY_ASSOC(_x) || IS_NARY_ASSOC(_x))
106 /* not valid for NARY */
107 #define IS_EXPECT_RIGHT_ASSOC(_x) ((_x) && (_x) != ASSOC_UNARY_LEFT)
108 #define IS_EXPECT_LEFT_ASSOC(_x) ((_x) && _x != ASSOC_UNARY_RIGHT)
109 typedef struct Rule {
110   uint index;
111   struct Production *prod;
112   int op_priority;
113   AssocKind op_assoc;
114   int rule_priority;
115   AssocKind rule_assoc;
116   Vec(struct Elem *) elems;
117   struct Elem *end;
118   Code speculative_code;
119   Code final_code;
120   Vec(Code *) pass_code;
121   int action_index;
122   struct Rule *same_reduction;
123 } Rule;
124 
125 typedef enum TermKind { TERM_STRING, TERM_REGEX, TERM_CODE, TERM_TOKEN } TermKind;
126 typedef struct Term {
127   TermKind kind;
128   uint index;
129   int term_priority;
130   char *term_name;
131   AssocKind op_assoc;
132   int op_priority;
133   char *string;
134   int string_len;
135   uint scan_kind : 3;
136   uint ignore_case : 1;
137   uint trailing_context : 1;
138   struct Production *regex_production;
139 } Term;
140 
141 typedef Vec(Term *) TermVec;
142 
143 typedef enum DeclarationKind {
144   DECLARE_TOKENIZE,
145   DECLARE_LONGEST_MATCH,
146   DECLARE_ALL_MATCHES,
147   DECLARE_SET_OP_PRIORITY,
148   DECLARE_STATES_FOR_ALL_NTERMS,
149   DECLARE_STATE_FOR,
150   DECLARE_WHITESPACE,
151   DECLARE_SAVE_PARSE_TREE,
152   DECLARE_NUM
153 } DeclarationKind;
154 typedef struct Declaration {
155   struct Elem *elem;
156   uint kind;
157   uint index;
158 } Declaration;
159 
160 typedef enum InternalKind {
161   INTERNAL_NOT,
162   INTERNAL_HIDDEN,
163   INTERNAL_CONDITIONAL,
164   INTERNAL_STAR,
165   INTERNAL_PLUS
166 } InternalKind;
167 
168 typedef struct Production {
169   char *name;
170   uint name_len;
171   Vec(Rule *) rules;
172   uint index;
173   uint regex : 1;
174   uint in_regex : 1;
175   uint internal : 3; /* production used for EBNF */
176   uint live : 1;
177   Rule *nullable; /* shortest rule for epsilon reduction */
178   struct Production *declaration_group[DECLARE_NUM];
179   struct Declaration *last_declaration[DECLARE_NUM];
180   State *state;            /* state for independent parsing of this productions*/
181   struct Elem *elem;       /* base elem for the item set of the above state */
182   struct Term *regex_term; /* regex production terminal */
183   char *regex_term_name;
184   struct Production *next;
185 } Production;
186 
187 typedef enum ElemKind { ELEM_NTERM, ELEM_TERM, ELEM_UNRESOLVED, ELEM_END } ElemKind;
188 typedef struct Elem {
189   ElemKind kind;
190   uint index;
191   Rule *rule;
192   union {
193     Production *nterm;
194     Term *term;
195     void *term_or_nterm;
196     struct Unresolved {
197       char *string;
198       uint len;
199     } unresolved;
200   } e;
201 } Elem;
202 
203 typedef struct Grammar {
204   char *pathname;
205   Vec(Production *) productions;
206   Vec(Term *) terminals;
207   Vec(State *) states;
208   Vec(Action *) actions;
209   Code scanner;
210   Code *code;
211   uint ncode;
212   Vec(Declaration *) declarations;
213   Vec(D_Pass *) passes;
214   Vec(char *) all_pathnames;
215   char *default_white_space;
216   /* grammar construction options */
217   int set_op_priority_from_rule;
218   int right_recursive_BNF;
219   int states_for_whitespace;
220   int states_for_all_nterms;
221   int tokenizer;
222   int longest_match;
223   int save_parse_tree;
224   /* grammar writing options */
225   char grammar_ident[256];
226   int scanner_blocks;
227   int scanner_block_size;
228   int write_line_directives;
229   int write_header;
230   int token_type;
231   int write_cpp;
232   char write_extension[256];
233   /* temporary variables for grammar construction */
234   struct Production *p;
235   struct Rule *r;
236   struct Elem *e;
237   int action_index;
238   int action_count;
239   int pass_index;
240   int rule_index;
241   int write_line;
242   char *write_pathname;
243 } Grammar;
244 
245 /* automatically add %op_XXX to rightmost token of %XXX rule, default off */
246 
247 Grammar *new_D_Grammar(char *pathname);
248 void free_D_Grammar(Grammar *g);
249 int build_grammar(Grammar *g);
250 int parse_grammar(Grammar *g, char *pathname, char *str);
251 void print_grammar(Grammar *g);
252 void print_rdebug_grammar(Grammar *g, char *pathname);
253 void print_states(Grammar *g);
254 void print_rule(Rule *r);
255 void print_term(Term *t);
256 Production *lookup_production(Grammar *g, char *name, uint len);
257 
258 /* for creating grammars */
259 #define last_elem(_r) ((_r)->elems.v[(_r)->elems.n - 1])
260 
261 Rule *new_rule(Grammar *g, Production *p);
262 Elem *new_elem_nterm(Production *p, Rule *r);
263 void new_declaration(Grammar *g, Elem *e, uint kind);
264 Production *new_production(Grammar *g, char *name);
265 Elem *new_string(Grammar *g, char *s, char *e, Rule *r);
266 Elem *new_utf8_char(Grammar *g, char *s, char *e, Rule *r);
267 Elem *new_ident(char *s, char *e, Rule *r);
268 void new_token(Grammar *g, char *s, char *e);
269 Elem *new_code(Grammar *g, char *s, char *e, Rule *r);
270 void add_global_code(Grammar *g, char *start, char *end, int line);
271 Production *new_internal_production(Grammar *g, Production *p);
272 Elem *dup_elem(Elem *e, Rule *r);
273 void add_declaration(Grammar *g, char *start, char *end, uint kind, uint line);
274 void add_pass(Grammar *g, char *start, char *end, uint kind, uint line);
275 void add_pass_code(Grammar *g, Rule *r, char *pass_start, char *pass_end, char *code_start, char *code_end, uint line,
276                    uint pass_line);
277 D_Pass *find_pass(Grammar *g, char *start, char *end);
278 void conditional_EBNF(Grammar *g); /* applied to g->e,g->r,g->p */
279 void star_EBNF(Grammar *g);        /* ditto */
280 void plus_EBNF(Grammar *g);        /* ditto */
281 void rep_EBNF(Grammar *g, int minimum, int maximum);
282 void initialize_productions(Grammar *g);
283 void finalize_productions(Grammar *g);
284 uint state_for_declaration(Grammar *g, uint iproduction);
285 
286 #endif
287