1 /* 2 Copyright 2002-2004 John Plevyak, All Rights Reserved 3 */ 4 5 #ifndef _gram_H_ 6 #define _gram_H_ 7 8 #define EOF_SENTINAL "\377" 9 #define NO_PROD 0xFFFFFFFF 10 11 struct Production; 12 struct Rule; 13 struct Elem; 14 struct Term; 15 struct State; 16 struct ScanState; 17 struct ScanStateTransition; 18 struct D_ParserTables; 19 20 typedef struct Elem Item; 21 22 typedef struct Code { 23 char *code; 24 int line; 25 } Code; 26 27 typedef struct Goto { 28 struct Elem *elem; 29 struct State *state; 30 } Goto; 31 typedef Vec(Goto *) VecGoto; 32 33 typedef enum ActionKind { ACTION_ACCEPT, ACTION_SHIFT, ACTION_REDUCE, ACTION_SHIFT_TRAILING } ActionKind; 34 typedef struct Action { 35 ActionKind kind; 36 struct Term *term; 37 struct Rule *rule; 38 struct State *state; 39 uint index; 40 char *temp_string; 41 } Action; 42 typedef Vec(Action *) VecAction; 43 44 typedef struct Hint { 45 uint depth; 46 struct State *state; 47 struct Rule *rule; 48 } Hint; 49 typedef Vec(Hint *) VecHint; 50 51 typedef Vec(struct ScanStateTransition *) VecScanStateTransition; 52 typedef Vec(struct ScanState *) VecScanState; 53 54 typedef struct Scanner { 55 VecScanState states; 56 VecScanStateTransition transitions; 57 } Scanner; 58 59 typedef struct State { 60 uint index; 61 uint64 hash; 62 Vec(Item *) items; 63 Vec(Item *) items_hash; 64 VecGoto gotos; 65 VecAction shift_actions; 66 VecAction reduce_actions; 67 VecHint right_epsilon_hints; 68 VecHint error_recovery_hints; 69 Scanner scanner; 70 uint accept : 1; 71 uint scanner_code : 1; 72 uint goto_on_token : 1; 73 uint scan_kind : 3; 74 uint trailing_context : 1; 75 uint8 *goto_valid; 76 int goto_table_offset; 77 struct State *same_shifts; 78 struct State *reduces_to; 79 struct Rule *reduces_with; 80 struct Rule *reduces_to_then_with; 81 } State; 82 83 #define ASSOC_LEFT 0x0001 84 #define ASSOC_RIGHT 0x0002 85 #define ASSOC_NARY 0x0004 86 #define ASSOC_UNARY 0x0008 87 #define ASSOC_BINARY 0x0010 88 89 typedef enum AssocKind { 90 ASSOC_NONE = 0, 91 ASSOC_NARY_LEFT = (ASSOC_NARY | ASSOC_LEFT), 92 ASSOC_NARY_RIGHT = (ASSOC_NARY | ASSOC_RIGHT), 93 ASSOC_UNARY_LEFT = (ASSOC_UNARY | ASSOC_LEFT), 94 ASSOC_UNARY_RIGHT = (ASSOC_UNARY | ASSOC_RIGHT), 95 ASSOC_BINARY_LEFT = (ASSOC_BINARY | ASSOC_LEFT), 96 ASSOC_BINARY_RIGHT = (ASSOC_BINARY | ASSOC_RIGHT), 97 ASSOC_NO = 0x0020 98 } AssocKind; 99 #define IS_RIGHT_ASSOC(_x) ((_x)&ASSOC_RIGHT) 100 #define IS_LEFT_ASSOC(_x) ((_x)&ASSOC_LEFT) 101 #define IS_NARY_ASSOC(_x) ((_x)&ASSOC_NARY) 102 #define IS_BINARY_ASSOC(_x) ((_x)&ASSOC_BINARY) 103 #define IS_UNARY_ASSOC(_x) ((_x)&ASSOC_UNARY) 104 #define IS_UNARY_BINARY_ASSOC(_x) (IS_BINARY_ASSOC(_x) || IS_UNARY_ASSOC(_x)) 105 #define IS_BINARY_NARY_ASSOC(_x) (IS_BINARY_ASSOC(_x) || IS_NARY_ASSOC(_x)) 106 /* not valid for NARY */ 107 #define IS_EXPECT_RIGHT_ASSOC(_x) ((_x) && (_x) != ASSOC_UNARY_LEFT) 108 #define IS_EXPECT_LEFT_ASSOC(_x) ((_x) && _x != ASSOC_UNARY_RIGHT) 109 typedef struct Rule { 110 uint index; 111 struct Production *prod; 112 int op_priority; 113 AssocKind op_assoc; 114 int rule_priority; 115 AssocKind rule_assoc; 116 Vec(struct Elem *) elems; 117 struct Elem *end; 118 Code speculative_code; 119 Code final_code; 120 Vec(Code *) pass_code; 121 int action_index; 122 struct Rule *same_reduction; 123 } Rule; 124 125 typedef enum TermKind { TERM_STRING, TERM_REGEX, TERM_CODE, TERM_TOKEN } TermKind; 126 typedef struct Term { 127 TermKind kind; 128 uint index; 129 int term_priority; 130 char *term_name; 131 AssocKind op_assoc; 132 int op_priority; 133 char *string; 134 int string_len; 135 uint scan_kind : 3; 136 uint ignore_case : 1; 137 uint trailing_context : 1; 138 struct Production *regex_production; 139 } Term; 140 141 typedef Vec(Term *) TermVec; 142 143 typedef enum DeclarationKind { 144 DECLARE_TOKENIZE, 145 DECLARE_LONGEST_MATCH, 146 DECLARE_ALL_MATCHES, 147 DECLARE_SET_OP_PRIORITY, 148 DECLARE_STATES_FOR_ALL_NTERMS, 149 DECLARE_STATE_FOR, 150 DECLARE_WHITESPACE, 151 DECLARE_SAVE_PARSE_TREE, 152 DECLARE_NUM 153 } DeclarationKind; 154 typedef struct Declaration { 155 struct Elem *elem; 156 uint kind; 157 uint index; 158 } Declaration; 159 160 typedef enum InternalKind { 161 INTERNAL_NOT, 162 INTERNAL_HIDDEN, 163 INTERNAL_CONDITIONAL, 164 INTERNAL_STAR, 165 INTERNAL_PLUS 166 } InternalKind; 167 168 typedef struct Production { 169 char *name; 170 uint name_len; 171 Vec(Rule *) rules; 172 uint index; 173 uint regex : 1; 174 uint in_regex : 1; 175 uint internal : 3; /* production used for EBNF */ 176 uint live : 1; 177 Rule *nullable; /* shortest rule for epsilon reduction */ 178 struct Production *declaration_group[DECLARE_NUM]; 179 struct Declaration *last_declaration[DECLARE_NUM]; 180 State *state; /* state for independent parsing of this productions*/ 181 struct Elem *elem; /* base elem for the item set of the above state */ 182 struct Term *regex_term; /* regex production terminal */ 183 char *regex_term_name; 184 struct Production *next; 185 } Production; 186 187 typedef enum ElemKind { ELEM_NTERM, ELEM_TERM, ELEM_UNRESOLVED, ELEM_END } ElemKind; 188 typedef struct Elem { 189 ElemKind kind; 190 uint index; 191 Rule *rule; 192 union { 193 Production *nterm; 194 Term *term; 195 void *term_or_nterm; 196 struct Unresolved { 197 char *string; 198 uint len; 199 } unresolved; 200 } e; 201 } Elem; 202 203 typedef struct Grammar { 204 char *pathname; 205 Vec(Production *) productions; 206 Vec(Term *) terminals; 207 Vec(State *) states; 208 Vec(Action *) actions; 209 Code scanner; 210 Code *code; 211 uint ncode; 212 Vec(Declaration *) declarations; 213 Vec(D_Pass *) passes; 214 Vec(char *) all_pathnames; 215 char *default_white_space; 216 /* grammar construction options */ 217 int set_op_priority_from_rule; 218 int right_recursive_BNF; 219 int states_for_whitespace; 220 int states_for_all_nterms; 221 int tokenizer; 222 int longest_match; 223 int save_parse_tree; 224 /* grammar writing options */ 225 char grammar_ident[256]; 226 int scanner_blocks; 227 int scanner_block_size; 228 int write_line_directives; 229 int write_header; 230 int token_type; 231 int write_cpp; 232 char write_extension[256]; 233 /* temporary variables for grammar construction */ 234 struct Production *p; 235 struct Rule *r; 236 struct Elem *e; 237 int action_index; 238 int action_count; 239 int pass_index; 240 int rule_index; 241 int write_line; 242 char *write_pathname; 243 } Grammar; 244 245 /* automatically add %op_XXX to rightmost token of %XXX rule, default off */ 246 247 Grammar *new_D_Grammar(char *pathname); 248 void free_D_Grammar(Grammar *g); 249 int build_grammar(Grammar *g); 250 int parse_grammar(Grammar *g, char *pathname, char *str); 251 void print_grammar(Grammar *g); 252 void print_rdebug_grammar(Grammar *g, char *pathname); 253 void print_states(Grammar *g); 254 void print_rule(Rule *r); 255 void print_term(Term *t); 256 Production *lookup_production(Grammar *g, char *name, uint len); 257 258 /* for creating grammars */ 259 #define last_elem(_r) ((_r)->elems.v[(_r)->elems.n - 1]) 260 261 Rule *new_rule(Grammar *g, Production *p); 262 Elem *new_elem_nterm(Production *p, Rule *r); 263 void new_declaration(Grammar *g, Elem *e, uint kind); 264 Production *new_production(Grammar *g, char *name); 265 Elem *new_string(Grammar *g, char *s, char *e, Rule *r); 266 Elem *new_utf8_char(Grammar *g, char *s, char *e, Rule *r); 267 Elem *new_ident(char *s, char *e, Rule *r); 268 void new_token(Grammar *g, char *s, char *e); 269 Elem *new_code(Grammar *g, char *s, char *e, Rule *r); 270 void add_global_code(Grammar *g, char *start, char *end, int line); 271 Production *new_internal_production(Grammar *g, Production *p); 272 Elem *dup_elem(Elem *e, Rule *r); 273 void add_declaration(Grammar *g, char *start, char *end, uint kind, uint line); 274 void add_pass(Grammar *g, char *start, char *end, uint kind, uint line); 275 void add_pass_code(Grammar *g, Rule *r, char *pass_start, char *pass_end, char *code_start, char *code_end, uint line, 276 uint pass_line); 277 D_Pass *find_pass(Grammar *g, char *start, char *end); 278 void conditional_EBNF(Grammar *g); /* applied to g->e,g->r,g->p */ 279 void star_EBNF(Grammar *g); /* ditto */ 280 void plus_EBNF(Grammar *g); /* ditto */ 281 void rep_EBNF(Grammar *g, int minimum, int maximum); 282 void initialize_productions(Grammar *g); 283 void finalize_productions(Grammar *g); 284 uint state_for_declaration(Grammar *g, uint iproduction); 285 286 #endif 287