1 /* 2 tre-ast.h - Abstract syntax tree (AST) definitions 3 4 This software is released under a BSD-style license. 5 See the file LICENSE for details and copyright. 6 7 */ 8 9 10 #ifndef TRE_AST_H 11 #define TRE_AST_H 1 12 13 #include <limits.h> 14 15 #include "tre-mem.h" 16 #include "tre-internal.h" 17 #include "tre-compile.h" 18 19 /* The different AST node types. */ 20 typedef enum { 21 LITERAL, 22 CATENATION, 23 ITERATION, 24 UNION 25 } tre_ast_type_t; 26 27 /* Special subtypes of TRE_LITERAL. */ 28 #define EMPTY -1 /* Empty leaf (denotes empty string). */ 29 #define ASSERTION -2 /* Assertion leaf. */ 30 #define TAG -3 /* Tag leaf. */ 31 #define BACKREF -4 /* Back reference leaf. */ 32 #define PARAMETER -5 /* Parameter. */ 33 34 #define IS_SPECIAL(x) ((x)->code_min < 0) 35 #define IS_EMPTY(x) ((x)->code_min == EMPTY) 36 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION) 37 #define IS_TAG(x) ((x)->code_min == TAG) 38 #define IS_BACKREF(x) ((x)->code_min == BACKREF) 39 #define IS_PARAMETER(x) ((x)->code_min == PARAMETER) 40 41 #define SUBMATCH_ID_INVISIBLE_START (INT_MAX / 2 + 1) 42 43 44 /* A generic AST node. All AST nodes consist of this node on the top 45 level with `obj' pointing to the actual content. */ 46 typedef struct _tre_ast_node { 47 void *obj; /* Pointer to actual node. */ 48 tre_last_matched_branch_pre_t *last_matched_branch; 49 tre_last_matched_pre_t *last_matched_in_progress; 50 tre_pos_and_tags_t *firstpos; 51 tre_pos_and_tags_t *lastpos; 52 /* The original pointer is used to point to the original node, when a 53 * CATENATION and tag are added. If NULL, this is node is the original */ 54 struct _tre_ast_node *original; 55 tre_ast_type_t type; /* Type of the node. */ 56 int submatch_id; 57 int num_submatches; 58 int num_tags; 59 short nullable; 60 short make_branches; 61 } tre_ast_node_t; 62 63 64 /* A "literal" node. These are created for assertions, back references, 65 tags, matching parameter settings, and all expressions that match one 66 character. */ 67 typedef struct { 68 tre_cint_t code_min; 69 tre_cint_t code_max; 70 int position; 71 union { 72 tre_bracket_match_list_t *bracket_match_list; 73 int *params; 74 } u; 75 } tre_literal_t; 76 77 /* A "catenation" node. These are created when two regexps are concatenated. 78 If there are more than one subexpressions in sequence, the `left' part 79 holds all but the last, and `right' part holds the last subexpression 80 (catenation is left associative). */ 81 typedef struct { 82 tre_ast_node_t *left; 83 tre_ast_node_t *right; 84 } tre_catenation_t; 85 86 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}" 87 operators. */ 88 typedef struct { 89 /* Subexpression to match. */ 90 tre_ast_node_t *arg; 91 /* Minimum number of consecutive matches. */ 92 int min; 93 /* Maximum number of consecutive matches. */ 94 int max; 95 /* If 0, match as many characters as possible, if 1 match as few as 96 possible. Note that this does not always mean the same thing as 97 matching as many/few repetitions as possible. */ 98 unsigned int minimal:1; 99 /* Approximate matching parameters (or NULL). */ 100 int *params; 101 } tre_iteration_t; 102 103 /* An "union" node. These are created for the "|" operator. */ 104 typedef struct { 105 tre_ast_node_t *left; 106 tre_ast_node_t *right; 107 /* The left end right end tags if non-zero */ 108 int left_tag; 109 int right_tag; 110 } tre_union_t; 111 112 tre_ast_node_t * 113 tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size); 114 115 tre_ast_node_t * 116 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position); 117 118 tre_ast_node_t * 119 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, 120 int minimal); 121 122 tre_ast_node_t * 123 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right); 124 125 tre_ast_node_t * 126 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, 127 tre_ast_node_t *right); 128 129 #ifdef TRE_DEBUG 130 void 131 tre_ast_print(tre_ast_node_t *tree); 132 133 /* XXX - rethink AST printing API */ 134 void 135 tre_print_params(int *params); 136 #endif /* TRE_DEBUG */ 137 138 #endif /* TRE_AST_H */ 139 140 /* EOF */ 141