1 /*
2 tre-ast.c - Abstract syntax tree (AST) routines
3
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
6
7 */
8
9 #ifdef HAVE_CONFIG_H
10 #include <config.h>
11 #endif /* HAVE_CONFIG_H */
12 #include <assert.h>
13 #include <limits.h>
14
15 #include "tre-ast.h"
16 #include "tre-mem.h"
17
18 tre_ast_node_t *
tre_ast_new_node(tre_mem_t mem,tre_ast_type_t type,size_t size)19 tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
20 {
21 tre_ast_node_t *node;
22
23 node = tre_mem_calloc(mem, sizeof(*node));
24 if (!node)
25 return NULL;
26 node->obj = tre_mem_calloc(mem, size);
27 if (!node->obj)
28 return NULL;
29 node->type = type;
30 node->nullable = -1;
31 node->submatch_id = -1;
32
33 return node;
34 }
35
36 tre_ast_node_t *
tre_ast_new_literal(tre_mem_t mem,int code_min,int code_max,int position)37 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
38 {
39 tre_ast_node_t *node;
40 tre_literal_t *lit;
41
42 node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
43 if (!node)
44 return NULL;
45 lit = node->obj;
46 lit->code_min = code_min;
47 lit->code_max = code_max;
48 lit->position = position;
49
50 return node;
51 }
52
53 tre_ast_node_t *
tre_ast_new_iter(tre_mem_t mem,tre_ast_node_t * arg,int min,int max,int minimal)54 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
55 int minimal)
56 {
57 tre_ast_node_t *node;
58 tre_iteration_t *iter;
59
60 node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
61 if (!node)
62 return NULL;
63 iter = node->obj;
64 iter->arg = arg;
65 iter->min = min;
66 iter->max = max;
67 iter->minimal = minimal;
68 node->num_submatches = arg->num_submatches;
69
70 return node;
71 }
72
73 tre_ast_node_t *
tre_ast_new_union(tre_mem_t mem,tre_ast_node_t * left,tre_ast_node_t * right)74 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
75 {
76 tre_ast_node_t *node;
77
78 node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
79 if (node == NULL)
80 return NULL;
81 ((tre_union_t *)node->obj)->left = left;
82 ((tre_union_t *)node->obj)->right = right;
83 node->num_submatches = left->num_submatches + right->num_submatches;
84
85 return node;
86 }
87
88 tre_ast_node_t *
tre_ast_new_catenation(tre_mem_t mem,tre_ast_node_t * left,tre_ast_node_t * right)89 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
90 tre_ast_node_t *right)
91 {
92 tre_ast_node_t *node;
93
94 node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
95 if (node == NULL)
96 return NULL;
97 ((tre_catenation_t *)node->obj)->left = left;
98 ((tre_catenation_t *)node->obj)->right = right;
99 // UBSAN warning in clang 10: signed integer overflow
100 double tmp = (double)left->num_submatches + (double)right->num_submatches;
101 if (tmp >= INT_MIN && tmp <= INT_MAX)
102 node->num_submatches = left->num_submatches + right->num_submatches;
103 else node->num_submatches = 0;
104
105 return node;
106 }
107
108 #ifdef TRE_DEBUG
109
110 static void
tre_findent(FILE * stream,int i)111 tre_findent(FILE *stream, int i)
112 {
113 while (i-- > 0)
114 fputc(' ', stream);
115 }
116
117 void
tre_print_params(int * params)118 tre_print_params(int *params)
119 {
120 int i;
121 if (params)
122 {
123 DPRINT(("params ["));
124 for (i = 0; i < TRE_PARAM_LAST; i++)
125 {
126 if (params[i] == TRE_PARAM_UNSET)
127 DPRINT(("unset"));
128 else if (params[i] == TRE_PARAM_DEFAULT)
129 DPRINT(("default"));
130 else
131 DPRINT(("%d", params[i]));
132 if (i < TRE_PARAM_LAST - 1)
133 DPRINT((", "));
134 }
135 DPRINT(("]"));
136 }
137 }
138
139 static void
tre_do_print(FILE * stream,tre_ast_node_t * ast,int indent)140 tre_do_print(FILE *stream, tre_ast_node_t *ast, int indent)
141 {
142 int code_min, code_max, pos;
143 int num_tags = ast->num_tags;
144 tre_literal_t *lit;
145 tre_iteration_t *iter;
146
147 tre_findent(stream, indent);
148 switch (ast->type)
149 {
150 case LITERAL:
151 lit = ast->obj;
152 code_min = lit->code_min;
153 code_max = lit->code_max;
154 pos = lit->position;
155 if (IS_EMPTY(lit))
156 {
157 fprintf(stream, "literal empty\n");
158 }
159 else if (IS_ASSERTION(lit))
160 {
161 int i;
162 char *assertions[] = { "bol", "eol", "ctype", "!ctype",
163 "bow", "eow", "wb", "!wb" };
164 if (code_max >= ASSERT_LAST << 1)
165 assert(0);
166 fprintf(stream, "assertions: ");
167 for (i = 0; (1 << i) <= ASSERT_LAST; i++)
168 if (code_max & (1 << i))
169 fprintf(stream, "%s ", assertions[i]);
170 fprintf(stream, "\n");
171 }
172 else if (IS_TAG(lit))
173 {
174 fprintf(stream, "tag %d\n", code_max);
175 }
176 else if (IS_BACKREF(lit))
177 {
178 fprintf(stream, "backref %d, pos %d\n", code_max, pos);
179 }
180 else if (IS_PARAMETER(lit))
181 {
182 tre_print_params(lit->u.params);
183 fprintf(stream, "\n");
184 }
185 else
186 {
187 fprintf(stream, "literal (%c, %c) (%d, %d), pos %d, sub %d, "
188 "%d tags\n", code_min, code_max, code_min, code_max, pos,
189 ast->submatch_id, num_tags);
190 }
191 break;
192 case ITERATION:
193 iter = ast->obj;
194 fprintf(stream, "iteration {%d, %d}, sub %d, %d tags, %s\n",
195 iter->min, iter->max, ast->submatch_id, num_tags,
196 iter->minimal ? "minimal" : "greedy");
197 tre_do_print(stream, iter->arg, indent + 2);
198 break;
199 case UNION:
200 fprintf(stream, "union, sub %d, %d tags\n", ast->submatch_id, num_tags);
201 tre_do_print(stream, ((tre_union_t *)ast->obj)->left, indent + 2);
202 tre_do_print(stream, ((tre_union_t *)ast->obj)->right, indent + 2);
203 break;
204 case CATENATION:
205 fprintf(stream, "catenation, sub %d, %d tags\n", ast->submatch_id,
206 num_tags);
207 tre_do_print(stream, ((tre_catenation_t *)ast->obj)->left, indent + 2);
208 tre_do_print(stream, ((tre_catenation_t *)ast->obj)->right, indent + 2);
209 break;
210 default:
211 assert(0);
212 break;
213 }
214 }
215
216 static void
tre_ast_fprint(FILE * stream,tre_ast_node_t * ast)217 tre_ast_fprint(FILE *stream, tre_ast_node_t *ast)
218 {
219 tre_do_print(stream, ast, 0);
220 }
221
222 void
tre_ast_print(tre_ast_node_t * tree)223 tre_ast_print(tre_ast_node_t *tree)
224 {
225 printf("AST:\n");
226 tre_ast_fprint(stdout, tree);
227 }
228
229 #endif /* TRE_DEBUG */
230
231 /* EOF */
232