1 /*
2   tre-ast.c - Abstract syntax tree (AST) routines
3 
4   This software is released under a BSD-style license.
5   See the file LICENSE for details and copyright.
6 
7 */
8 
9 #ifdef HAVE_CONFIG_H
10 #include <config.h>
11 #endif /* HAVE_CONFIG_H */
12 #include <assert.h>
13 #include <limits.h>
14 
15 #include "tre-ast.h"
16 #include "tre-mem.h"
17 
18 tre_ast_node_t *
tre_ast_new_node(tre_mem_t mem,tre_ast_type_t type,size_t size)19 tre_ast_new_node(tre_mem_t mem, tre_ast_type_t type, size_t size)
20 {
21   tre_ast_node_t *node;
22 
23   node = tre_mem_calloc(mem, sizeof(*node));
24   if (!node)
25     return NULL;
26   node->obj = tre_mem_calloc(mem, size);
27   if (!node->obj)
28     return NULL;
29   node->type = type;
30   node->nullable = -1;
31   node->submatch_id = -1;
32 
33   return node;
34 }
35 
36 tre_ast_node_t *
tre_ast_new_literal(tre_mem_t mem,int code_min,int code_max,int position)37 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
38 {
39   tre_ast_node_t *node;
40   tre_literal_t *lit;
41 
42   node = tre_ast_new_node(mem, LITERAL, sizeof(tre_literal_t));
43   if (!node)
44     return NULL;
45   lit = node->obj;
46   lit->code_min = code_min;
47   lit->code_max = code_max;
48   lit->position = position;
49 
50   return node;
51 }
52 
53 tre_ast_node_t *
tre_ast_new_iter(tre_mem_t mem,tre_ast_node_t * arg,int min,int max,int minimal)54 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max,
55 		 int minimal)
56 {
57   tre_ast_node_t *node;
58   tre_iteration_t *iter;
59 
60   node = tre_ast_new_node(mem, ITERATION, sizeof(tre_iteration_t));
61   if (!node)
62     return NULL;
63   iter = node->obj;
64   iter->arg = arg;
65   iter->min = min;
66   iter->max = max;
67   iter->minimal = minimal;
68   node->num_submatches = arg->num_submatches;
69 
70   return node;
71 }
72 
73 tre_ast_node_t *
tre_ast_new_union(tre_mem_t mem,tre_ast_node_t * left,tre_ast_node_t * right)74 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
75 {
76   tre_ast_node_t *node;
77 
78   node = tre_ast_new_node(mem, UNION, sizeof(tre_union_t));
79   if (node == NULL)
80     return NULL;
81   ((tre_union_t *)node->obj)->left = left;
82   ((tre_union_t *)node->obj)->right = right;
83   node->num_submatches = left->num_submatches + right->num_submatches;
84 
85   return node;
86 }
87 
88 tre_ast_node_t *
tre_ast_new_catenation(tre_mem_t mem,tre_ast_node_t * left,tre_ast_node_t * right)89 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left,
90 		       tre_ast_node_t *right)
91 {
92   tre_ast_node_t *node;
93 
94   node = tre_ast_new_node(mem, CATENATION, sizeof(tre_catenation_t));
95   if (node == NULL)
96     return NULL;
97   ((tre_catenation_t *)node->obj)->left = left;
98   ((tre_catenation_t *)node->obj)->right = right;
99   // UBSAN warning in clang 10: signed integer overflow
100   double tmp = (double)left->num_submatches + (double)right->num_submatches;
101   if (tmp >= INT_MIN && tmp <= INT_MAX)
102       node->num_submatches = left->num_submatches + right->num_submatches;
103   else node->num_submatches = 0;
104 
105   return node;
106 }
107 
108 #ifdef TRE_DEBUG
109 
110 static void
tre_findent(FILE * stream,int i)111 tre_findent(FILE *stream, int i)
112 {
113   while (i-- > 0)
114     fputc(' ', stream);
115 }
116 
117 void
tre_print_params(int * params)118 tre_print_params(int *params)
119 {
120   int i;
121   if (params)
122     {
123       DPRINT(("params ["));
124       for (i = 0; i < TRE_PARAM_LAST; i++)
125 	{
126 	  if (params[i] == TRE_PARAM_UNSET)
127 	    DPRINT(("unset"));
128 	  else if (params[i] == TRE_PARAM_DEFAULT)
129 	    DPRINT(("default"));
130 	  else
131 	    DPRINT(("%d", params[i]));
132 	  if (i < TRE_PARAM_LAST - 1)
133 	    DPRINT((", "));
134 	}
135       DPRINT(("]"));
136     }
137 }
138 
139 static void
tre_do_print(FILE * stream,tre_ast_node_t * ast,int indent)140 tre_do_print(FILE *stream, tre_ast_node_t *ast, int indent)
141 {
142   int code_min, code_max, pos;
143   int num_tags = ast->num_tags;
144   tre_literal_t *lit;
145   tre_iteration_t *iter;
146 
147   tre_findent(stream, indent);
148   switch (ast->type)
149     {
150     case LITERAL:
151       lit = ast->obj;
152       code_min = lit->code_min;
153       code_max = lit->code_max;
154       pos = lit->position;
155       if (IS_EMPTY(lit))
156 	{
157 	  fprintf(stream, "literal empty\n");
158 	}
159       else if (IS_ASSERTION(lit))
160 	{
161 	  int i;
162 	  char *assertions[] = { "bol", "eol", "ctype", "!ctype",
163 				 "bow", "eow", "wb", "!wb" };
164 	  if (code_max >= ASSERT_LAST << 1)
165 	    assert(0);
166 	  fprintf(stream, "assertions: ");
167 	  for (i = 0; (1 << i) <= ASSERT_LAST; i++)
168 	    if (code_max & (1 << i))
169 	      fprintf(stream, "%s ", assertions[i]);
170 	  fprintf(stream, "\n");
171 	}
172       else if (IS_TAG(lit))
173 	{
174 	  fprintf(stream, "tag %d\n", code_max);
175 	}
176       else if (IS_BACKREF(lit))
177 	{
178 	  fprintf(stream, "backref %d, pos %d\n", code_max, pos);
179 	}
180       else if (IS_PARAMETER(lit))
181 	{
182 	  tre_print_params(lit->u.params);
183 	  fprintf(stream, "\n");
184 	}
185       else
186 	{
187 	  fprintf(stream, "literal (%c, %c) (%d, %d), pos %d, sub %d, "
188 		  "%d tags\n", code_min, code_max, code_min, code_max, pos,
189 		  ast->submatch_id, num_tags);
190 	}
191       break;
192     case ITERATION:
193       iter = ast->obj;
194       fprintf(stream, "iteration {%d, %d}, sub %d, %d tags, %s\n",
195 	      iter->min, iter->max, ast->submatch_id, num_tags,
196 	      iter->minimal ? "minimal" : "greedy");
197       tre_do_print(stream, iter->arg, indent + 2);
198       break;
199     case UNION:
200       fprintf(stream, "union, sub %d, %d tags\n", ast->submatch_id, num_tags);
201       tre_do_print(stream, ((tre_union_t *)ast->obj)->left, indent + 2);
202       tre_do_print(stream, ((tre_union_t *)ast->obj)->right, indent + 2);
203       break;
204     case CATENATION:
205       fprintf(stream, "catenation, sub %d, %d tags\n", ast->submatch_id,
206 	      num_tags);
207       tre_do_print(stream, ((tre_catenation_t *)ast->obj)->left, indent + 2);
208       tre_do_print(stream, ((tre_catenation_t *)ast->obj)->right, indent + 2);
209       break;
210     default:
211       assert(0);
212       break;
213     }
214 }
215 
216 static void
tre_ast_fprint(FILE * stream,tre_ast_node_t * ast)217 tre_ast_fprint(FILE *stream, tre_ast_node_t *ast)
218 {
219   tre_do_print(stream, ast, 0);
220 }
221 
222 void
tre_ast_print(tre_ast_node_t * tree)223 tre_ast_print(tre_ast_node_t *tree)
224 {
225   printf("AST:\n");
226   tre_ast_fprint(stdout, tree);
227 }
228 
229 #endif /* TRE_DEBUG */
230 
231 /* EOF */
232