1 // ================================================================
2 // Miller abstract syntax tree for put and filter.
3 // ================================================================
4 
5 #ifndef MLR_DSL_AST_H
6 #define MLR_DSL_AST_H
#include <stdio.h>
#include "../containers/sllv.h"
#include "../containers/type_decl.h"
9 
10 // ----------------------------------------------------------------
// Tag identifying what kind of construct an AST node represents.
// Enumerators are implicitly numbered from zero in the order listed, so the
// order must not change if anything depends on the numeric values.
typedef enum _mlr_dsl_ast_node_type_t {

	// ---- Blocks and statement lists
	MD_AST_NODE_TYPE_STATEMENT_BLOCK = 0,
	MD_AST_NODE_TYPE_STATEMENT_LIST,

	// ---- Function/subroutine definitions and callsites
	MD_AST_NODE_TYPE_FUNC_DEF,
	MD_AST_NODE_TYPE_SUBR_DEF,
	MD_AST_NODE_TYPE_FUNCTION_CALLSITE,
	MD_AST_NODE_TYPE_INDEXED_FUNCTION_CALLSITE,
	MD_AST_NODE_TYPE_INDEXED_FUNCTION_INDEX_LIST,
	MD_AST_NODE_TYPE_SUBR_CALLSITE,

	// ---- Local-variable definitions, by declared type
	MD_AST_NODE_TYPE_UNTYPED_LOCAL_DEFINITION,
	MD_AST_NODE_TYPE_NUMERIC_LOCAL_DEFINITION,
	MD_AST_NODE_TYPE_INT_LOCAL_DEFINITION,
	MD_AST_NODE_TYPE_FLOAT_LOCAL_DEFINITION,
	MD_AST_NODE_TYPE_BOOLEAN_LOCAL_DEFINITION,
	MD_AST_NODE_TYPE_STRING_LOCAL_DEFINITION,
	MD_AST_NODE_TYPE_MAP_LOCAL_DEFINITION,

	// ---- Parameter definitions, by declared type
	MD_AST_NODE_TYPE_UNTYPED_PARAMETER_DEFINITION,
	MD_AST_NODE_TYPE_NUMERIC_PARAMETER_DEFINITION,
	MD_AST_NODE_TYPE_INT_PARAMETER_DEFINITION,
	MD_AST_NODE_TYPE_FLOAT_PARAMETER_DEFINITION,
	MD_AST_NODE_TYPE_BOOLEAN_PARAMETER_DEFINITION,
	MD_AST_NODE_TYPE_STRING_PARAMETER_DEFINITION,
	MD_AST_NODE_TYPE_MAP_PARAMETER_DEFINITION,

	// ---- Returns
	MD_AST_NODE_TYPE_RETURN_VALUE,
	MD_AST_NODE_TYPE_RETURN_VOID,

	// ---- begin/end blocks
	MD_AST_NODE_TYPE_BEGIN,
	MD_AST_NODE_TYPE_END,

	// ---- Literals
	MD_AST_NODE_TYPE_STRING_LITERAL,
	MD_AST_NODE_TYPE_NUMERIC_LITERAL,
	MD_AST_NODE_TYPE_BOOLEAN_LITERAL,
	MD_AST_NODE_TYPE_MAP_LITERAL,
	MD_AST_NODE_TYPE_MAP_LITERAL_PAIR,
	MD_AST_NODE_TYPE_MAP_LITERAL_KEY,
	MD_AST_NODE_TYPE_MAP_LITERAL_VALUE,
	MD_AST_NODE_TYPE_REGEXI,

	// ---- Record fields, out-of-stream variables, names, operators
	MD_AST_NODE_TYPE_FIELD_NAME,           // E.g. value = $x
	MD_AST_NODE_TYPE_INDIRECT_FIELD_NAME,  // E.g. value = $[@x]
	MD_AST_NODE_TYPE_POSITIONAL_SREC_NAME, // E.g. key = $[[3]]
	MD_AST_NODE_TYPE_FULL_SREC,
	MD_AST_NODE_TYPE_OOSVAR_KEYLIST,
	MD_AST_NODE_TYPE_FULL_OOSVAR,
	MD_AST_NODE_TYPE_NON_SIGIL_NAME,
	MD_AST_NODE_TYPE_OPERATOR,

	// ---- Assignments
	MD_AST_NODE_TYPE_NONINDEXED_LOCAL_ASSIGNMENT,
	MD_AST_NODE_TYPE_INDEXED_LOCAL_ASSIGNMENT,
	MD_AST_NODE_TYPE_SREC_ASSIGNMENT,
	MD_AST_NODE_TYPE_INDIRECT_SREC_ASSIGNMENT,        // E.g. $[@y] = newvalue
	MD_AST_NODE_TYPE_POSITIONAL_SREC_NAME_ASSIGNMENT, // E.g. $[[3]] = newkey
	MD_AST_NODE_TYPE_OOSVAR_ASSIGNMENT,
	MD_AST_NODE_TYPE_OOSVAR_FROM_FULL_SREC_ASSIGNMENT,
	MD_AST_NODE_TYPE_FULL_OOSVAR_ASSIGNMENT,
	MD_AST_NODE_TYPE_FULL_OOSVAR_FROM_FULL_SREC_ASSIGNMENT,
	MD_AST_NODE_TYPE_FULL_SREC_ASSIGNMENT,
	MD_AST_NODE_TYPE_ENV_ASSIGNMENT,

	// ---- Context/environment and miscellaneous statements
	MD_AST_NODE_TYPE_CONTEXT_VARIABLE,
	MD_AST_NODE_TYPE_ENV,
	MD_AST_NODE_TYPE_STRIPPED_AWAY,
	MD_AST_NODE_TYPE_CONDITIONAL_BLOCK,
	MD_AST_NODE_TYPE_FILTER,
	MD_AST_NODE_TYPE_UNSET,

	// ---- Output redirection
	MD_AST_NODE_TYPE_PIPE,
	MD_AST_NODE_TYPE_FILE_WRITE,
	MD_AST_NODE_TYPE_FILE_APPEND,

	// ---- Output statements
	MD_AST_NODE_TYPE_TEE,
	MD_AST_NODE_TYPE_EMITF,
	MD_AST_NODE_TYPE_EMITP,
	MD_AST_NODE_TYPE_EMIT,
	MD_AST_NODE_TYPE_EMITP_LASHED,
	MD_AST_NODE_TYPE_EMIT_LASHED,
	MD_AST_NODE_TYPE_DUMP,
	MD_AST_NODE_TYPE_EDUMP,
	MD_AST_NODE_TYPE_PRINT,
	MD_AST_NODE_TYPE_PRINTN,
	MD_AST_NODE_TYPE_EPRINT,
	MD_AST_NODE_TYPE_EPRINTN,
	MD_AST_NODE_TYPE_STDOUT,
	MD_AST_NODE_TYPE_STDERR,
	MD_AST_NODE_TYPE_STREAM,
	MD_AST_NODE_TYPE_ALL,

	// Only for parser internals; should not be in the AST returned by the parser.
	MD_AST_NODE_TYPE_NOP,

	// ---- Loops
	MD_AST_NODE_TYPE_WHILE,
	MD_AST_NODE_TYPE_DO_WHILE,
	MD_AST_NODE_TYPE_FOR_SREC,
	MD_AST_NODE_TYPE_FOR_SREC_KEY_ONLY,
	MD_AST_NODE_TYPE_FOR_OOSVAR,
	MD_AST_NODE_TYPE_FOR_OOSVAR_KEY_ONLY,
	MD_AST_NODE_TYPE_FOR_LOCAL_MAP,
	MD_AST_NODE_TYPE_FOR_LOCAL_MAP_KEY_ONLY,
	MD_AST_NODE_TYPE_FOR_MAP_LITERAL,
	MD_AST_NODE_TYPE_FOR_MAP_LITERAL_KEY_ONLY,
	MD_AST_NODE_TYPE_FOR_FUNC_RETVAL,
	MD_AST_NODE_TYPE_FOR_FUNC_RETVAL_KEY_ONLY,
	MD_AST_NODE_TYPE_FOR_VARIABLES,
	MD_AST_NODE_TYPE_TRIPLE_FOR,

	// ---- Local-variable references
	MD_AST_NODE_TYPE_NONINDEXED_LOCAL_VARIABLE,
	MD_AST_NODE_TYPE_INDEXED_LOCAL_VARIABLE,

	// ---- Remaining control-flow pieces
	MD_AST_NODE_TYPE_IN,
	MD_AST_NODE_TYPE_BREAK,
	MD_AST_NODE_TYPE_CONTINUE,
	MD_AST_NODE_TYPE_IF_HEAD,
	MD_AST_NODE_TYPE_IF_ITEM,

} mlr_dsl_ast_node_type_t;
113 
// Placeholder index value. NOTE(review): presumably marks the frame/subframe
// index fields of AST nodes as not-yet-assigned -- confirm against the
// implementation. Parenthesized so the leading minus cannot combine with
// adjacent tokens at the point of expansion.
#define MD_UNUSED_INDEX (-1000000000)
115 
116 typedef struct _mlr_dsl_ast_node_t {
117 	char*                   text;
118 	mlr_dsl_ast_node_type_t type;
119 	sllv_t*                 pchildren;
120 
121 	// For bind-stack allocation only in local-var nodes: unused for any other node types.
122 	int vardef_subframe_relative_index; // pass 1 output: which index in subframe
123 	int vardef_subframe_index;          // pass 1 output: which subframe the variable is defined in
124 	int vardef_frame_relative_index;    // pass 2 output: index relative to full stack frame
125 
126 	// For bind-stack allocation only in statement-block nodes: unused for any other node types.
127 	int subframe_var_count;
128 	int max_subframe_depth;
129 	int max_var_depth;
130 
131 } mlr_dsl_ast_node_t;
132 
133 typedef struct _mlr_dsl_ast_t {
134 	mlr_dsl_ast_node_t* proot;
135 } mlr_dsl_ast_t;
136 
137 // ----------------------------------------------------------------
138 mlr_dsl_ast_t* mlr_dsl_ast_alloc();
139 
140 mlr_dsl_ast_node_t* mlr_dsl_ast_node_alloc(char* text, mlr_dsl_ast_node_type_t type);
141 
142 mlr_dsl_ast_node_t* mlr_dsl_ast_node_alloc_zary(char* text, mlr_dsl_ast_node_type_t type);
143 
144 mlr_dsl_ast_node_t* mlr_dsl_ast_node_alloc_unary(char* text, mlr_dsl_ast_node_type_t type, mlr_dsl_ast_node_t* pa);
145 
146 mlr_dsl_ast_node_t* mlr_dsl_ast_node_alloc_binary(char* text, mlr_dsl_ast_node_type_t type,
147 	mlr_dsl_ast_node_t* pa, mlr_dsl_ast_node_t* pb);
148 
149 mlr_dsl_ast_node_t* mlr_dsl_ast_node_alloc_ternary(char* text, mlr_dsl_ast_node_type_t type,
150 	mlr_dsl_ast_node_t* pa, mlr_dsl_ast_node_t* pb, mlr_dsl_ast_node_t* pc);
151 
152 mlr_dsl_ast_node_t* mlr_dsl_ast_node_alloc_quaternary(char* text, mlr_dsl_ast_node_type_t type,
153 	mlr_dsl_ast_node_t* pa, mlr_dsl_ast_node_t* pb, mlr_dsl_ast_node_t* pc, mlr_dsl_ast_node_t* pd);
154 
155 mlr_dsl_ast_node_t* mlr_dsl_ast_node_copy(mlr_dsl_ast_node_t* pother);
156 // These are so the parser can expand '$x += 1' to '$x = $x + 1', etc.
157 mlr_dsl_ast_node_t* mlr_dsl_ast_tree_copy(mlr_dsl_ast_node_t* pother);
158 
159 // See comments in mlr_dsl_parse.y for this seemingly awkward syntax wherein
160 // we change the function name after having set it up. This is a consequence of
161 // bottom-up DSL parsing.
162 mlr_dsl_ast_node_t* mlr_dsl_ast_node_prepend_arg(mlr_dsl_ast_node_t* pa, mlr_dsl_ast_node_t* pb);
163 mlr_dsl_ast_node_t* mlr_dsl_ast_node_append_arg(mlr_dsl_ast_node_t* pa, mlr_dsl_ast_node_t* pb);
164 mlr_dsl_ast_node_t* mlr_dsl_ast_node_append_arg_to_second_child(mlr_dsl_ast_node_t* pa, mlr_dsl_ast_node_t* pb);
165 mlr_dsl_ast_node_t* mlr_dsl_ast_node_set_function_name(mlr_dsl_ast_node_t* pa, char* name);
166 
167 void mlr_dsl_ast_node_replace_text(mlr_dsl_ast_node_t* pa, char* text);
168 
169 int mlr_dsl_ast_node_type_to_type_mask(mlr_dsl_ast_node_type_t type);
170 
171 int mlr_dsl_ast_node_cannot_be_bare_boolean(mlr_dsl_ast_node_t* pnode);
172 
173 void mlr_dsl_ast_print(mlr_dsl_ast_t* past);
174 void mlr_dsl_ast_node_print(mlr_dsl_ast_node_t* pnode);
175 void mlr_dsl_ast_node_fprint(mlr_dsl_ast_node_t* pnode, FILE* o);
176 void mlr_dsl_ast_node_pretty_fprint(mlr_dsl_ast_node_t* pnode, FILE* o);
177 char* mlr_dsl_ast_node_describe_type(mlr_dsl_ast_node_type_t type);
178 
179 void mlr_dsl_ast_node_free(mlr_dsl_ast_node_t* pnode);
180 
181 void mlr_dsl_ast_free(mlr_dsl_ast_t* past);
182 
183 #endif // MLR_DSL_AST_H
184