1 #ifndef MLR_DSL_CST_H 2 #define MLR_DSL_CST_H 3 4 #include "cli/mlrcli.h" 5 #include "lib/context.h" 6 #include "containers/lhmsmv.h" 7 #include "containers/local_stack.h" 8 #include "containers/loop_stack.h" 9 #include "containers/type_decl.h" 10 #include "dsl/mlr_dsl_ast.h" 11 #include "dsl/mlr_dsl_blocked_ast.h" 12 #include "dsl/rval_evaluators.h" 13 #include "dsl/rxval_evaluators.h" 14 #include "dsl/function_manager.h" 15 #include "output/multi_out.h" 16 #include "output/multi_lrec_writer.h" 17 18 // ================================================================ 19 // Concrete syntax tree (CST) derived from an abstract syntax tree (AST). 20 // 21 // Statements are of the form: 22 // 23 // * Assignment of mlrval (i.e. result of expression evaluation, e.g. $name or f($x,$y)) to oosvar (out-of-stream 24 // variables, prefixed with @ sigil) 25 // 26 // * Assignment to srec (in-stream records, with field names prefixed with $ sigil) 27 // 28 // * Copying full srec ($* syntax) to/from an oosvar 29 // 30 // * Oosvar-to-oosvar assignments (recursively if RHS is non-terminal) 31 // 32 // * pattern-action statements: boolean expression with curly-braced statements which are executed only 33 // when the boolean evaluates to true. 34 // 35 // * bare-boolean statements: no-ops unless they have side effects: namely, the matches/does-not-match 36 // operators =~ and !=~ setting regex captures \1, \2, etc. 37 // 38 // * emit statements: these place oosvar key-value pairs into the output stream. These can be of the following forms: 39 // 40 // o 'emit @a; emit @b' which produce separate records such as a=3 and b=4 41 // 42 // o 'emitf @a, @b' which produce records such as a=3,b=4 43 // 44 // o For nested maps, 'emit @c, "x", "y"' in which case the first two map levels are pulled out and named "x" and "y" 45 // in separate fields. See containers/mlhmmv.h for more information. 46 // 47 // Further, these statements are organized into three groups: 48 // 49 // * begin: executed once, before the first input record is read. 50 // * main: executed for each input record. 51 // * end: executed once, after the last input record is read. 52 // 53 // The exceptions being, of course, assignment to/from srec is disallowed for begin/end statements since those occur 54 // before/after stream processing, respectively. 55 // ================================================================ 56 57 // ---------------------------------------------------------------- 58 // dsl/mlr_dsl_stack_allocate.c 59 // Two-pass stack allocator which operates on the block-structured AST 60 // before the CST is build (mlr_dsl_stack_allocate.c). 61 void blocked_ast_allocate_locals(blocked_ast_t* paast, int trace); 62 63 // ---------------------------------------------------------------- 64 // Forward references for virtual-function prototypes 65 struct _mlr_dsl_cst_t; 66 struct _mlr_dsl_cst_statement_t; 67 struct _subr_defsite_t; 68 69 // Parameter bag to reduce parameter-marshaling 70 typedef struct _cst_outputs_t { 71 int* pshould_emit_rec; 72 sllv_t* poutrecs; 73 char* oosvar_flatten_separator; 74 cli_writer_opts_t* pwriter_opts; 75 } cst_outputs_t; 76 77 // ---------------------------------------------------------------- 78 typedef struct _cst_statement_block_t { 79 int subframe_var_count; 80 sllv_t* pstatements; 81 } cst_statement_block_t; 82 83 cst_statement_block_t* cst_statement_block_alloc(int subframe_var_count); 84 void cst_statement_block_free(cst_statement_block_t* pblock, context_t* pctx); 85 86 // ---------------------------------------------------------------- 87 typedef struct _cst_top_level_statement_block_t { 88 local_stack_frame_t* pframe; 89 int max_var_depth; 90 cst_statement_block_t* pblock; 91 } cst_top_level_statement_block_t; 92 93 cst_top_level_statement_block_t* cst_top_level_statement_block_alloc(int max_var_depth, int subframe_var_count); 94 void cst_top_level_statement_block_free(cst_top_level_statement_block_t* pblock, context_t* pctx); 95 96 // ---------------------------------------------------------------- 97 // Generic handler for a statement. 98 99 // Handler for statement lists: begin/main/end; cond/if/for/while/do-while. 100 typedef void mlr_dsl_cst_block_handler_t( 101 cst_statement_block_t* pblock, 102 variables_t* pvars, 103 cst_outputs_t* pcst_outputs); 104 105 // ---------------------------------------------------------------- 106 // mlr_dsl_cst_statement_t is a base class extended by all manner of subclasses. 107 // The following are for their method pointers. 108 typedef struct _mlr_dsl_cst_statement_t* mlr_dsl_cst_statement_allocator_t( 109 struct _mlr_dsl_cst_t* pcst, 110 mlr_dsl_ast_node_t* pnode, 111 int type_inferencing, 112 int context_flags); 113 114 typedef void mlr_dsl_cst_statement_handler_t( 115 struct _mlr_dsl_cst_statement_t* pstatement, 116 variables_t* pvars, 117 cst_outputs_t* pcst_outputs); 118 119 typedef void mlr_dsl_cst_statement_freer_t( 120 struct _mlr_dsl_cst_statement_t* pstatement, 121 context_t* pctx); 122 123 // ---------------------------------------------------------------- 124 // MLR_DSL_CST_STATEMENT OBJECT 125 126 typedef struct _mlr_dsl_cst_statement_t { 127 128 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 129 // Common to most or all statement types: 130 131 // For trace-mode. 132 mlr_dsl_ast_node_t* past_node; 133 134 // Function-pointer for the handler of the given statement type, e.g. srec-assignment, while-loop, etc. 135 mlr_dsl_cst_statement_handler_t* pstatement_handler; 136 137 // Subclass destructor. It should free whatever's in the pvstate but it should not 138 // free the pstatement itself. 139 mlr_dsl_cst_statement_freer_t* pstatement_freer; 140 141 // The reason for this being a function pointer is that there are two variants of 142 // statement-list handlers: one for inside loop bodies which has to check 143 // break/continue flags after each statement, and another for outside loop bodies 144 // which doesn't need to check those. (This is a micro-optimization.) For bodyless 145 // statements (e.g. assignment) this is null. 146 cst_statement_block_t* pblock; 147 mlr_dsl_cst_block_handler_t* pblock_handler; 148 149 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 150 // Specific to each statement type: 151 152 void* pvstate; 153 154 } mlr_dsl_cst_statement_t; 155 156 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 157 // For use by the statement-subclass constructors 158 159 mlr_dsl_cst_statement_t* mlr_dsl_cst_statement_valloc( 160 mlr_dsl_ast_node_t* past_node, 161 mlr_dsl_cst_statement_handler_t* pstatement_handler, 162 mlr_dsl_cst_statement_freer_t* pstatement_freer, 163 void* pvstate); 164 165 mlr_dsl_cst_statement_t* mlr_dsl_cst_statement_valloc_with_block( 166 mlr_dsl_ast_node_t* past_node, 167 mlr_dsl_cst_statement_handler_t* pstatement_handler, 168 cst_statement_block_t* pblock, 169 mlr_dsl_cst_block_handler_t* pblock_handler, 170 mlr_dsl_cst_statement_freer_t* pstatement_freer, 171 void* pvstate); 172 173 // ---------------------------------------------------------------- 174 // MLR_DSL_CST OBJECT 175 176 typedef struct _mlr_dsl_cst_t { 177 sllv_t* pbegin_blocks; 178 cst_top_level_statement_block_t* pmain_block; 179 sllv_t* pend_blocks; 180 181 // Function manager for built-in functions as well as user-defined functions (which are CST-specific). 182 fmgr_t* pfmgr; 183 184 // Subroutine bodies 185 lhmsv_t* psubr_defsites; 186 187 // Subroutine callsites, used to bootstrap (e.g. subroutine f calls subroutine g before the latter 188 // has been defined). 189 sllv_t* psubr_callsite_statements_to_resolve; 190 191 // fflush on emit/tee/print/dump 192 int flush_every_record; 193 194 // The CST object retains the AST pointer (in order to reuse its strings etc. with minimal copying) 195 // and will free the AST in the CST destructor. 196 blocked_ast_t* paast; 197 } mlr_dsl_cst_t; 198 199 // ---------------------------------------------------------------- 200 // CONSTRUCTORS/DESTRUCTORS/METHODS 201 202 // Notes: 203 // * do_final_filter is FALSE for mlr put, TRUE for mlr filter. 204 // * negate_final_filter is TRUE for mlr filter -x. 205 // * The CST object strips nodes off the raw AST, constructed by the Lemon parser, in order 206 // to do analysis on it. Nonetheless the caller should free what's left. 207 mlr_dsl_cst_t* mlr_dsl_cst_alloc(mlr_dsl_ast_t* past, int print_ast, int trace_stack_allocation, 208 int type_inferencing, int flush_every_record, int do_final_filter, int negate_final_filter); 209 210 mlr_dsl_cst_statement_t* mlr_dsl_cst_alloc_statement(mlr_dsl_cst_t* pcst, mlr_dsl_ast_node_t* pnode, 211 int type_inferencing, int context_flags); 212 213 mlr_dsl_cst_statement_t* mlr_dsl_cst_alloc_final_filter_statement(mlr_dsl_cst_t* pcst, 214 mlr_dsl_ast_node_t* pnode, int negate_final_filter, int type_inferencing, int context_flags); 215 216 void mlr_dsl_cst_free(mlr_dsl_cst_t* pcst, context_t* pctx); 217 void mlr_dsl_cst_statement_free(mlr_dsl_cst_statement_t* pstatement, context_t* pctx); 218 219 // Top-level entry point, e.g. from mapper_put. 220 void mlr_dsl_cst_handle_top_level_statement_blocks( 221 sllv_t* ptop_level_blocks, // block bodies for begins, main, ends 222 variables_t* pvars, 223 cst_outputs_t* pcst_outputs); 224 225 void mlr_dsl_cst_handle_top_level_statement_block( 226 cst_top_level_statement_block_t* ptop_level_block, 227 variables_t* pvars, 228 cst_outputs_t* pcst_outputs); 229 230 // Recursive entry point: block bodies for begin, main, end; cond, if, for, while. 231 void mlr_dsl_cst_handle_statement_block( 232 cst_statement_block_t* pblock, 233 variables_t* pvars, 234 cst_outputs_t* pcst_outputs); 235 236 void mlr_dsl_cst_handle_statement_block_with_break_continue( 237 cst_statement_block_t* pblock, 238 variables_t* pvars, 239 cst_outputs_t* pcst_outputs); 240 241 // Statement lists which are not curly-braced bodies: start/continuation/update statements for triple-for. 242 void mlr_dsl_cst_handle_statement_list( 243 sllv_t* pstatements, 244 variables_t* pvars, 245 cst_outputs_t* pcst_outputs); 246 247 // ================================================================ 248 // dsl/mlr_dsl_cst_func_subr.c 249 250 // ---------------------------------------------------------------- 251 // cst_udf_state_t is data needed to execute the body of a user-defined function which is implemented by CST statements. 252 // udf_defsite_state_t is data needed for any user-defined function (no matter how implemented). 253 typedef struct _cst_udf_state_t { 254 char* name; 255 int arity; 256 char** parameter_names; 257 int* parameter_type_masks; 258 cst_top_level_statement_block_t* ptop_level_block; 259 char* return_value_type_name; 260 int return_value_type_mask; 261 } cst_udf_state_t; 262 263 udf_defsite_state_t* mlr_dsl_cst_alloc_udf( 264 mlr_dsl_cst_t* pcst, 265 mlr_dsl_ast_node_t* pnode, 266 int type_inferencing, 267 int context_flags); 268 269 void mlr_dsl_cst_free_udf(cst_udf_state_t* pstate, context_t* pctx); 270 271 // ---------------------------------------------------------------- 272 273 typedef struct _subr_callsite_t { 274 char* name; 275 int arity; 276 int type_inferencing; 277 int context_flags; 278 } subr_callsite_t; 279 280 typedef struct _subr_defsite_t { 281 char* name; 282 int arity; 283 char** parameter_names; 284 int* parameter_type_masks; 285 cst_top_level_statement_block_t* ptop_level_block; 286 } subr_defsite_t; 287 288 subr_defsite_t* mlr_dsl_cst_alloc_subroutine( 289 mlr_dsl_cst_t* pcst, 290 mlr_dsl_ast_node_t* pnode, 291 int type_inferencing, 292 int context_flags); 293 294 void mlr_dsl_cst_free_subroutine(subr_defsite_t* psubr_defsite, context_t* pctx); 295 296 // Invoked directly from the CST statement handler for a subroutine callsite. 297 // (Functions, by contrast, are invoked by callback from the right-hand-site-evaluator logic 298 // -- hence no execute-function method here.) 299 void mlr_dsl_cst_execute_subroutine(subr_defsite_t* pstate, variables_t* pvars, 300 cst_outputs_t* pcst_outputs, int callsite_arity, boxed_xval_t* args); 301 302 // ================================================================ 303 // For on-line help / manpage 304 // dsl/mlr_dsl_cst_keywords.c 305 306 void mlr_dsl_list_all_keywords_raw(FILE* output_stream); 307 308 // Pass function_name == NULL to get usage for all keywords: 309 void mlr_dsl_keyword_usage(FILE* output_stream, char* keyword); 310 311 // ================================================================ 312 // Specific CST-statement subclasses 313 314 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 315 // dsl/mlr_dsl_cst_condish_statements.c 316 mlr_dsl_cst_statement_allocator_t alloc_conditional_block; 317 mlr_dsl_cst_statement_allocator_t alloc_if_head; 318 mlr_dsl_cst_statement_allocator_t alloc_while; 319 mlr_dsl_cst_statement_allocator_t alloc_do_while; 320 mlr_dsl_cst_statement_allocator_t alloc_bare_boolean; 321 322 mlr_dsl_cst_statement_t* alloc_filter( 323 mlr_dsl_cst_t* pcst, 324 mlr_dsl_ast_node_t* pnode, 325 int type_inferencing, 326 int context_flags); 327 328 mlr_dsl_cst_statement_t* alloc_final_filter( 329 mlr_dsl_cst_t* pcst, 330 mlr_dsl_ast_node_t* pnode, 331 int negate_final_filter, 332 int type_inferencing, 333 int context_flags); 334 335 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 336 // dsl/mlr_dsl_cst_terminal_assignment_statements.c 337 mlr_dsl_cst_statement_allocator_t alloc_srec_assignment; 338 mlr_dsl_cst_statement_allocator_t alloc_indirect_srec_assignment; 339 mlr_dsl_cst_statement_allocator_t alloc_positional_srec_name_assignment; 340 mlr_dsl_cst_statement_allocator_t alloc_env_assignment; 341 342 // dsl/mlr_dsl_cst_map_assignment_statements.c 343 mlr_dsl_cst_statement_allocator_t alloc_full_srec_assignment; 344 mlr_dsl_cst_statement_t* alloc_local_variable_definition( 345 mlr_dsl_cst_t* pcst, 346 mlr_dsl_ast_node_t* pnode, 347 int type_inferencing, 348 int context_flags, 349 int type_mask); 350 mlr_dsl_cst_statement_allocator_t alloc_nonindexed_local_variable_assignment; 351 mlr_dsl_cst_statement_allocator_t alloc_indexed_local_variable_assignment; 352 mlr_dsl_cst_statement_allocator_t alloc_oosvar_assignment; 353 mlr_dsl_cst_statement_allocator_t alloc_full_oosvar_assignment; 354 355 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 356 // dsl/mlr_dsl_cst_unset_statements.c 357 mlr_dsl_cst_statement_allocator_t alloc_unset; 358 359 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 360 // dsl/mlr_dsl_cst_for_srec_statements.c 361 mlr_dsl_cst_statement_allocator_t alloc_for_srec; 362 mlr_dsl_cst_statement_allocator_t alloc_for_srec_key_only; 363 364 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 365 // dsl/mlr_dsl_cst_for_map_statements.c 366 mlr_dsl_cst_statement_allocator_t alloc_for_map; 367 mlr_dsl_cst_statement_allocator_t alloc_for_map_key_only; 368 369 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 370 // dsl/mlr_dsl_cst_triple_for_statements.c 371 mlr_dsl_cst_statement_allocator_t alloc_triple_for; 372 373 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 374 // dsl/mlr_dsl_cst_loop_control_statements.c 375 mlr_dsl_cst_statement_allocator_t alloc_break; 376 mlr_dsl_cst_statement_allocator_t alloc_continue; 377 378 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 379 // dsl/mlr_dsl_cst_return_statements.c 380 mlr_dsl_cst_statement_allocator_t alloc_return_void; // For subroutines 381 mlr_dsl_cst_statement_allocator_t alloc_return_value; // For UDFs 382 383 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 384 // dsl/mlr_dsl_cst_output_statements.c 385 386 mlr_dsl_cst_statement_t* alloc_print( 387 mlr_dsl_cst_t* pcst, 388 mlr_dsl_ast_node_t* pnode, 389 int type_inferencing, 390 int context_flags, 391 char* print_terminator); 392 393 mlr_dsl_cst_statement_allocator_t alloc_tee; 394 395 mlr_dsl_cst_statement_allocator_t alloc_emitf; 396 397 mlr_dsl_cst_statement_t* alloc_emit( 398 mlr_dsl_cst_t* pcst, 399 mlr_dsl_ast_node_t* pnode, 400 int type_inferencing, 401 int context_flags, 402 int do_full_prefixing); 403 404 mlr_dsl_cst_statement_t* alloc_emit_lashed( 405 mlr_dsl_cst_t* pcst, 406 mlr_dsl_ast_node_t* pnode, 407 int type_inferencing, 408 int context_flags, 409 int do_full_prefixing); 410 411 mlr_dsl_cst_statement_allocator_t alloc_dump; 412 413 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 414 // dsl/mlr_dsl_cst_func_subr.c 415 416 // When we allocate a callsite we can do so before the callee has been defined. 417 // Hence the two-step process, with the second step being an object-binding step. 418 mlr_dsl_cst_statement_allocator_t alloc_subr_callsite_statement; 419 void mlr_dsl_cst_resolve_subr_callsite(mlr_dsl_cst_t* pcst, mlr_dsl_cst_statement_t* pstatement); 420 421 #endif // MLR_DSL_CST_H 422