1 /*
2 * profile.c - gawk bytecode pretty-printer with counts
3 */
4
5 /*
6 * Copyright (C) 1999-2021 the Free Software Foundation, Inc.
7 *
8 * This file is part of GAWK, the GNU implementation of the
9 * AWK Programming Language.
10 *
11 * GAWK is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
15 *
16 * GAWK is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 */
25
26 #include "awk.h"
27
/*
 * Function declarations.  pprint(), pp_push() and pp_pop() are defined
 * further down in this file.  NOTE(review): the others are presumably
 * defined later in the file as well --- confirm.  redir2str() is the
 * only one declared without `static'.
 */
28 static void pprint(INSTRUCTION *startp, INSTRUCTION *endp, int flags);
29 static INSTRUCTION *end_line(INSTRUCTION *ip);
30 static void pp_parenthesize(NODE *n);
31 static void parenthesize(int type, NODE *left, NODE *right);
32 static char *pp_list(int nargs, const char *paren, const char *delim);
33 static char *pp_group3(const char *s1, const char *s2, const char *s3);
34 static char *pp_concat(int nargs);
35 static char *pp_string_or_typed_regex(const char *in_str, size_t len, int delim, bool typed_regex);
36 static char *pp_typed_regex(const char *in_str, size_t len, int delim);
37 static bool is_binary(int type);
38 static bool is_scalar(int type);
39 static int prec_level(int type);
40 static void pp_push(int type, char *s, int flag, INSTRUCTION *comment);
41 static NODE *pp_pop(void);
42 static void print_comment(INSTRUCTION *pc, long in);
43 const char *redir2str(int redirtype);
44 static void pp_namespace(const char *name, INSTRUCTION *comment);
45 static void pp_namespace_list(INSTRUCTION *list);
46 static char *adjust_namespace(char *name, bool *malloced);
47
/*
 * NODE members reused for pretty-printer stack entries; see pp_push():
 * pp_str is the entry's text, pp_len is strlen() of that text, pp_next
 * is the entry beneath it on pp_stack, and pp_comment is the comment
 * instruction handed to pp_push().
 */
48 #define pp_str vname
49 #define pp_len sub.nodep.reserved
50 #define pp_next rnode
51 #define pp_comment sub.nodep.x.cmnt
52
/*
 * Values stored in an entry's flags by pp_push(): pp_free() releases
 * pp_str with efree() only when CAN_FREE is set.
 */
53 #define DONT_FREE 1
54 #define CAN_FREE 2
55
/* Signal handlers installed by init_profiling_signals(). */
56 static void dump_and_exit(int signum) ATTRIBUTE_NORETURN;
57 static void just_dump(int signum);
58
59 /* pretty printing related functions and variables */
60
/* top entry of the stack maintained by pp_push() and pp_pop(); NULL when empty */
61 static NODE *pp_stack = NULL;
62 static NODE *func_params; /* function parameters */
63 static FILE *prof_fp; /* where to send the profile */
64
/* current nesting depth; indent() writes this many tabs, indent_in()/indent_out() adjust it */
65 static long indent_level = 0;
66
67 static const char tabs[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t";
68 static const size_t tabs_len = sizeof(tabs) - 1;
69
/*
 * check_indent_level --- die if one more level of indentation would need
 * more tab characters than tabs[] holds.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement: it can be followed by a semicolon and used as the body of
 * an if/else without capturing the caller's `else'.
 */
#define check_indent_level() \
	do { \
		if (indent_level + 1 > tabs_len) \
			/* We're allowed to be snarky, occasionally. */ \
			fatal(_("Program indentation level too deep. Consider refactoring your code")); \
	} while (0)
74
75
/*
 * Count passed to indent() on lines that show no execution count
 * (e.g. before a closing brace); with profiling on, indent() then
 * prints a tab in place of the count.
 */
76 #define SPACEOVER 0
77
/*
 * Values for the `flags' argument of pprint().  With IN_FOR_HEADER set,
 * statements are printed without the trailing end_line().  IN_ELSE_IF is
 * passed when an `if' is printed directly after `else'; Op_exec_count
 * only indents when flags is NO_PPRINT_FLAGS.
 */
78 #define NO_PPRINT_FLAGS 0
79 #define IN_FOR_HEADER 1
80 #define IN_ELSE_IF 2
81
82 /* set_prof_file --- set the output file for profiling or pretty-printing */
83
84 void
set_prof_file(const char * file)85 set_prof_file(const char *file)
86 {
87 int fd;
88
89 assert(file != NULL);
90 fd = devopen_simple(file, "w", true);
91 if (fd == INVALID_HANDLE)
92 prof_fp = NULL;
93 else if (fd == fileno(stdout))
94 prof_fp = stdout;
95 else if (fd == fileno(stderr))
96 prof_fp = stderr;
97 else
98 prof_fp = fdopen(fd, "w");
99
100 if (prof_fp == NULL) {
101 /* don't leak file descriptors */
102 int e = errno;
103
104 if ( fd != INVALID_HANDLE
105 && fd != fileno(stdout)
106 && fd != fileno(stderr))
107 (void) close(fd);
108
109 errno = e;
110 warning(_("could not open `%s' for writing: %s"),
111 file, strerror(errno));
112 warning(_("sending profile to standard error"));
113 prof_fp = stderr;
114 }
115 }
116
117 /* init_profiling_signals --- set up signal handling for gawk --profile */
118
119 void
init_profiling_signals()120 init_profiling_signals()
121 {
122 #ifdef __DJGPP__
123 signal(SIGINT, dump_and_exit);
124 signal(SIGQUIT, just_dump);
125 #else /* !__DJGPP__ */
126 #ifdef SIGHUP
127 signal(SIGHUP, dump_and_exit);
128 #endif
129 #ifdef SIGUSR1
130 signal(SIGUSR1, just_dump);
131 #endif
132 #endif /* !__DJGPP__ */
133 }
134
135 /* indent --- print out enough tabs */
136
137 static void
indent(exec_count_t count)138 indent(exec_count_t count)
139 {
140 int i;
141
142 if (do_profile) {
143 if (count == 0)
144 fprintf(prof_fp, "\t");
145 else
146 fprintf(prof_fp, EXEC_COUNT_PROFILE_FMT " ", count);
147 }
148
149 assert(indent_level >= 0);
150 for (i = 0; i < indent_level; i++)
151 fprintf(prof_fp, "\t");
152 }
153
154 /* indent_in --- increase the level, with error checking */
155
156 static void
indent_in(void)157 indent_in(void)
158 {
159 assert(indent_level >= 0);
160 indent_level++;
161 }
162
163 /* indent_out --- decrease the level, with error checking */
164
165 static void
indent_out(void)166 indent_out(void)
167 {
168 indent_level--;
169 assert(indent_level >= 0);
170 }
171
172 /* pp_push --- push a pretty printed string onto the stack */
173
174 static void
pp_push(int type,char * s,int flag,INSTRUCTION * comment)175 pp_push(int type, char *s, int flag, INSTRUCTION *comment)
176 {
177 NODE *n;
178 getnode(n);
179 n->pp_str = s;
180 n->pp_len = strlen(s);
181 n->flags = flag;
182 n->type = (NODETYPE) type;
183 n->pp_next = pp_stack;
184 n->pp_comment = comment;
185 pp_stack = n;
186 }
187
188 /* pp_pop --- pop a pretty printed string off the stack */
189
190 static NODE *
pp_pop()191 pp_pop()
192 {
193 NODE *n;
194 n = pp_stack;
195 pp_stack = n->pp_next;
196 return n;
197 }
198
199 /* pp_top --- look at what's on the top of the stack */
200
201 #define pp_top() pp_stack
202
203 /* pp_free --- release a pretty printed node */
204
205 static void
pp_free(NODE * n)206 pp_free(NODE *n)
207 {
208 if ((n->flags & CAN_FREE) != 0)
209 efree(n->pp_str);
210 freenode(n);
211 }
212
213 /* pprint --- pretty print a program segment */
214
215 static void
pprint(INSTRUCTION * startp,INSTRUCTION * endp,int flags)216 pprint(INSTRUCTION *startp, INSTRUCTION *endp, int flags)
217 {
218 INSTRUCTION *pc;
219 NODE *t1;
220 char *str;
221 NODE *t2;
222 INSTRUCTION *ip1;
223 INSTRUCTION *ip2;
224 NODE *m;
225 char *tmp;
226 int rule;
227 static int rule_count[MAXRULE];
228 static bool skip_comment = false;
229
230 for (pc = startp; pc != endp; pc = pc->nexti) {
231 if (pc->source_line > 0)
232 sourceline = pc->source_line;
233
234 /* skip leading EOL comment as it has already been printed */
235 if (pc->opcode == Op_comment
236 && pc->memory->comment_type == EOL_COMMENT
237 && skip_comment) {
238 skip_comment = false;
239 continue;
240 }
241 skip_comment = false;
242
243 switch (pc->opcode) {
244 case Op_rule:
245 /*
246 * Rules are four instructions long.
247 * See append_rule in awkgram.y.
248 * The first has the Rule Op Code, nexti etc.
249 * The second, (pc + 1) has firsti and lasti:
250 * the first/last ACTION instructions for this rule.
251 * The third has first_line and last_line:
252 * the first and last source line numbers.
253 * The fourth holds the namespace name if there is one.
254 * (there should be one if we're in this file)
255 * This can actually be a list in reverse order if
256 * there were several @namespace directives one
257 * after the other.
258 */
259 source = pc->source_file;
260 rule = pc->in_rule;
261
262 pp_namespace_list(pc[3].nexti);
263
264 if (rule != Rule) {
265 /* Allow for pre-non-rule-block comment */
266 if (pc->nexti != (pc+1)->firsti
267 && pc->nexti->opcode == Op_comment
268 && pc->nexti->memory->comment_type == BLOCK_COMMENT)
269 print_comment(pc->nexti, -1);
270 ip1 = (pc + 1)->firsti;
271 ip2 = (pc + 1)->lasti;
272
273 if (do_profile) {
274 if (! rule_count[rule]++)
275 fprintf(prof_fp, _("\t# %s rule(s)\n\n"), ruletab[rule]);
276 indent(0);
277 }
278 fprintf(prof_fp, "%s {", ruletab[rule]);
279 end_line(pc);
280 skip_comment = true;
281 } else {
282 if (do_profile && ! rule_count[rule]++)
283 fprintf(prof_fp, _("\t# Rule(s)\n\n"));
284 ip1 = pc->nexti;
285 indent(ip1->exec_count);
286 if (ip1 != (pc + 1)->firsti) { /* non-empty pattern */
287 pprint(ip1->nexti, (pc + 1)->firsti, NO_PPRINT_FLAGS);
288 /* Allow for case where the "pattern" is just a comment */
289 if (ip1->nexti->nexti->nexti != (pc +1)->firsti
290 || ip1->nexti->opcode != Op_comment) {
291 t1 = pp_pop();
292 fprintf(prof_fp, "%s {", t1->pp_str);
293 pp_free(t1);
294 } else
295 fprintf(prof_fp, "{");
296 ip1 = (pc + 1)->firsti;
297 ip2 = (pc + 1)->lasti;
298
299 if (do_profile && ip1->exec_count > 0)
300 fprintf(prof_fp, " # " EXEC_COUNT_FMT, ip1->exec_count);
301
302 end_line(ip1);
303 skip_comment = true;
304 } else {
305 fprintf(prof_fp, "{\n");
306 ip1 = (pc + 1)->firsti;
307 ip2 = (pc + 1)->lasti;
308 }
309 ip1 = ip1->nexti;
310 }
311 indent_in();
312 pprint(ip1, ip2, NO_PPRINT_FLAGS);
313 indent_out();
314 if (do_profile)
315 indent(0);
316 fprintf(prof_fp, "}\n\n");
317 pc = (pc + 1)->lasti;
318 break;
319
320 case Op_atexit:
321 break;
322
323 case Op_stop:
324 memset(rule_count, 0, MAXRULE * sizeof(int));
325 break;
326
327 case Op_push_i:
328 m = pc->memory;
329 if (m == Nnull_string) /* optional return or exit value; don't print 0 or "" */
330 pp_push(pc->opcode, m->stptr, DONT_FREE, pc->comment);
331 else if ((m->flags & NUMBER) != 0)
332 pp_push(pc->opcode, pp_number(m), CAN_FREE, pc->comment);
333 else {
334 str = pp_string(m->stptr, m->stlen, '"');
335 if ((m->flags & INTLSTR) != 0) {
336 char *tmp = str;
337 str = pp_group3("_", tmp, "");
338 efree(tmp);
339 }
340 pp_push(pc->opcode, str, CAN_FREE, pc->comment);
341 }
342 break;
343
344 case Op_store_var:
345 if (pc->initval != NULL)
346 pp_push(Op_push_i, pp_node(pc->initval), CAN_FREE, pc->comment);
347 /* fall through */
348 case Op_store_sub:
349 case Op_assign_concat:
350 case Op_push_lhs:
351 case Op_push_param:
352 case Op_push_array:
353 case Op_push:
354 case Op_push_arg:
355 case Op_push_arg_untyped:
356 m = pc->memory;
357 switch (m->type) {
358 case Node_param_list:
359 pp_push(pc->opcode, func_params[m->param_cnt].param, DONT_FREE, pc->comment);
360 break;
361
362 case Node_var:
363 case Node_var_new:
364 case Node_var_array:
365 if (m->vname != NULL) {
366 bool malloced = false;
367 char *name = adjust_namespace(m->vname, & malloced);
368
369 pp_push(pc->opcode, name, malloced ? CAN_FREE : DONT_FREE, pc->comment);
370 } else
371 fatal(_("internal error: %s with null vname"),
372 nodetype2str(m->type));
373 break;
374
375 default:
376 fprintf(stderr, "Got unexpected type %s\n", nodetype2str(m->type));
377 cant_happen();
378 }
379
380 switch (pc->opcode) {
381 case Op_store_var:
382 t2 = pp_pop(); /* l.h.s. */
383 t1 = pp_pop(); /* r.h.s. */
384 fprintf(prof_fp, "%s%s%s", t2->pp_str, op2str(pc->opcode), t1->pp_str);
385 goto cleanup;
386
387 case Op_store_sub:
388 t1 = pp_pop(); /* array */
389 tmp = pp_list(pc->expr_count, op2str(Op_subscript), ", "); /*subscript*/
390 t2 = pp_pop(); /* r.h.s. */
391 fprintf(prof_fp, "%s%s%s%s", t1->pp_str, tmp,
392 op2str(pc->opcode), t2->pp_str);
393 efree(tmp);
394 goto cleanup;
395
396 case Op_assign_concat:
397 t2 = pp_pop(); /* l.h.s. */
398 t1 = pp_pop();
399 tmp = pp_group3(t2->pp_str, op2str(Op_concat), t1->pp_str);
400 fprintf(prof_fp, "%s%s%s", t2->pp_str, op2str(Op_assign), tmp);
401 efree(tmp);
402 cleanup:
403 pp_free(t2);
404 pp_free(t1);
405 if ((flags & IN_FOR_HEADER) == 0)
406 pc = end_line(pc);
407 break;
408
409 default:
410 break;
411 }
412 break;
413
414 case Op_sub_array:
415 case Op_subscript_lhs:
416 case Op_subscript:
417 tmp = pp_list(pc->sub_count, op2str(pc->opcode), ", ");
418 t1 = pp_pop();
419 str = pp_group3(t1->pp_str, tmp, "");
420 efree(tmp);
421 pp_free(t1);
422 pp_push(pc->opcode, str, CAN_FREE, pc->comment);
423 break;
424
425 case Op_and:
426 case Op_or:
427 pprint(pc->nexti, pc->target_jmp, flags);
428 t2 = pp_pop();
429 t1 = pp_pop();
430 parenthesize(pc->opcode, t1, t2);
431 if (pc->comment == NULL)
432 str = pp_group3(t1->pp_str, op2str(pc->opcode), t2->pp_str);
433 else {
434 check_indent_level();
435
436 size_t len = strlen(t1->pp_str)
437 + strlen(op2str(pc->opcode)) + strlen(t2->pp_str) // foo && bar
438 + indent_level + 1 // indent
439 + pc->comment->memory->stlen + 3; // tab comment
440
441 emalloc(str, char *, len, "pprint");
442 sprintf(str, "%s%s%s%.*s %s", t1->pp_str, op2str(pc->opcode),
443 pc->comment->memory->stptr,
444 (int) (indent_level + 1), tabs, t2->pp_str);
445 }
446 pp_free(t1);
447 pp_free(t2);
448 pp_push(pc->opcode, str, CAN_FREE, pc->comment);
449 pc = pc->target_jmp;
450 break;
451
452 case Op_plus_i:
453 case Op_minus_i:
454 case Op_times_i:
455 case Op_exp_i:
456 case Op_quotient_i:
457 case Op_mod_i:
458 m = pc->memory;
459 t1 = pp_pop();
460 if (prec_level(pc->opcode) > prec_level(t1->type)
461 && is_binary(t1->type)) /* (a - b) * 1 */
462 pp_parenthesize(t1);
463 if ((m->flags & NUMBER) != 0)
464 tmp = pp_number(m);
465 else
466 tmp = pp_string(m->stptr, m->stlen, '"');
467 str = pp_group3(t1->pp_str, op2str(pc->opcode), tmp);
468 efree(tmp);
469 pp_free(t1);
470 pp_push(pc->opcode, str, CAN_FREE, pc->comment);
471 break;
472
473 case Op_parens:
474 t1 = pp_pop();
475 str = pp_group3("(", t1->pp_str, ")");
476 pp_free(t1);
477 pp_push(pc->opcode, str, CAN_FREE, pc->comment);
478 break;
479
480 case Op_plus:
481 case Op_minus:
482 case Op_times:
483 case Op_exp:
484 case Op_quotient:
485 case Op_mod:
486 case Op_equal:
487 case Op_notequal:
488 case Op_less:
489 case Op_greater:
490 case Op_leq:
491 case Op_geq:
492 t2 = pp_pop();
493 t1 = pp_pop();
494 parenthesize(pc->opcode, t1, t2);
495 str = pp_group3(t1->pp_str, op2str(pc->opcode), t2->pp_str);
496 pp_free(t1);
497 pp_free(t2);
498 pp_push(pc->opcode, str, CAN_FREE, pc->comment);
499 break;
500
501 case Op_preincrement:
502 case Op_predecrement:
503 case Op_postincrement:
504 case Op_postdecrement:
505 t1 = pp_pop();
506 if (pc->opcode == Op_preincrement || pc->opcode == Op_predecrement)
507 str = pp_group3(op2str(pc->opcode), t1->pp_str, "");
508 else
509 str = pp_group3(t1->pp_str, op2str(pc->opcode), "");
510 pp_free(t1);
511 pp_push(pc->opcode, str, CAN_FREE, pc->comment);
512 break;
513
514 case Op_field_spec:
515 case Op_field_spec_lhs:
516 case Op_unary_minus:
517 case Op_unary_plus:
518 case Op_not:
519 t1 = pp_pop();
520 if (is_binary(t1->type)
521 || (((OPCODE) t1->type) == pc->opcode
522 && (pc->opcode == Op_unary_minus
523 || pc->opcode == Op_unary_plus)))
524 pp_parenthesize(t1);
525
526 /* optypes table (eval.c) includes space after ! */
527 str = pp_group3(op2str(pc->opcode), t1->pp_str, "");
528 pp_free(t1);
529 pp_push(pc->opcode, str, CAN_FREE, pc->comment);
530 break;
531
532 case Op_assign:
533 case Op_assign_plus:
534 case Op_assign_minus:
535 case Op_assign_times:
536 case Op_assign_quotient:
537 case Op_assign_mod:
538 case Op_assign_exp:
539 t2 = pp_pop(); /* l.h.s. */
540 t1 = pp_pop();
541 str = pp_group3(t2->pp_str, op2str(pc->opcode), t1->pp_str);
542 pp_free(t2);
543 pp_free(t1);
544 pp_push(pc->opcode, str, CAN_FREE, pc->comment);
545 break;
546
547 case Op_store_field:
548 t1 = pp_pop(); /* field num */
549 if (is_binary(t1->type))
550 pp_parenthesize(t1);
551 t2 = pp_pop(); /* r.h.s. */
552 fprintf(prof_fp, "$%s%s%s", t1->pp_str, op2str(pc->opcode), t2->pp_str);
553 pp_free(t2);
554 pp_free(t1);
555 if ((flags & IN_FOR_HEADER) == 0)
556 pc = end_line(pc);
557 break;
558
559 case Op_concat:
560 str = pp_concat(pc->expr_count);
561 pp_push(Op_concat, str, CAN_FREE, pc->comment);
562 break;
563
564 case Op_K_delete:
565 {
566 char *array;
567 t1 = pp_pop();
568 array = t1->pp_str;
569 if (pc->expr_count > 0) {
570 char *sub;
571 sub = pp_list(pc->expr_count, NULL, pc->expr_count > 1 ? "][" : ", ");
572 fprintf(prof_fp, "%s %s[%s]", op2str(Op_K_delete), array, sub);
573 efree(sub);
574 } else
575 fprintf(prof_fp, "%s %s", op2str(Op_K_delete), array);
576 if ((flags & IN_FOR_HEADER) == 0)
577 pc = end_line(pc);
578 pp_free(t1);
579 }
580 break;
581
582 case Op_K_delete_loop:
583 /* Efficiency hack not in effect because of exec_count instruction */
584 cant_happen();
585 break;
586
587 case Op_in_array:
588 {
589 char *array, *sub;
590 t1 = pp_pop();
591 array = t1->pp_str;
592 if (pc->expr_count > 1) {
593 sub = pp_list(pc->expr_count, "()", ", ");
594 str = pp_group3(sub, op2str(Op_in_array), array);
595 efree(sub);
596 } else {
597 t2 = pp_pop();
598 if (prec_level(t2->type) < prec_level(Op_in_array)) {
599 pp_parenthesize(t2);
600 }
601 sub = t2->pp_str;
602 str = pp_group3(sub, op2str(Op_in_array), array);
603 pp_free(t2);
604 }
605 pp_free(t1);
606 pp_push(Op_in_array, str, CAN_FREE, pc->comment);
607 }
608 break;
609
610 case Op_var_update:
611 case Op_var_assign:
612 case Op_field_assign:
613 case Op_subscript_assign:
614 case Op_arrayfor_init:
615 case Op_arrayfor_incr:
616 case Op_arrayfor_final:
617 case Op_newfile:
618 case Op_get_record:
619 case Op_lint:
620 case Op_jmp:
621 case Op_jmp_false:
622 case Op_jmp_true:
623 case Op_no_op:
624 case Op_and_final:
625 case Op_or_final:
626 case Op_cond_pair:
627 case Op_after_beginfile:
628 case Op_after_endfile:
629 break;
630
631 case Op_sub_builtin:
632 {
633 const char *fname = "sub";
634 if ((pc->sub_flags & GSUB) != 0)
635 fname = "gsub";
636 else if ((pc->sub_flags & GENSUB) != 0)
637 fname = "gensub";
638 tmp = pp_list(pc->expr_count, "()", ", ");
639 str = pp_group3(fname, tmp, "");
640 efree(tmp);
641 pp_push(Op_sub_builtin, str, CAN_FREE, pc->comment);
642 }
643 break;
644
645 case Op_builtin:
646 case Op_ext_builtin:
647 {
648 const char *fname;
649 if (pc->opcode == Op_builtin) {
650 bool prepend_awk = (current_namespace != awk_namespace && strcmp(current_namespace, awk_namespace) != 0);
651 fname = getfname(pc->builtin, prepend_awk);
652 } else
653 fname = (pc + 1)->func_name;
654 if (fname != NULL) {
655 if (pc->expr_count > 0) {
656 tmp = pp_list(pc->expr_count, "()", ", ");
657 str = pp_group3(fname, tmp, "");
658 efree(tmp);
659 } else
660 str = pp_group3(fname, "()", "");
661 pp_push(Op_builtin, str, CAN_FREE, pc->comment);
662 } else
663 fatal(_("internal error: builtin with null fname"));
664 }
665 break;
666
667 case Op_K_print:
668 case Op_K_printf:
669 case Op_K_print_rec:
670 if (pc->opcode == Op_K_print_rec)
671 // instead of `print $0', just `print'
672 tmp = strdup("");
673 else if (pc->redir_type != 0) {
674 // Avoid turning printf("hello\n") into printf(("hello\n"))
675 NODE *n = pp_top();
676
677 if (pc->expr_count == 1
678 && n->pp_str[0] == '('
679 && n->pp_str[n->pp_len - 1] == ')') {
680 n = pp_pop();
681
682 tmp = strdup(n->pp_str);
683 pp_free(n);
684 } else
685 tmp = pp_list(pc->expr_count, "()", ", ");
686 } else {
687 tmp = pp_list(pc->expr_count, " ", ", ");
688 tmp[strlen(tmp) - 1] = '\0'; /* remove trailing space */
689 }
690
691 if (pc->redir_type != 0) {
692 t1 = pp_pop();
693 if (is_binary(t1->type))
694 pp_parenthesize(t1);
695 fprintf(prof_fp, "%s%s%s%s", op2str(pc->opcode),
696 tmp, redir2str(pc->redir_type), t1->pp_str);
697 pp_free(t1);
698 } else
699 fprintf(prof_fp, "%s%s", op2str(pc->opcode), tmp);
700 efree(tmp);
701 if ((flags & IN_FOR_HEADER) == 0)
702 pc = end_line(pc);
703 break;
704
705 case Op_push_re:
706 if (pc->memory->type != Node_regex && (pc->memory->flags & REGEX) == 0)
707 break;
708 /* else
709 fall through */
710 case Op_match_rec:
711 {
712 if (pc->memory->type == Node_regex) {
713 NODE *re = pc->memory->re_exp;
714 str = pp_string(re->stptr, re->stlen, '/');
715 } else {
716 assert((pc->memory->flags & REGEX) != 0);
717 str = pp_typed_regex(pc->memory->stptr, pc->memory->stlen, '/');
718 }
719 pp_push(pc->opcode, str, CAN_FREE, pc->comment);
720 }
721 break;
722
723 case Op_nomatch:
724 case Op_match:
725 {
726 char *restr, *txt;
727 t1 = pp_pop();
728 if (is_binary(t1->type))
729 pp_parenthesize(t1);
730 txt = t1->pp_str;
731 m = pc->memory;
732 if (m->type == Node_dynregex) {
733 restr = txt;
734 t2 = pp_pop();
735 if (is_binary(t2->type))
736 pp_parenthesize(t2);
737 txt = t2->pp_str;
738 str = pp_group3(txt, op2str(pc->opcode), restr);
739 pp_free(t2);
740 } else if (m->type == Node_val && (m->flags & REGEX) != 0) {
741 restr = pp_typed_regex(m->stptr, m->stlen, '/');
742 str = pp_group3(txt, op2str(pc->opcode), restr);
743 efree(restr);
744 } else {
745 NODE *re = m->re_exp;
746 restr = pp_string(re->stptr, re->stlen, '/');
747 str = pp_group3(txt, op2str(pc->opcode), restr);
748 efree(restr);
749 }
750 pp_free(t1);
751 pp_push(pc->opcode, str, CAN_FREE, pc->comment);
752 }
753 break;
754
755 case Op_K_getline:
756 case Op_K_getline_redir:
757 if (pc->into_var) {
758 t1 = pp_pop();
759 tmp = pp_group3(op2str(Op_K_getline), " ", t1->pp_str);
760 pp_free(t1);
761 } else
762 tmp = pp_group3(op2str(Op_K_getline), "", "");
763
764 if (pc->redir_type != 0) {
765 int before = (pc->redir_type == redirect_pipein
766 || pc->redir_type == redirect_twoway);
767
768 t2 = pp_pop();
769 if (is_binary(t2->type))
770 pp_parenthesize(t2);
771 if (before)
772 str = pp_group3(t2->pp_str, redir2str(pc->redir_type), tmp);
773 else
774 str = pp_group3(tmp, redir2str(pc->redir_type), t2->pp_str);
775 efree(tmp);
776 pp_free(t2);
777 } else
778 str = tmp;
779 pp_push(pc->opcode, str, CAN_FREE, pc->comment);
780 break;
781
782 case Op_indirect_func_call:
783 case Op_func_call:
784 {
785 const char *pre;
786 int pcount;
787 bool malloced = false;
788 char *fname = adjust_namespace(pc->func_name, & malloced);
789
790 if (pc->opcode == Op_indirect_func_call)
791 pre = "@";
792 else
793 pre = "";
794 pcount = (pc + 1)->expr_count;
795 if (pcount > 0) {
796 tmp = pp_list(pcount, "()", ", ");
797 str = pp_group3(pre, fname, tmp);
798 efree(tmp);
799 } else
800 str = pp_group3(pre, fname, "()");
801 if (pc->opcode == Op_indirect_func_call) {
802 t1 = pp_pop(); /* indirect var */
803 pp_free(t1);
804 }
805
806 pp_push(pc->opcode, str, CAN_FREE, pc->comment);
807 if (malloced)
808 efree((void *) fname);
809 }
810 break;
811
812 case Op_K_continue:
813 case Op_K_break:
814 case Op_K_nextfile:
815 case Op_K_next:
816 fprintf(prof_fp, "%s", op2str(pc->opcode));
817 pc = end_line(pc);
818 break;
819
820 case Op_K_return:
821 case Op_K_exit:
822 t1 = pp_pop();
823 if (is_binary(t1->type))
824 pp_parenthesize(t1);
825 if (pc->source_line > 0) { /* don't print implicit 'return' at end of function */
826 // avoid final trailing space to keep whiny users happy
827 if (t1->pp_str[0] != '\0')
828 fprintf(prof_fp, "%s %s", op2str(pc->opcode), t1->pp_str);
829 else
830 fprintf(prof_fp, "%s", op2str(pc->opcode));
831 pc = end_line(pc);
832 }
833 pp_free(t1);
834 break;
835
836 case Op_pop:
837 t1 = pp_pop();
838 fprintf(prof_fp, "%s", t1->pp_str);
839 if ((flags & IN_FOR_HEADER) == 0)
840 pc = end_line(pc);
841 pp_free(t1);
842 break;
843
844 case Op_line_range:
845 ip1 = pc + 1;
846 pprint(pc->nexti, ip1->condpair_left, NO_PPRINT_FLAGS);
847 pprint(ip1->condpair_left->nexti, ip1->condpair_right, NO_PPRINT_FLAGS);
848 t2 = pp_pop();
849 t1 = pp_pop();
850 str = pp_group3(t1->pp_str, ", ", t2->pp_str);
851 pp_free(t1);
852 pp_free(t2);
853 pp_push(Op_line_range, str, CAN_FREE, pc->comment);
854 pc = ip1->condpair_right;
855 break;
856
857 case Op_K_while:
858 ip1 = pc + 1;
859 indent(ip1->while_body->exec_count);
860 fprintf(prof_fp, "%s (", op2str(pc->opcode));
861 pprint(pc->nexti, ip1->while_body, NO_PPRINT_FLAGS);
862 t1 = pp_pop();
863 fprintf(prof_fp, "%s) {", t1->pp_str);
864 pp_free(t1);
865 ip1->while_body = end_line(ip1->while_body);
866 indent_in();
867 pprint(ip1->while_body->nexti, pc->target_break, NO_PPRINT_FLAGS);
868 indent_out();
869 indent(SPACEOVER);
870 fprintf(prof_fp, "}");
871 pc = end_line(pc->target_break);
872 break;
873
874 case Op_K_do:
875 ip1 = pc + 1;
876 indent(pc->nexti->exec_count);
877 fprintf(prof_fp, "%s {", op2str(pc->opcode));
878 end_line(pc->nexti);
879 skip_comment = true;
880 indent_in();
881 pprint(pc->nexti->nexti, ip1->doloop_cond, NO_PPRINT_FLAGS);
882 indent_out();
883 pprint(ip1->doloop_cond, pc->target_break, NO_PPRINT_FLAGS);
884 indent(SPACEOVER);
885 t1 = pp_pop();
886 fprintf(prof_fp, "} %s (%s)", op2str(Op_K_while), t1->pp_str);
887 if (pc->comment)
888 fprintf(prof_fp, "\t%s", pc->comment->memory->stptr);
889 else {
890 end_line(pc->target_break);
891 skip_comment = true;
892 }
893 pp_free(t1);
894 pc = pc->target_break;
895 break;
896
897 case Op_K_for:
898 {
899 INSTRUCTION *comment1 = NULL, *comment2 = NULL;
900
901 if (pc->comment != NULL) {
902 comment1 = pc->comment;
903 pc->comment = NULL;
904 if (comment1 != NULL && comment1->comment != NULL) {
905 comment2 = comment1->comment;
906 comment1->comment = NULL;
907 }
908 if (comment2 == NULL && comment1->memory->comment_type == FOR_COMMENT) {
909 comment2 = comment1;
910 comment2->memory->comment_type = EOL_COMMENT;
911 comment1 = NULL;
912 }
913 }
914
915 ip1 = pc + 1;
916 indent(ip1->forloop_body->exec_count);
917 fprintf(prof_fp, "%s (", op2str(pc->opcode));
918
919 /* If empty for loop header, print it a little more nicely. */
920 if ( pc->nexti->opcode == Op_no_op
921 && ip1->forloop_cond == pc->nexti
922 && pc->target_continue->opcode == Op_jmp
923 && comment1 == NULL && comment2 == NULL) {
924 fprintf(prof_fp, ";;");
925 } else {
926 pprint(pc->nexti, ip1->forloop_cond, IN_FOR_HEADER);
927 fprintf(prof_fp, "; ");
928
929 if (comment1 != NULL) {
930 print_comment(comment1, 0);
931 indent(ip1->forloop_body->exec_count);
932 indent(1);
933 }
934
935 if (ip1->forloop_cond->opcode == Op_no_op &&
936 ip1->forloop_cond->nexti == ip1->forloop_body)
937 fprintf(prof_fp, "; ");
938 else {
939 pprint(ip1->forloop_cond, ip1->forloop_body, IN_FOR_HEADER);
940 t1 = pp_pop();
941 fprintf(prof_fp, "%s; ", t1->pp_str);
942 pp_free(t1);
943 }
944
945 if (comment2 != NULL) {
946 print_comment(comment2, 0);
947 indent(ip1->forloop_body->exec_count);
948 indent(1);
949 }
950
951 pprint(pc->target_continue, pc->target_break, IN_FOR_HEADER);
952 }
953 fprintf(prof_fp, ") {");
954 end_line(ip1->forloop_body);
955 skip_comment = true;
956 indent_in();
957 pprint(ip1->forloop_body->nexti, pc->target_continue, NO_PPRINT_FLAGS);
958 indent_out();
959 indent(SPACEOVER);
960 fprintf(prof_fp, "}");
961 end_line(pc->target_break);
962 skip_comment = true;
963 pc = pc->target_break;
964 }
965 break;
966
967 case Op_K_arrayfor:
968 {
969 char *array;
970 const char *item;
971
972 ip1 = pc + 1;
973 t1 = pp_pop();
974 array = t1->pp_str;
975 m = ip1->forloop_cond->array_var;
976 if (m->type == Node_param_list)
977 item = func_params[m->param_cnt].param;
978 else
979 item = m->vname;
980 indent(ip1->forloop_body->exec_count);
981 fprintf(prof_fp, "%s (%s%s%s) {", op2str(Op_K_arrayfor),
982 item, op2str(Op_in_array), array);
983 end_line(ip1->forloop_body);
984 skip_comment = true;
985 indent_in();
986 pp_free(t1);
987 pprint(ip1->forloop_body->nexti, pc->target_break, NO_PPRINT_FLAGS);
988 indent_out();
989 indent(SPACEOVER);
990 fprintf(prof_fp, "}");
991 end_line(pc->target_break);
992 skip_comment = true;
993 pc = pc->target_break;
994 }
995 break;
996
997 case Op_K_switch:
998 ip1 = pc + 1;
999 fprintf(prof_fp, "%s (", op2str(pc->opcode));
1000 pprint(pc->nexti, ip1->switch_start, NO_PPRINT_FLAGS);
1001 t1 = pp_pop();
1002 fprintf(prof_fp, "%s) {\n", t1->pp_str);
1003 if (pc->comment)
1004 print_comment(pc->comment, 0);
1005 pp_free(t1);
1006 pprint(ip1->switch_start, ip1->switch_end, NO_PPRINT_FLAGS);
1007 indent(SPACEOVER);
1008 fprintf(prof_fp, "}\n");
1009 if (ip1->switch_end->comment)
1010 print_comment(ip1->switch_end->comment, 0);
1011 pc = pc->target_break;
1012 break;
1013
1014 case Op_K_case:
1015 case Op_K_default:
1016 indent(pc->stmt_start->exec_count);
1017 if (pc->opcode == Op_K_case) {
1018 t1 = pp_pop();
1019 fprintf(prof_fp, "%s %s:", op2str(pc->opcode), t1->pp_str);
1020 pp_free(t1);
1021 } else
1022 fprintf(prof_fp, "%s:", op2str(pc->opcode));
1023
1024 indent_in();
1025 if (pc->comment != NULL) {
1026 if (pc->comment->memory->comment_type == EOL_COMMENT)
1027 fprintf(prof_fp, "\t%s", pc->comment->memory->stptr);
1028 else {
1029 fprintf(prof_fp, "\n");
1030 print_comment(pc->comment, indent_level);
1031 }
1032 } else
1033 fprintf(prof_fp, "\n");
1034 pprint(pc->stmt_start->nexti, pc->stmt_end->nexti, NO_PPRINT_FLAGS);
1035 indent_out();
1036 break;
1037
1038 case Op_K_if:
1039 fprintf(prof_fp, "%s (", op2str(pc->opcode));
1040 pprint(pc->nexti, pc->branch_if, NO_PPRINT_FLAGS);
1041 t1 = pp_pop();
1042 fprintf(prof_fp, "%s) {", t1->pp_str);
1043 pp_free(t1);
1044
1045 ip1 = pc->branch_if;
1046 if (ip1->exec_count > 0)
1047 fprintf(prof_fp, " # " EXEC_COUNT_FMT, ip1->exec_count);
1048 ip1 = end_line(ip1);
1049 indent_in();
1050 if (pc->comment != NULL)
1051 print_comment(pc->comment, indent_level);
1052 pprint(ip1->nexti, pc->branch_else, NO_PPRINT_FLAGS);
1053 indent_out();
1054 pc = pc->branch_else;
1055 if (pc->nexti->opcode == Op_no_op) { /* no following else */
1056 indent(SPACEOVER);
1057 fprintf(prof_fp, "}");
1058 if (pc->nexti->nexti->opcode != Op_comment
1059 || pc->nexti->nexti->memory->comment_type == BLOCK_COMMENT)
1060 fprintf(prof_fp, "\n");
1061 /* else
1062 It will be printed at the top. */
1063 }
1064 /*
1065 * See next case; turn off the flag so that the
1066 * following else is correctly indented.
1067 */
1068 flags &= ~IN_ELSE_IF;
1069 break;
1070
1071 case Op_K_else:
1072 /*
1073 * If possible, chain else-if's together on the
1074 * same line.
1075 *
1076 * See awkgram.y:mk_condition to understand
1077 * what is being checked here.
1078 *
1079 * Op_exec_count follows Op_K_else, check the
1080 * opcode of the following instruction.
1081 * Additionally, check that the subsequent if
1082 * terminates where this else does; in that case
1083 * it's ok to compact the if to follow the else.
1084 */
1085
1086 fprintf(prof_fp, "} %s ", op2str(pc->opcode));
1087 if (pc->nexti->nexti->opcode == Op_K_if
1088 && pc->branch_end == pc->nexti->nexti->branch_else->lasti) {
1089 pprint(pc->nexti, pc->branch_end, IN_ELSE_IF);
1090 } else {
1091 fprintf(prof_fp, "{");
1092 end_line(pc);
1093 skip_comment = true;
1094 indent_in();
1095 if (pc->comment != NULL)
1096 print_comment(pc->comment, indent_level);
1097 pprint(pc->nexti, pc->branch_end, NO_PPRINT_FLAGS);
1098 indent_out();
1099 indent(SPACEOVER);
1100 fprintf(prof_fp, "}");
1101 end_line(pc->branch_end);
1102 skip_comment = true;
1103 }
1104 /*
1105 * Don't do end_line() here, we get multiple blank lines after
1106 * the final else in a chain of else-ifs since they all point
1107 * to the same branch_end.
1108 */
1109 pc = pc->branch_end;
1110 break;
1111
1112 case Op_cond_exp:
1113 {
1114 NODE *f, *t, *cond;
1115 size_t len;
1116 INSTRUCTION *qm_comment = NULL, *colon_comment = NULL;
1117
1118 qm_comment = pc->comment;
1119
1120 pprint(pc->nexti, pc->branch_if, NO_PPRINT_FLAGS);
1121 ip1 = pc->branch_if;
1122 pprint(ip1->nexti, pc->branch_else, NO_PPRINT_FLAGS);
1123 ip1 = pc->branch_else->nexti;
1124
1125 pc = ip1->nexti;
1126 colon_comment = pc->comment;
1127 assert(pc->opcode == Op_cond_exp);
1128 pprint(pc->nexti, pc->branch_end, NO_PPRINT_FLAGS);
1129
1130 f = pp_pop();
1131 t = pp_pop();
1132 cond = pp_pop();
1133
1134 /*
1135 * This stuff handles comments that come after a ?, :, or both.
1136 * Allowing newlines after ? and : is a gawk extension.
1137 * Theoretically this is fragile, since ?: expressions can be nested.
1138 * In practice, it's not, since if there was a comment following ? or :
1139 * in the original code, then it wasn't nested.
1140 */
1141
1142 len = f->pp_len + t->pp_len + cond->pp_len + 12;
1143 if (qm_comment == NULL && colon_comment == NULL) {
1144 // easy case
1145 emalloc(str, char *, len, "pprint");
1146 sprintf(str, "%s ? %s : %s", cond->pp_str, t->pp_str, f->pp_str);
1147 } else if (qm_comment != NULL && colon_comment != NULL) {
1148 check_indent_level();
1149 len += qm_comment->memory->stlen + // comments
1150 colon_comment->memory->stlen +
1151 2 * (indent_level + 1) + 3 + // indentation
1152 t->pp_len + 6;
1153 emalloc(str, char *, len, "pprint");
1154 sprintf(str,
1155 "%s ? %s" // cond ? comment
1156 "%.*s %s" // indent true-part
1157 " : %s" // : comment
1158 "%.*s %s", // indent false-part
1159 cond->pp_str, // condition
1160 qm_comment->memory->stptr, // comment
1161 (int) (indent_level + 1), tabs, // indent
1162 t->pp_str, // true part
1163 colon_comment->memory->stptr, // comment
1164 (int) (indent_level + 1), tabs, // indent
1165 f->pp_str // false part
1166 );
1167 } else if (qm_comment != NULL) {
1168 check_indent_level();
1169 len += qm_comment->memory->stlen + // comment
1170 1 * (indent_level + 1) + 3 + // indentation
1171 t->pp_len + 3;
1172 emalloc(str, char *, len, "pprint");
1173 sprintf(str,
1174 "%s ? %s" // cond ? comment
1175 "%.*s %s" // indent true-part
1176 " : %s", // : false-part
1177 cond->pp_str, // condition
1178 qm_comment->memory->stptr, // comment
1179 (int) (indent_level + 1), tabs, // indent
1180 t->pp_str, // true part
1181 f->pp_str // false part
1182 );
1183 } else {
1184 check_indent_level();
1185 len += colon_comment->memory->stlen + // comment
1186 1 * (indent_level + 1) + 3 + // indentation
1187 t->pp_len + 3;
1188 emalloc(str, char *, len, "pprint");
1189 sprintf(str,
1190 "%s ? %s" // cond ? true-part
1191 " : %s" // : comment
1192 "%.*s %s", // indent false-part
1193 cond->pp_str, // condition
1194 t->pp_str, // true part
1195 colon_comment->memory->stptr, // comment
1196 (int) (indent_level + 1), tabs, // indent
1197 f->pp_str // false part
1198 );
1199 }
1200
1201 pp_free(cond);
1202 pp_free(t);
1203 pp_free(f);
1204 pp_push(Op_cond_exp, str, CAN_FREE, pc->comment);
1205 pc = pc->branch_end;
1206 }
1207 break;
1208
1209 case Op_exec_count:
1210 if (flags == NO_PPRINT_FLAGS)
1211 indent(pc->exec_count);
1212 break;
1213
1214 case Op_comment:
1215 print_comment(pc, 0);
1216 break;
1217
1218 case Op_list:
1219 break;
1220
1221 default:
1222 cant_happen();
1223 }
1224
1225 if (pc == endp)
1226 break;
1227 }
1228 }
1229
1230 /* end_line --- end pretty print line with new line or on-line comment */
1231
1232 INSTRUCTION *
end_line(INSTRUCTION * ip)1233 end_line(INSTRUCTION *ip)
1234 {
1235 INSTRUCTION *ret = ip;
1236
1237 if (ip->nexti->opcode == Op_comment
1238 && ip->nexti->memory->comment_type == EOL_COMMENT) {
1239 fprintf(prof_fp, "\t");
1240 print_comment(ip->nexti, -1);
1241 ret = ip->nexti;
1242 }
1243 else
1244 fprintf(prof_fp, "\n");
1245
1246 return ret;
1247 }
1248
1249 /* pp_string_fp --- pretty print a string to the fp */
1250
1251 /*
1252 * This routine concentrates string pretty printing in one place,
1253 * so that it can be called from multiple places within gawk.
1254 */
1255
1256 void
pp_string_fp(Func_print print_func,FILE * fp,const char * in_str,size_t len,int delim,bool breaklines)1257 pp_string_fp(Func_print print_func, FILE *fp, const char *in_str,
1258 size_t len, int delim, bool breaklines)
1259 {
1260 char *s = pp_string(in_str, len, delim);
1261 int count;
1262 size_t slen;
1263 const char *str = (const char *) s;
1264 #define BREAKPOINT 70 /* arbitrary */
1265
1266 slen = strlen(str);
1267 for (count = 0; slen > 0; slen--, str++) {
1268 print_func(fp, "%c", *str);
1269 if (++count >= BREAKPOINT && breaklines) {
1270 print_func(fp, "%c\n%c", delim, delim);
1271 count = 0;
1272 }
1273 }
1274 efree(s);
1275 }
1276
1277
1278 /* just_dump --- dump the profile and function stack and keep going */
1279
1280 static void
just_dump(int signum)1281 just_dump(int signum)
1282 {
1283 extern INSTRUCTION *code_block;
1284
1285 dump_prog(code_block);
1286 dump_funcs();
1287 dump_fcall_stack(prof_fp);
1288 fflush(prof_fp);
1289 signal(signum, just_dump); /* for OLD Unix systems ... */
1290 }
1291
/* dump_and_exit --- dump the profile, the function stack, and exit */

/*
 * Signal handler: performs the same dump as just_dump() for SIGNUM and
 * then terminates through final_exit() with EXIT_FAILURE.
 */

static void
dump_and_exit(int signum)
{
	just_dump(signum);		/* write everything out first */
	final_exit(EXIT_FAILURE);
}
1300
1301 /* print_lib_list --- print a list of all libraries loaded */
1302
1303 static void
print_lib_list(FILE * prof_fp)1304 print_lib_list(FILE *prof_fp)
1305 {
1306 SRCFILE *s;
1307 static bool printed_header = false;
1308 const char *indent = "";
1309 bool found = false;
1310
1311 if (do_profile)
1312 indent = "\t";
1313
1314 for (s = srcfiles->next; s != srcfiles; s = s->next) {
1315 if (s->stype == SRC_EXTLIB) {
1316 if (do_profile && ! printed_header) {
1317 printed_header = true;
1318 fprintf(prof_fp, _("%s# Loaded extensions (-l and/or @load)\n\n"), indent);
1319 }
1320 found = true;
1321 fprintf(prof_fp, "%s@load \"%s\"", indent, s->src);
1322 if (s->comment != NULL) {
1323 fprintf(prof_fp, "\t");
1324 print_comment(s->comment, indent_level + 1);
1325 } else
1326 fprintf(prof_fp, "\n");
1327 }
1328 }
1329 if (found) /* we found some */
1330 fprintf(prof_fp, "\n");
1331 }
1332
1333 /* print_include_list --- print a list of all files included */
1334
1335 static void
print_include_list(FILE * prof_fp)1336 print_include_list(FILE *prof_fp)
1337 {
1338 SRCFILE *s;
1339 static bool printed_header = false;
1340 bool found = false;
1341
1342 if (do_profile)
1343 return;
1344
1345 for (s = srcfiles->next; s != srcfiles; s = s->next) {
1346 if (s->stype == SRC_INC) {
1347 if (! printed_header) {
1348 printed_header = true;
1349 fprintf(prof_fp, _("\n# Included files (-i and/or @include)\n\n"));
1350 }
1351 found = true;
1352 fprintf(prof_fp, "# @include \"%s\"", s->src);
1353 if (s->comment != NULL) {
1354 fprintf(prof_fp, "\t");
1355 print_comment(s->comment, indent_level + 1);
1356 } else
1357 fprintf(prof_fp, "\n");
1358 }
1359 }
1360 if (found) /* we found some */
1361 fprintf(prof_fp, "\n");
1362 }
1363
1364 /* print_comment --- print comment text with proper indentation */
1365
1366 static void
print_comment(INSTRUCTION * pc,long in)1367 print_comment(INSTRUCTION* pc, long in)
1368 {
1369 char *text;
1370 size_t count;
1371 bool after_newline = false;
1372
1373 count = pc->memory->stlen;
1374 text = pc->memory->stptr;
1375
1376 if (in >= 0)
1377 indent(in); /* is this correct? Where should comments go? */
1378 for (; count > 0; count--, text++) {
1379 if (after_newline) {
1380 indent(in);
1381 after_newline = false;
1382 }
1383 putc(*text, prof_fp);
1384 after_newline = (*text == '\n');
1385 }
1386
1387 if (pc->comment) {
1388 // chaining should only be two deep
1389 assert(pc->comment->comment == NULL);
1390 // if first was EOL comment, next must be block comment,
1391 // it needs to be indented.
1392 if (pc->memory->comment_type == EOL_COMMENT)
1393 in++;
1394 print_comment(pc->comment, in);
1395 }
1396 }
1397
1398 /* dump_prog --- dump the program */
1399
1400 /*
1401 * XXX: I am not sure it is right to have the strings in the dump
1402 * be translated, but I'll leave it alone for now.
1403 */
1404
1405 void
dump_prog(INSTRUCTION * code)1406 dump_prog(INSTRUCTION *code)
1407 {
1408 time_t now;
1409
1410 (void) time(& now);
1411 /* \n on purpose, with \n in ctime() output */
1412 if (do_profile)
1413 fprintf(prof_fp, _("\t# gawk profile, created %s\n"), ctime(& now));
1414 print_lib_list(prof_fp);
1415 pprint(code, NULL, NO_PPRINT_FLAGS);
1416 print_include_list(prof_fp);
1417 }
1418
1419 /* prec_level --- return the precedence of an operator, for paren tests */
1420
1421 static int
prec_level(int type)1422 prec_level(int type)
1423 {
1424 switch (type) {
1425 case Op_push_lhs:
1426 case Op_push_param:
1427 case Op_push_array:
1428 case Op_push:
1429 case Op_push_i:
1430 case Op_push_re:
1431 case Op_match_rec:
1432 case Op_subscript:
1433 case Op_subscript_lhs:
1434 case Op_func_call:
1435 case Op_K_delete_loop:
1436 case Op_builtin:
1437 return 16;
1438
1439 case Op_field_spec:
1440 case Op_field_spec_lhs:
1441 return 15;
1442
1443 case Op_preincrement:
1444 case Op_predecrement:
1445 case Op_postincrement:
1446 case Op_postdecrement:
1447 return 14;
1448
1449 case Op_exp:
1450 case Op_exp_i:
1451 return 13;
1452
1453 case Op_unary_minus:
1454 case Op_unary_plus:
1455 case Op_not:
1456 return 12;
1457
1458 case Op_times:
1459 case Op_times_i:
1460 case Op_quotient:
1461 case Op_quotient_i:
1462 case Op_mod:
1463 case Op_mod_i:
1464 return 11;
1465
1466 case Op_plus:
1467 case Op_plus_i:
1468 case Op_minus:
1469 case Op_minus_i:
1470 return 10;
1471
1472 case Op_concat:
1473 case Op_assign_concat:
1474 return 9;
1475
1476 case Op_equal:
1477 case Op_notequal:
1478 case Op_greater:
1479 case Op_less:
1480 case Op_leq:
1481 case Op_geq:
1482 return 8;
1483
1484 case Op_match:
1485 case Op_nomatch:
1486 return 7;
1487
1488 case Op_K_getline:
1489 case Op_K_getline_redir:
1490 return 6;
1491
1492 case Op_in_array:
1493 return 5;
1494
1495 case Op_and:
1496 return 4;
1497
1498 case Op_or:
1499 return 3;
1500
1501 case Op_cond_exp:
1502 return 2;
1503
1504 case Op_assign:
1505 case Op_assign_times:
1506 case Op_assign_quotient:
1507 case Op_assign_mod:
1508 case Op_assign_plus:
1509 case Op_assign_minus:
1510 case Op_assign_exp:
1511 return 1;
1512
1513 default:
1514 return 0;
1515 }
1516 }
1517
1518 /* is_scalar --- return true if scalar, false otherwise */
1519
1520 static bool
is_scalar(int type)1521 is_scalar(int type)
1522 {
1523 switch (type) {
1524 case Op_push_lhs:
1525 case Op_push_param:
1526 case Op_push_array:
1527 case Op_push:
1528 case Op_push_i:
1529 case Op_push_re:
1530 case Op_subscript:
1531 case Op_subscript_lhs:
1532 case Op_func_call:
1533 case Op_builtin:
1534 case Op_field_spec:
1535 case Op_field_spec_lhs:
1536 case Op_preincrement:
1537 case Op_predecrement:
1538 case Op_postincrement:
1539 case Op_postdecrement:
1540 case Op_unary_minus:
1541 case Op_unary_plus:
1542 case Op_not:
1543 return true;
1544
1545 default:
1546 return false;
1547 }
1548 }
1549
1550 /* is_binary --- return true if type represents a binary operator */
1551
1552 static bool
is_binary(int type)1553 is_binary(int type)
1554 {
1555 switch (type) {
1556 case Op_geq:
1557 case Op_leq:
1558 case Op_greater:
1559 case Op_less:
1560 case Op_notequal:
1561 case Op_equal:
1562 case Op_exp:
1563 case Op_times:
1564 case Op_quotient:
1565 case Op_mod:
1566 case Op_plus:
1567 case Op_minus:
1568 case Op_exp_i:
1569 case Op_times_i:
1570 case Op_quotient_i:
1571 case Op_mod_i:
1572 case Op_plus_i:
1573 case Op_minus_i:
1574 case Op_concat:
1575 case Op_assign_concat:
1576 case Op_match:
1577 case Op_nomatch:
1578 case Op_assign:
1579 case Op_assign_times:
1580 case Op_assign_quotient:
1581 case Op_assign_mod:
1582 case Op_assign_plus:
1583 case Op_assign_minus:
1584 case Op_assign_exp:
1585 case Op_cond_exp:
1586 case Op_and:
1587 case Op_or:
1588 case Op_in_array:
1589 case Op_K_getline_redir: /* sometimes */
1590 case Op_K_getline:
1591 return true;
1592
1593 default:
1594 return false;
1595 }
1596 }
1597
1598 /* pp_parenthesize --- parenthesize an expression in stack */
1599
1600 static void
pp_parenthesize(NODE * sp)1601 pp_parenthesize(NODE *sp)
1602 {
1603 char *p = sp->pp_str;
1604 size_t len = sp->pp_len;
1605
1606 if (p[0] == '(') // already parenthesized
1607 return;
1608
1609 emalloc(p, char *, len + 3, "pp_parenthesize");
1610 *p = '(';
1611 memcpy(p + 1, sp->pp_str, len);
1612 p[len + 1] = ')';
1613 p[len + 2] = '\0';
1614 if ((sp->flags & CAN_FREE) != 0)
1615 efree(sp->pp_str);
1616 sp->pp_str = p;
1617 sp->pp_len += 2;
1618 sp->flags |= CAN_FREE;
1619 }
1620
1621 /* parenthesize --- parenthesize two nodes relative to parent node type */
1622
1623 static void
parenthesize(int type,NODE * left,NODE * right)1624 parenthesize(int type, NODE *left, NODE *right)
1625 {
1626 int rprec = prec_level(right->type);
1627 int lprec = prec_level(left->type);
1628 int prec = prec_level(type);
1629
1630 if (lprec < prec)
1631 pp_parenthesize(left);
1632 if (rprec < prec)
1633 pp_parenthesize(right);
1634 }
1635
/* pp_string --- pretty format a string or regular regex constant */

/*
 * Thin front end for pp_string_or_typed_regex() with the typed-regex
 * flag off.  Returns that function's result unchanged.
 */

char *
pp_string(const char *in_str, size_t len, int delim)
{
	const bool typed_regex = false;

	return pp_string_or_typed_regex(in_str, len, delim, typed_regex);
}
1643
/* pp_typed_regex --- pretty format a hard regex constant */

/*
 * Thin front end for pp_string_or_typed_regex() with the typed-regex
 * flag on, which makes the result start with `@'.
 */

static char *
pp_typed_regex(const char *in_str, size_t len, int delim)
{
	const bool typed_regex = true;

	return pp_string_or_typed_regex(in_str, len, delim, typed_regex);
}
1651
/* pp_string_or_typed_regex --- pretty format a string, regex, or typed regex constant */

/*
 * Build a printable, delimited copy of the LEN bytes at IN_STR.
 *
 * DELIM must be '"' or '/' (asserted).  The result starts with `@' when
 * TYPED_REGEX is true, then DELIM, the converted text, and DELIM again.
 * Conversion rules, in the order they are tested:
 *	- the delimiter itself is backslash-escaped, but only for '"'
 *	- a NUL byte becomes \000
 *	- bytes found in the escape table become backslash + letter;
 *	  the string table also contains the backslash, the regex
 *	  table does not, so a backslash in a regex is copied as is
 *	- printable ASCII is copied unchanged
 *	- anything else becomes a backslash and three octal digits
 *
 * The returned buffer is allocated here; pp_string_fp() releases such a
 * buffer with efree() when done with it.
 */

char *
pp_string_or_typed_regex(const char *in_str, size_t len, int delim, bool typed_regex)
{
	/* escapes[i] is printed as backslash followed by printables[i] */
	static char str_escapes[] = "\a\b\f\n\r\t\v\\";
	static char str_printables[] = "abfnrtv\\";
	static char re_escapes[] = "\a\b\f\n\r\t\v";
	static char re_printables[] = "abfnrtv";
	char *escapes;
	char *printables;
	char *cp;
	int i;
	/* unsigned so that the octal conversion and ctype tests see 0..255 */
	const unsigned char *str = (const unsigned char *) in_str;
	size_t ofre, osiz;	/* bytes counted as free, total buffer size */
	char *obuf, *obufout;	/* start of buffer, next byte to write */

	assert(delim == '"' || delim == '/');

	if (delim == '/') {
		escapes = re_escapes;
		printables = re_printables;
	} else {
		escapes = str_escapes;
		printables = str_printables;
	}

	/* make space for something l big in the buffer */
	/*
	 * If fewer than l bytes are counted as free, the buffer is doubled
	 * (once) and the write position is recomputed since the buffer may
	 * have moved.  In all cases l is then subtracted from the free count.
	 * NOTE(review): ofre is unsigned, so the subtraction can wrap when
	 * one doubling adds less than l; the later `ofre +=' adjustments
	 * appear to bring it back -- confirm before changing the bookkeeping.
	 */
#define chksize(l)  if ((l) > ofre) { \
		long olen = obufout - obuf; \
		erealloc(obuf, char *, osiz * 2, "pp_string"); \
		obufout = obuf + olen; \
		ofre += osiz; \
		osiz *= 2; \
	} ofre -= (l)

	/* initial size; 3 for delim + terminating null, 1 for @ */
	osiz = len + 3 + 1 + (typed_regex == true);
	emalloc(obuf, char *, osiz, "pp_string");
	obufout = obuf;
	ofre = osiz - 1;

	if (typed_regex)
		*obufout++ = '@';

	*obufout++ = delim;
	for (; len > 0; len--, str++) {
		chksize(2);		/* make space for 2 chars */
		if (delim != '/' && *str == delim) {
			/* embedded string delimiter */
			*obufout++ = '\\';
			*obufout++ = delim;
		} else if (*str == '\0') {
			/* NUL is spelled out as \000 */
			*obufout++ = '\\';
			*obufout++ = '0';
			chksize(2);	/* need 2 more chars for this case */
			*obufout++ = '0';
			*obufout++ = '0';
		} else if ((cp = strchr(escapes, *str)) != NULL) {
			/* position in the escape table selects the letter */
			i = cp - escapes;
			*obufout++ = '\\';
			*obufout++ = printables[i];
		/* NB: Deliberate use of lower-case versions. */
		} else if (isascii(*str) && isprint(*str)) {
			*obufout++ = *str;
			ofre += 1;	/* used 1 less than expected */
		} else {
			size_t len;	/* shadows the parameter inside this branch */

			chksize(8);		/* total available space is 10 */

			sprintf(obufout, "\\%03o", *str & 0xff);
			len = strlen(obufout);
			ofre += (10 - len);	 /* adjust free space count */
			obufout += len;
		}
	}
	chksize(2);	/* closing delimiter and terminating NUL */
	*obufout++ = delim;
	*obufout = '\0';
	return obuf;
#undef chksize
}
1734
1735 /* pp_number --- pretty format a number */
1736
1737 char *
pp_number(NODE * n)1738 pp_number(NODE *n)
1739 {
1740 char *str;
1741
1742 assert((n->flags & NUMCONSTSTR) != 0);
1743 emalloc(str, char *, n->stlen + 1, "pp_number");
1744 strcpy(str, n->stptr);
1745 return str;
1746 }
1747
1748 /* pp_node --- pretty format a node */
1749
1750 char *
pp_node(NODE * n)1751 pp_node(NODE *n)
1752 {
1753 if ((n->flags & NUMBER) != 0)
1754 return pp_number(n);
1755 return pp_string(n->stptr, n->stlen, '"');
1756 }
1757
1758 /* pp_list --- pretty print a list, with surrounding characters and separator */
1759
1760 static NODE **pp_args = NULL;
1761 static int npp_args;
1762
1763 static char *
pp_list(int nargs,const char * paren,const char * delim)1764 pp_list(int nargs, const char *paren, const char *delim)
1765 {
1766 NODE *r;
1767 char *str, *s;
1768 size_t len;
1769 size_t delimlen;
1770 int i;
1771 INSTRUCTION *comment = NULL;
1772
1773 if (pp_args == NULL) {
1774 npp_args = nargs;
1775 emalloc(pp_args, NODE **, (nargs + 2) * sizeof(NODE *), "pp_list");
1776 } else if (nargs > npp_args) {
1777 npp_args = nargs;
1778 erealloc(pp_args, NODE **, (nargs + 2) * sizeof(NODE *), "pp_list");
1779 }
1780
1781 delimlen = strlen(delim);
1782 if (nargs == 0)
1783 len = 2;
1784 else {
1785 len = -delimlen;
1786 for (i = 1; i <= nargs; i++) {
1787 r = pp_args[i] = pp_pop();
1788 len += r->pp_len + delimlen;
1789 if (r->pp_comment != NULL) {
1790 comment = (INSTRUCTION *) r->pp_comment;
1791 len += comment->memory->stlen + indent_level + 1; // comment\n ident
1792 }
1793 }
1794 if (paren != NULL) {
1795 assert(strlen(paren) == 2);
1796 len += 2;
1797 }
1798 }
1799 comment = NULL;
1800
1801 emalloc(str, char *, len + 1, "pp_list");
1802 s = str;
1803 if (paren != NULL)
1804 *s++ = paren[0];
1805
1806 for (i = nargs; i > 0; i--) {
1807 // argument
1808 r = pp_args[i];
1809 memcpy(s, r->pp_str, r->pp_len);
1810 s += r->pp_len;
1811
1812 // delimiter
1813 if (i > 1 && delimlen > 0) {
1814 memcpy(s, delim, delimlen);
1815 s += delimlen;
1816 }
1817
1818 // comment if any
1819 if (r->pp_comment != NULL) {
1820 check_indent_level();
1821 comment = (INSTRUCTION *) r->pp_comment;
1822 memcpy(s, comment->memory->stptr, comment->memory->stlen);
1823 s += comment->memory->stlen;
1824 memcpy(s, tabs, indent_level + 1);
1825 s += indent_level + 1;
1826 }
1827 pp_free(r);
1828 }
1829
1830 if (paren != NULL)
1831 *s++ = paren[1];
1832 *s = '\0';
1833 return str;
1834 }
1835
/* is_unary_minus --- return true if string starts with unary minus */

/*
 * True when STR begins with a single `-'; a leading `--' does not count.
 * STR must be NUL terminated.
 */

static bool
is_unary_minus(const char *str)
{
	if (str[0] != '-')
		return false;

	/* a second dash means decrement, not negation */
	return str[1] != '-';
}
1843
1844 /* pp_concat --- handle concatenation and correct parenthesizing of expressions */
1845
1846 static char *
pp_concat(int nargs)1847 pp_concat(int nargs)
1848 {
1849 NODE *r;
1850 char *str, *s;
1851 size_t len;
1852 static const size_t delimlen = 1; /* " " */
1853 int i;
1854 int pl_l, pl_r;
1855
1856 if (pp_args == NULL) {
1857 npp_args = nargs;
1858 emalloc(pp_args, NODE **, (nargs + 2) * sizeof(NODE *), "pp_concat");
1859 } else if (nargs > npp_args) {
1860 npp_args = nargs;
1861 erealloc(pp_args, NODE **, (nargs + 2) * sizeof(NODE *), "pp_concat");
1862 }
1863
1864 /*
1865 * items are on the stack in reverse order that they
1866 * will be printed so pop them off backwards.
1867 */
1868
1869 len = -delimlen;
1870 for (i = nargs; i >= 1; i--) {
1871 r = pp_args[i] = pp_pop();
1872 len += r->pp_len + delimlen + 2;
1873 }
1874
1875 emalloc(str, char *, len + 1, "pp_concat");
1876 s = str;
1877
1878 /* now copy in */
1879 for (i = 1; i < nargs; i++) {
1880 r = pp_args[i];
1881
1882 if (r->pp_str[0] != '(') {
1883 pl_l = prec_level(pp_args[i]->type);
1884 pl_r = prec_level(pp_args[i+1]->type);
1885
1886 if (i >= 2 && is_unary_minus(r->pp_str)) {
1887 *s++ = '(';
1888 memcpy(s, r->pp_str, r->pp_len);
1889 s += r->pp_len;
1890 *s++ = ')';
1891 } else if (is_scalar(pp_args[i]->type) && is_scalar(pp_args[i+1]->type)) {
1892 memcpy(s, r->pp_str, r->pp_len);
1893 s += r->pp_len;
1894 } else if (pl_l <= pl_r || is_scalar(pp_args[i+1]->type)) {
1895 *s++ = '(';
1896 memcpy(s, r->pp_str, r->pp_len);
1897 s += r->pp_len;
1898 *s++ = ')';
1899 } else {
1900 memcpy(s, r->pp_str, r->pp_len);
1901 s += r->pp_len;
1902 }
1903 } else {
1904 memcpy(s, r->pp_str, r->pp_len);
1905 s += r->pp_len;
1906 }
1907
1908 if (i < nargs) {
1909 *s++ = ' ';
1910 }
1911 }
1912
1913 pl_l = prec_level(pp_args[nargs-1]->type);
1914 pl_r = prec_level(pp_args[nargs]->type);
1915 r = pp_args[nargs];
1916 if (r->pp_str[0] == '(') {
1917 memcpy(s, r->pp_str, r->pp_len);
1918 s += r->pp_len;
1919 } else if (is_unary_minus(r->pp_str) || ((pl_l >= pl_r && ! is_scalar(pp_args[nargs]->type)))) {
1920 *s++ = '(';
1921 memcpy(s, r->pp_str, r->pp_len);
1922 s += r->pp_len;
1923 *s++ = ')';
1924 } else {
1925 memcpy(s, r->pp_str, r->pp_len);
1926 s += r->pp_len;
1927 }
1928
1929 for (i = nargs; i >= 1; i--) {
1930 pp_free(pp_args[i]);
1931 }
1932
1933 *s = '\0';
1934 return str;
1935 }
1936
1937 /* pp_group3 --- string together up to 3 strings */
1938
1939 static char *
pp_group3(const char * s1,const char * s2,const char * s3)1940 pp_group3(const char *s1, const char *s2, const char *s3)
1941 {
1942 size_t len1, len2, len3, l;
1943 char *str, *s;
1944
1945 len1 = strlen(s1);
1946 len2 = strlen(s2);
1947 len3 = strlen(s3);
1948 l = len1 + len2 + len3 + 1;
1949 emalloc(str, char *, l, "pp_group3");
1950 s = str;
1951 if (len1 > 0) {
1952 memcpy(s, s1, len1);
1953 s += len1;
1954 }
1955 if (len2 > 0) {
1956 memcpy(s, s2, len2);
1957 s += len2;
1958 }
1959 if (len3 > 0) {
1960 memcpy(s, s3, len3);
1961 s += len3;
1962 }
1963 *s = '\0';
1964 return str;
1965 }
1966
1967 /* pp_func --- pretty print a function */
1968
1969 int
pp_func(INSTRUCTION * pc,void * data ATTRIBUTE_UNUSED)1970 pp_func(INSTRUCTION *pc, void *data ATTRIBUTE_UNUSED)
1971 {
1972 int j;
1973 static bool first = true;
1974 NODE *func;
1975 int pcount;
1976 INSTRUCTION *fp;
1977
1978 if (first) {
1979 first = false;
1980 if (do_profile)
1981 fprintf(prof_fp, _("\n\t# Functions, listed alphabetically\n"));
1982 }
1983
1984 pp_namespace_list(pc[3].nexti);
1985
1986 fp = pc->nexti->nexti;
1987 func = pc->func_body;
1988 fprintf(prof_fp, "\n");
1989
1990 /* print any function comment */
1991 if (pc->comment != NULL)
1992 print_comment(pc->comment, -1); /* -1 ==> don't indent */
1993
1994 indent(pc->nexti->exec_count);
1995
1996 bool malloced = false;
1997 char *name = adjust_namespace(func->vname, & malloced);
1998 fprintf(prof_fp, "%s %s(", op2str(Op_K_function), name);
1999 if (malloced)
2000 free(name);
2001 pcount = func->param_cnt;
2002 func_params = func->fparms;
2003 for (j = 0; j < pcount; j++) {
2004 fprintf(prof_fp, "%s", func_params[j].param);
2005 if (j < pcount - 1)
2006 fprintf(prof_fp, ", ");
2007 }
2008 if (fp->opcode == Op_comment
2009 && fp->memory->comment_type == EOL_COMMENT) {
2010 fprintf(prof_fp, ")");
2011 fp = end_line(fp);
2012 } else
2013 fprintf(prof_fp, ")\n");
2014 if (do_profile)
2015 indent(0);
2016 fprintf(prof_fp, "{\n");
2017 indent_in();
2018 pprint(fp, NULL, NO_PPRINT_FLAGS); /* function body */
2019 indent_out();
2020 if (do_profile)
2021 indent(0);
2022 fprintf(prof_fp, "}\n");
2023 return 0;
2024 }
2025
2026 /* redir2str --- convert a redirection type into a printable value */
2027
2028 const char *
redir2str(int redirtype)2029 redir2str(int redirtype)
2030 {
2031 static const char *const redirtab[] = {
2032 "",
2033 " > ", /* redirect_output */
2034 " >> ", /* redirect_append */
2035 " | ", /* redirect_pipe */
2036 " | ", /* redirect_pipein */
2037 " < ", /* redirect_input */
2038 " |& ", /* redirect_twoway */
2039 };
2040
2041 if (redirtype < 0 || redirtype > redirect_twoway)
2042 fatal(_("redir2str: unknown redirection type %d"), redirtype);
2043 return redirtab[redirtype];
2044 }
2045
2046 /* pp_namespace --- print @namespace directive */
2047
2048 static void
pp_namespace(const char * name,INSTRUCTION * comment)2049 pp_namespace(const char *name, INSTRUCTION *comment)
2050 {
2051 // Don't print the initial `@namespace "awk"' unless
2052 // @namespace was used at some point in the program
2053 if (! namespace_changed)
2054 return;
2055
2056 if (strcmp(current_namespace, name) == 0)
2057 return;
2058
2059 // don't need to free current_namespace, it comes from
2060 // info saved in Op_namespace instructions.
2061 current_namespace = name;
2062
2063 // force newline, could be after a comment
2064 fprintf(prof_fp, "\n");
2065
2066 if (do_profile)
2067 indent(SPACEOVER);
2068
2069 fprintf(prof_fp, "@namespace \"%s\"", name);
2070
2071 if (comment != NULL) {
2072 putc('\t', prof_fp);
2073 print_comment(comment, 0);
2074 putc('\n', prof_fp);
2075 } else
2076 fprintf(prof_fp, "\n\n");
2077 }
2078
2079 /* pp_namespace_list --- print the list, back to front, using recursion */
2080
2081 static void
pp_namespace_list(INSTRUCTION * list)2082 pp_namespace_list(INSTRUCTION *list)
2083 {
2084 if (list == NULL)
2085 return;
2086
2087 pp_namespace_list(list->nexti);
2088 pp_namespace(list->ns_name, list->comment);
2089 }
2090
2091 /* adjust_namespace --- remove leading namespace or add leading awk:: */
2092
2093 static char *
adjust_namespace(char * name,bool * malloced)2094 adjust_namespace(char *name, bool *malloced)
2095 {
2096 *malloced = false;
2097
2098 // unadorned name from symbol table, add awk:: if not in awk:: n.s.
2099 if (strchr(name, ':') == NULL &&
2100 current_namespace != awk_namespace && // can be equal if namespace never changed
2101 strcmp(current_namespace, awk_namespace) != 0 &&
2102 ! is_all_upper(name)) {
2103 char *buf;
2104 size_t len = 5 + strlen(name) + 1;
2105
2106 emalloc(buf, char *, len, "adjust_namespace");
2107 sprintf(buf, "awk::%s", name);
2108 *malloced = true;
2109
2110 return buf;
2111 }
2112
2113 // qualifed name, remove <ns>:: if in that n.s.
2114 size_t len = strlen(current_namespace);
2115
2116 if (strncmp(current_namespace, name, len) == 0 &&
2117 name[len] == ':' && name[len+1] == ':') {
2118 char *ret = name + len + 2;
2119
2120 return ret;
2121 }
2122
2123 return name;
2124 }
2125