/*
 * Copyright (c) 1988-1990 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Optimization module for tcpdump intermediate representation.
 */
#ifndef lint
static char rcsid[] =
	"@(#) $Header: optimize.c,v 1.34 91/06/06 17:32:57 mccanne Exp $ (LBL)";
#endif

#include <stdio.h>
#include <sys/types.h>

#include <sys/time.h>
#include <net/bpf.h>

#include "interface.h"
#include "gencode.h"

#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

#define NOP -1

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * A flag to indicate that further optimization is needed.
 * Iterative passes are continued until a given pass yields no
 * branch movement.
 */
static int done;
/*
 * A block is marked if and only if its mark equals the current mark.
 * Rather than traverse the code array, marking each item, 'cur_mark'
 * is incremented.  This automatically makes each element unmarked.
 */
static int cur_mark;
#define isMarked(p) ((p)->mark == cur_mark)
#define unMarkAll() cur_mark += 1
#define Mark(p) ((p)->mark = cur_mark)
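
/*
 * Illustrative sketch of the marking protocol (not called anywhere
 * as such): a traversal first invalidates all existing marks in O(1),
 * then marks nodes as it visits them, e.g.
 *
 *	unMarkAll();
 *	if (!isMarked(b)) {
 *		Mark(b);
 *		... visit b ...
 *	}
 */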

static void opt_init();
static void opt_cleanup();

static void make_marks();
static void mark_code();

static void intern_blocks();

static int eq_slist();

static int n_blocks;
struct block **blocks;
static int n_edges;
struct edge **edges;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to a whole number of words.
 */
static int nodewords;
static int edgewords;
struct block **levels;
u_long *space;
#define BITS_PER_WORD (8*sizeof(u_long))
/*
 * True if 'a' is in uset {p}
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & ((u_long)1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= ((u_long)1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~((u_long)1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register u_long *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register u_long *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= ~*_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register u_long *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
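
/*
 * Example (illustrative only): with 32-bit u_long, BITS_PER_WORD is 32,
 * so block id 37 lives in word 37/32 == 1 at bit 37%32 == 5, and
 * SET_INSERT(p, 37) executes p[1] |= (u_long)1 << 5.  A set over
 * n_blocks ids thus needs nodewords == n_blocks/32 + 1 words, which is
 * how opt_init() sizes these vectors.
 */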

static uset all_dom_sets;
static uset all_closure_sets;
static uset all_edge_sets;

#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif

static void
find_levels_r(b)
	struct block *b;
{
	int level;

	if (isMarked(b))
		return;

	Mark(b);
	b->link = 0;

	if (JT(b)) {
		find_levels_r(JT(b));
		find_levels_r(JF(b));
		level = MAX(JT(b)->level, JF(b)->level) + 1;
	} else
		level = 0;
	b->level = level;
	b->link = levels[level];
	levels[level] = b;
}

/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
static void
find_levels(root)
	struct block *root;
{
	bzero((char *)levels, n_blocks * sizeof(*levels));
	unMarkAll();
	find_levels_r(root);
}

/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(root)
	struct block *root;
{
	int i;
	struct block *b;
	u_long *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = all_dom_sets;
	i = n_blocks * nodewords;
	while (--i >= 0)
		*x++ = ~0;
	/* Root starts off empty. */
	for (i = nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
		}
	}
}
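
/*
 * For illustration: in a diamond-shaped graph where A branches to B
 * and C, which both branch to D, the SET_INTERSECT calls leave
 * D->dom == {A, D}; B and C are intersected away since neither lies
 * on every path from the root to D.  One top-down sweep suffices
 * because the graph is acyclic and visited in level order, so every
 * predecessor's set is final before it is propagated.
 */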

static void
propedom(ep)
	struct edge *ep;
{
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
	}
}

/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(root)
	struct block *root;
{
	int i;
	uset x;
	struct block *b;

	x = all_edge_sets;
	for (i = n_edges * edgewords; --i >= 0; )
		x[i] = ~0;

	/* root->level is the highest level number found. */
	bzero(root->et.edom, edgewords * sizeof(*(uset)0));
	bzero(root->ef.edom, edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			propedom(&b->et);
			propedom(&b->ef);
		}
	}
}

/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(root)
	struct block *root;
{
	int i;
	struct block *b;

	/*
	 * Initialize sets to contain no nodes.
	 */
	bzero((char *)all_closure_sets,
	      n_blocks * nodewords * sizeof(*all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);
			if (JT(b) == 0)
				continue;
			SET_UNION(JT(b)->closure, b->closure, nodewords);
			SET_UNION(JF(b)->closure, b->closure, nodewords);
		}
	}
}

/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(s)
	struct stmt *s;
{
	register int c = s->code;

	if (c == NOP)
		return -1;

	switch (BPF_CLASS(c)) {

	case BPF_RET:
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

	case BPF_LD:
	case BPF_LDX:
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

	case BPF_ST:
		return A_ATOM;

	case BPF_STX:
		return X_ATOM;

	case BPF_JMP:
	case BPF_ALU:
		if (BPF_SRC(c) == BPF_X)
			return AX_ATOM;
		return A_ATOM;

	case BPF_MISC:
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
	abort();
	/* NOTREACHED */
}
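
/*
 * For example (illustrative): 'ld M[3]' (BPF_LD|BPF_MEM) uses scratch
 * memory atom 3, 'add x' (BPF_ALU|BPF_ADD|BPF_X) uses both registers
 * and so yields AX_ATOM, while 'ld #5' (BPF_LD|BPF_IMM) uses nothing,
 * so atomuse() returns -1 for it.
 */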

/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(s)
	struct stmt *s;
{
	if (s->code == NOP)
		return -1;

	switch (BPF_CLASS(s->code)) {

	case BPF_LD:
	case BPF_ALU:
		return A_ATOM;

	case BPF_LDX:
		return X_ATOM;

	case BPF_ST:
	case BPF_STX:
		return s->k;

	case BPF_MISC:
		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
	return -1;
}

static void
compute_local_ud(b)
	struct block *b;
{
	struct slist *s;
	atomset def = 0, use = 0, kill = 0;
	int atom;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
			continue;
		atom = atomuse(&s->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
			else
				abort();
		}
		atom = atomdef(&s->s);
		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
				kill |= ATOMMASK(atom);
			def |= ATOMMASK(atom);
		}
	}
	if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
		use |= ATOMMASK(A_ATOM);

	b->def = def;
	b->kill = kill;
	b->in_use = use;
}
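
/*
 * Worked example (illustrative) for the statement list
 *
 *	ldx M[0]	uses atom 0, defines X
 *	st  M[1]	uses A, defines atom 1
 *	txa		uses X (already defined here), defines A
 *
 * 'use' ends up {atom 0, A} (live on entry), 'def' is {X, atom 1, A}
 * (defined locally), and 'kill' is {X, atom 1} (defined before any
 * local use, so incoming definitions of them are dead).  A is used
 * before its local definition, so it is in 'use' and not in 'kill'.
 */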

/*
 * Assume graph is already leveled.
 */
static void
find_ud(root)
	struct block *root;
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}
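
/*
 * The second loop is the usual backward liveness recurrence:
 *
 *	out_use(b) = union of in_use over b's successors
 *	in_use(b)  = use(b) + (out_use(b) - kill(b))
 *
 * One bottom-up sweep (level 1 upward, i.e. successors before
 * predecessors) suffices because the flow graph is acyclic.
 */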

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	long v0, v1;
	long val;
	struct valnode *next;
};

#define MODULUS 213
static struct valnode *hashtbl[MODULUS];
static int curval;
static int maxval;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)

struct vmapinfo {
	int is_const;
	long const_val;
};

struct vmapinfo *vmap;
struct valnode *vnode_base;
struct valnode *next_vnode;

static void
init_val()
{
	curval = 0;
	next_vnode = vnode_base;
	bzero((char *)vmap, maxval * sizeof(*vmap));
	bzero((char *)hashtbl, sizeof hashtbl);
}

/* Because we really don't have an IR, this stuff is a little messy. */
static long
F(code, v0, v1)
	int code;
	long v0, v1;
{
	u_int hash;
	int val;
	struct valnode *p;

	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
	hash %= MODULUS;

	for (p = hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		vmap[val].const_val = v0;
		vmap[val].is_const = 1;
	}
	p = next_vnode++;
	p->val = val;
	p->code = code;
	p->v0 = v0;
	p->v1 = v1;
	p->next = hashtbl[hash];
	hashtbl[hash] = p;

	return val;
}
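
/*
 * F() is a hash-consing constructor: a given (code, v0, v1) triple
 * always yields the same value number.  For instance (illustrative),
 * two occurrences of 'ld [14]' hash to the same entry and share one
 * number, so later passes can see that the second load is redundant;
 * K(2) interns the constant 2 and flags it in vmap[] so it can be
 * folded where it is used.
 */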

static inline void
vstore(s, valp, newval, alter)
	struct stmt *s;
	long *valp;
	long newval;
	int alter;
{
	if (alter && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}

static void
fold_op(s, v0, v1)
	struct stmt *s;
	long v0, v1;
{
	long a, b;

	a = vmap[v0].const_val;
	b = vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			error("division by zero");
		a /= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_LSH:
		a <<= b;
		break;

	case BPF_RSH:
		a >>= b;
		break;

	case BPF_NEG:
		a = -a;
		break;

	default:
		abort();
	}
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	done = 0;
}
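
/*
 * E.g. (illustrative): if the accumulator holds the value number of
 * the constant 6 and the statement is 'add #2', fold_op() rewrites
 * the statement in place as 'ld #8' and clears 'done' so another
 * optimization pass runs over the result.
 */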

static inline struct slist *
this_op(s)
	struct slist *s;
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}

static void
opt_not(b)
	struct block *b;
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}

static void
opt_peep(b)
	struct block *b;
{
	struct slist *s;
	struct slist *next, *last;
	int val;
	long v;

	s = b->stmts;
	if (s == 0)
		return;

	last = s;
	while (1) {
		s = this_op(s);
		if (s == 0)
			break;
		next = this_op(s->next);
		if (next == 0)
			break;
		last = next;

		/*
		 * st  M[k]  -->  st  M[k]
		 * ldx M[k]       tax
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			done = 0;
			next->s.code = BPF_MISC|BPF_TAX;
		}
		/*
		 * ld  #k  -->  ldx #k
		 * tax          txa
		 */
		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;
			done = 0;
		}
		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				break;

			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				break;

			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				break;

			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				break;
			/*
			 * XXX We need to check that X is not
			 * subsequently used.  We know we can eliminate the
			 * accumulator modifications since it is defined
			 * by the last stmt of this sequence.
			 *
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2	{s}
			 * (005) ldxms   [14]	{next}  -- optional
			 * (006) addx		{add}
			 * (007) tax		{tax}
			 * (008) ild     [x+0]	{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			done = 0;
		}
		s = next;
	}
	/*
	 * If we have a subtract to do a comparison, and the X register
	 * is a known constant, we can merge this value into the
	 * comparison.
	 */
	if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		val = b->val[X_ATOM];
		if (vmap[val].is_const) {
			b->s.k += vmap[val].const_val;
			last->s.code = NOP;
			done = 0;
		} else if (b->s.k == 0) {
			/*
			 * sub x  ->  nop
			 * j  #0      j  x
			 */
			last->s.code = NOP;
			b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
				BPF_X;
			done = 0;
		}
	}
	/*
	 * Likewise, a constant subtract can be simplified.
	 */
	else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
		 !ATOMELEM(b->out_use, A_ATOM)) {
		b->s.k += last->s.k;
		last->s.code = NOP;
		done = 0;
	}
	/*
	 * and #k      nop
	 * jeq #0  ->  jset #k
	 */
	if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
		b->s.k = last->s.k;
		b->s.code = BPF_JMP|BPF_K|BPF_JSET;
		last->s.code = NOP;
		done = 0;
		opt_not(b);
	}
	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		v = vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

		case BPF_JEQ:
			v = v == b->s.k;
			break;

		case BPF_JGT:
			v = v > b->s.k;
			break;

		case BPF_JGE:
			v = v >= b->s.k;
			break;

		case BPF_JSET:
			v &= b->s.k;
			break;

		default:
			abort();
		}
		if (JF(b) != JT(b))
			done = 0;
		if (v)
			JF(b) = JT(b);
		else
			JT(b) = JF(b);
	}
}

/*
 * Compute the symbolic value of the expression of 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
static void
opt_stmt(s, val, alter)
	struct stmt *s;
	long val[];
	int alter;
{
	int op;
	long v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += vmap[v].const_val;
			v = F(s->code, s->k, 0L);
			done = 0;
		}
		else
			v = F(s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				if (op == BPF_ADD || op == BPF_SUB ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR) {
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && vmap[val[X_ATOM]].is_const) {
			if (vmap[val[A_ATOM]].is_const) {
				fold_op(s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = vmap[val[X_ATOM]].const_val;
				done = 0;
				val[A_ATOM] =
					F(s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, and -1, etc.
		 */
		if (alter && vmap[val[A_ATOM]].is_const
		    && vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR ||
			    op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV ||
				 op == BPF_AND) {
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = vmap[v].const_val;
			done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}

static void
deadstmt(s, last)
	register struct stmt *s;
	register struct stmt *last[];
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		if (atom == AX_ATOM) {
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}

static void
opt_deadstores(b)
	register struct block *b;
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	bzero((char *)last, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(&s->s, last);
	deadstmt(&b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			done = 0;
		}
}

static void
opt_blk(b, do_stmts)
	struct block *b;
	int do_stmts;
{
	struct slist *s;
	struct edge *p;
	int i;
	long aval;

	/*
	 * Initialize the atom values.
	 * If we have no predecessors, everything is undefined.
	 * Otherwise, we inherit our values from our predecessors.
	 * If any register has an ambiguous value (i.e. control paths are
	 * merging) give it the undefined value of 0.
	 */
	p = b->in_edges;
	if (p == 0)
		bzero((char *)b->val, sizeof(b->val));
	else {
		bcopy((char *)p->pred->val, (char *)b->val, sizeof(b->val));
		while ((p = p->next) != 0) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	aval = b->val[A_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(&s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator with a value that is
	 * already there, eliminate all the statements.
	 */
	if (do_stmts && b->out_use == 0 && aval != 0 &&
	    b->val[A_ATOM] == aval)
		b->stmts = 0;
	else {
		opt_peep(b);
		opt_deadstores(b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}

/*
 * Return true if any register that is used on exit from 'succ' has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(b, succ)
	struct block *b, *succ;
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}

struct block *
fold_edge(child, ep)
	struct block *child;
	struct edge *ep;
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands are identical, so the
		 * result is true if a true branch was
		 * taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.  We rely on the fact that
		 * distinct constants have distinct value numbers.
		 */
		return JF(child);

	return 0;
}
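
/*
 * For example (illustrative): if an edge dominated by the true branch
 * of 'jeq #0x800' reaches another 'jeq #0x800' test of the same
 * accumulator value, fold_edge() returns that child's true successor;
 * if the dominating edge was instead the true branch of 'jeq #0x806'
 * (a different constant, hence a different value number), the second
 * equality test cannot also hold, so the false successor is returned.
 */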

static void
opt_j(ep)
	struct edge *ep;
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge's successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a
	 * reasonably efficient loop.
	 */
 top:
	for (i = 0; i < edgewords; ++i) {
		register u_long x = ep->edom[i];

		while (x != 0) {
			k = ffs(x) - 1;
			x &= ~((u_long)1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}

static void
or_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	samep = &JF(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
and_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	samep = &JT(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}

static void
opt_blks(root, do_stmts)
	struct block *root;
	int do_stmts;
{
	int i, maxlevel;
	struct block *p;

	init_val();
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = levels[i]; p; p = p->link)
			opt_blk(p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			opt_j(&p->et);
			opt_j(&p->ef);
		}
	}
	for (i = 1; i <= maxlevel; ++i) {
		for (p = levels[i]; p; p = p->link) {
			or_pullup(p);
			and_pullup(p);
		}
	}
}

static inline void
link_inedge(parent, child)
	struct edge *parent;
	struct block *child;
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}

static void
find_inedges(root)
	struct block *root;
{
	int i;
	struct block *b;
	struct edge *ep;

	for (i = 0; i < n_blocks; ++i)
		blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}

static void
opt_root(b)
	struct block **b;
{
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);
}

static void
opt_loop(root, do_stmts)
	struct block *root;
	int do_stmts;
{
	int passes = 0;
#ifdef BDEBUG
	if (dflag > 1)
		opt_dump(root);
#endif
	do {
		done = 1;
		find_levels(root);
		find_dom(root);
		find_closure(root);
		find_inedges(root);
		find_ud(root);
		find_edom(root);
		opt_blks(root, do_stmts);
#ifdef BDEBUG
		if (dflag > 1)
			opt_dump(root);
#endif
	} while (!done);
}

/*
 * Optimize the filter code in its dag representation.
 */
void
optimize(rootp)
	struct block **rootp;
{
	struct block *root;

	root = *rootp;

	opt_init(root);
	opt_loop(root, 0);
	opt_loop(root, 1);
	intern_blocks(root);
	opt_root(rootp);
	opt_cleanup();
}

static void
make_marks(p)
	struct block *p;
{
	if (!isMarked(p)) {
		Mark(p);
		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(JT(p));
			make_marks(JF(p));
		}
	}
}

/*
 * Mark code array such that isMarked(i) is true
 * only for nodes that are alive.
 */
static void
mark_code(p)
	struct block *p;
{
	cur_mark += 1;
	make_marks(p);
}

/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
static int
eq_slist(x, y)
	struct slist *x, *y;
{
	while (1) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}

static inline int
eq_blk(b0, b1)
	struct block *b0, *b1;
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}

static void
intern_blocks(root)
	struct block *root;
{
	struct block *p;
	int i, j;
	int done;
 top:
	done = 1;
	for (i = 0; i < n_blocks; ++i)
		blocks[i]->link = 0;

	mark_code(root);

	for (i = n_blocks - 1; --i >= 0; ) {
		if (!isMarked(blocks[i]))
			continue;
		for (j = i + 1; j < n_blocks; ++j) {
			if (!isMarked(blocks[j]))
				continue;
			if (eq_blk(blocks[i], blocks[j])) {
				blocks[i]->link = blocks[j]->link ?
					blocks[j]->link : blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < n_blocks; ++i) {
		p = blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done)
		goto top;
}

static void
opt_cleanup()
{
	free((void *)vnode_base);
	free((void *)vmap);
	free((void *)edges);
	free((void *)space);
	free((void *)levels);
	free((void *)blocks);
}

/*
 * Return the number of stmts in 's'.
 */
static int
slength(s)
	struct slist *s;
{
	int n = 0;

	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}

/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(p)
	struct block *p;
{
	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
}

/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
static void
number_blks_r(p)
	struct block *p;
{
	int n;

	if (p == 0 || isMarked(p))
		return;

	Mark(p);
	n = n_blocks++;
	p->id = n;
	blocks[n] = p;

	number_blks_r(JT(p));
	number_blks_r(JF(p));
}

/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 */
static int
count_stmts(p)
	struct block *p;
{
	int n;

	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);
	n = count_stmts(JT(p)) + count_stmts(JF(p));
	return slength(p->stmts) + n + 1;
}

/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
static void
opt_init(root)
	struct block *root;
{
	u_long *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll();
	n = count_blocks(root);
	blocks = (struct block **)malloc(n * sizeof(*blocks));
	unMarkAll();
	n_blocks = 0;
	number_blks_r(root);

	n_edges = 2 * n_blocks;
	edges = (struct edge **)malloc(n_edges * sizeof(*edges));

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	levels = (struct block **)malloc(n_blocks * sizeof(*levels));

	edgewords = n_edges / (8 * sizeof(u_long)) + 1;
	nodewords = n_blocks / (8 * sizeof(u_long)) + 1;

	/* XXX */
	space = (u_long *)malloc(2 * n_blocks * nodewords * sizeof(*space)
				 + n_edges * edgewords * sizeof(*space));
	p = space;
	all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->dom = p;
		p += nodewords;
	}
	all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		blocks[i]->closure = p;
		p += nodewords;
	}
	all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = blocks[i];

		b->et.edom = p;
		p += edgewords;
		b->ef.edom = p;
		p += edgewords;
		b->et.id = i;
		edges[i] = &b->et;
		b->ef.id = n_blocks + i;
		edges[n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	maxval = 3 * max_stmts;
	vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
	vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
}
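
/*
 * Layout sketch of the single 'space' allocation above (illustrative):
 *
 *	[ n_blocks dom sets     ]  n_blocks * nodewords words
 *	[ n_blocks closure sets ]  n_blocks * nodewords words
 *	[ n_edges  edom sets    ]  n_edges  * edgewords words
 *
 * Each block's 'dom' and 'closure' pointers, and each edge's 'edom'
 * pointer, are carved out of this one arena, which is why
 * opt_cleanup() releases them all with a single free of 'space'.
 */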

/*
 * Some pointers used to convert the basic block form of the code
 * into the array form that BPF requires.  'fstart' will point to
 * the malloc'd array while 'ftail' is used during the recursive traversal.
 */
static struct bpf_insn *fstart;
static struct bpf_insn *ftail;

#ifdef BDEBUG
int bids[1000];
#endif

static void
convert_code_r(p)
	struct block *p;
{
	struct bpf_insn *dst;
	struct slist *src;
	int slen;
	u_int off;

	if (p == 0 || isMarked(p))
		return;
	Mark(p);

	convert_code_r(JF(p));
	convert_code_r(JT(p));

	slen = slength(p->stmts);
	dst = ftail -= slen + 1;

	p->offset = dst - fstart;

	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;
		++dst;
	}
#ifdef BDEBUG
	bids[dst - fstart] = p->id + 1;
#endif
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256)
			error("long jumps not supported");
		dst->jt = off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256)
			error("long jumps not supported");
		dst->jf = off;
	}
}

/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 */
struct bpf_insn *
icode_to_fcode(root, lenp)
	struct block *root;
	int *lenp;
{
	int n;
	struct bpf_insn *fp;

	unMarkAll();
	n = *lenp = count_stmts(root);

	fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
	bzero((char *)fp, sizeof(*fp) * n);
	fstart = fp;
	ftail = fp + n;

	unMarkAll();
	convert_code_r(root);

	return fp;
}
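
/*
 * Typical call sequence (an illustrative sketch of how the exported
 * entry points fit together, mirroring opt_dump() below; the caller
 * owns the returned array):
 *
 *	struct bpf_program prog;
 *
 *	optimize(&root);
 *	prog.bf_insns = icode_to_fcode(root, &prog.bf_len);
 *	...
 *	free((char *)prog.bf_insns);
 */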

#ifdef BDEBUG
opt_dump(root)
	struct block *root;
{
	struct bpf_program f;

	bzero(bids, sizeof bids);
	f.bf_insns = icode_to_fcode(root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}
#endif
