1 /*
2  * Copyright (c) 1988-1990 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that: (1) source code distributions
7  * retain the above copyright notice and this paragraph in its entirety, (2)
8  * distributions including binary code include the above copyright notice and
9  * this paragraph in its entirety in the documentation or other materials
10  * provided with the distribution, and (3) all advertising materials mentioning
11  * features or use of this software display the following acknowledgement:
12  * ``This product includes software developed by the University of California,
13  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14  * the University nor the names of its contributors may be used to endorse
15  * or promote products derived from this software without specific prior
16  * written permission.
17  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20  *
21  *  Optimization module for tcpdump intermediate representation.
22  */
23 #ifndef lint
24 static char rcsid[] =
25     "@(#) $Header: /usr/staff/martinh/tcpview/RCS/optimize.c,v 1.1 1992/11/23 18:51:32 martinh Exp $ (LBL)";
26 #endif
27 
28 #include <stdio.h>
29 #include <sys/types.h>
30 
31 #include <sys/time.h>
32 #include <net/bpf.h>
33 
34 #include "interface.h"
35 #include "gencode.h"
36 
37 #define A_ATOM BPF_MEMWORDS
38 #define X_ATOM (BPF_MEMWORDS+1)
39 
40 #define NOP -1
41 
42 /*
43  * This define is used to represent *both* the accumulator and
44  * x register in use-def computations.
45  * Currently, the use-def code assumes only one definition per instruction.
46  */
47 #define AX_ATOM N_ATOMS
48 
49 /*
50  * A flag to indicate that further optimization is needed.
51  * Iterative passes are continued until a given pass yields no
52  * branch movement.
53  */
54 static int done;
55 
56 /*
 * A block is marked if and only if its mark equals the current mark.
58  * Rather than traverse the code array, marking each item, 'cur_mark' is
59  * incremented.  This automatically makes each element unmarked.
60  */
61 static int cur_mark;
62 #define isMarked(p) ((p)->mark == cur_mark)
63 #define unMarkAll() cur_mark += 1
64 #define Mark(p) ((p)->mark = cur_mark)
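/*
 * Illustrative sketch (added; not in the original source): typical use
 * of the marking macros during a traversal.  Bumping 'cur_mark' in
 * unMarkAll() is what makes clearing every mark O(1):
 *
 *	unMarkAll();		all blocks are now unmarked
 *	if (!isMarked(b)) {
 *		Mark(b);	b->mark = cur_mark
 *		... visit JT(b) and JF(b) ...
 *	}
 */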
65 
66 static void opt_init();
67 static void opt_cleanup();
68 
69 static void make_marks();
70 static void mark_code();
71 
72 static void intern_blocks();
73 
74 static int eq_slist();
75 
76 static int n_blocks;
77 struct block **blocks;
78 static int n_edges;
79 struct edge **edges;
80 
81 /*
82  * A bit vector set representation of the dominators.
 * The set size is rounded up to a whole number of words.
84  */
85 static int nodewords;
86 static int edgewords;
87 struct block **levels;
88 u_long *space;
89 #define BITS_PER_WORD (8*sizeof(u_long))
90 /*
91  * True if a is in uset {p}
92  */
93 #define SET_MEMBER(p, a) \
94 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
95 
96 /*
97  * Add 'a' to uset p.
98  */
99 #define SET_INSERT(p, a) \
100 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
101 
102 /*
103  * Delete 'a' from uset p.
104  */
105 #define SET_DELETE(p, a) \
106 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
107 
108 /*
109  * a := a intersect b
110  */
111 #define SET_INTERSECT(a, b, n)\
112 {\
113 	register u_long *_x = a, *_y = b;\
114 	register int _n = n;\
115 	while (--_n >= 0) *_x++ &= *_y++;\
116 }
117 
118 /*
119  * a := a - b
120  */
121 #define SET_SUBTRACT(a, b, n)\
122 {\
123 	register u_long *_x = a, *_y = b;\
124 	register int _n = n;\
125 	while (--_n >= 0) *_x++ &=~ *_y++;\
126 }
127 
128 /*
129  * a := a union b
130  */
131 #define SET_UNION(a, b, n)\
132 {\
133 	register u_long *_x = a, *_y = b;\
134 	register int _n = n;\
135 	while (--_n >= 0) *_x++ |= *_y++;\
136 }
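/*
 * Worked example (added for clarity; not in the original): with 32-bit
 * u_longs, BITS_PER_WORD is 32, so element 37 of a uset lives in word
 * 37 / 32 == 1 at bit 37 % 32 == 5.
 *
 *	u_long set[2] = { 0, 0 };
 *	SET_INSERT(set, 37);		set[1] |= 1 << 5
 *	SET_MEMBER(set, 37);		nonzero
 *	SET_DELETE(set, 37);		set[1] &= ~(1 << 5)
 *
 * SET_INTERSECT, SET_SUBTRACT and SET_UNION simply apply &, &~ and |
 * word by word across 'n' words.
 */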
137 
138 static uset all_dom_sets;
139 static uset all_closure_sets;
140 static uset all_edge_sets;
141 
142 #ifndef MAX
143 #define MAX(a,b) ((a)>(b)?(a):(b))
144 #endif
145 
146 static void
find_levels_r(b)
148 	struct block *b;
149 {
150 	int level;
151 
152 	if (isMarked(b))
153 		return;
154 
155 	Mark(b);
156 	b->link = 0;
157 
158 	if (JT(b)) {
159 		find_levels_r(JT(b));
160 		find_levels_r(JF(b));
161 		level = MAX(JT(b)->level, JF(b)->level) + 1;
162 	} else
163 		level = 0;
164 	b->level = level;
165 	b->link = levels[level];
166 	levels[level] = b;
167 }
168 
169 /*
170  * Level graph.  The levels go from 0 at the leaves to
171  * N_LEVELS at the root.  The levels[] array points to the
172  * first node of the level list, whose elements are linked
173  * with the 'link' field of the struct block.
174  */
175 static void
find_levels(root)
177 	struct block *root;
178 {
179 	bzero((char *)levels, n_blocks * sizeof(*levels));
180 	unMarkAll();
181 	find_levels_r(root);
182 }
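/*
 * Example (added): for the graph
 *
 *	A -> (B, C), B -> (D, D), C -> (D, D), D a return (leaf) block,
 *
 * find_levels() assigns level 0 to D, level 1 to B and C, and level 2
 * to A, chaining B and C together on levels[1] through their 'link'
 * fields.  Later passes walk levels[] from root->level down to 0,
 * i.e. from the root toward the leaves.
 */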
183 
184 /*
185  * Find dominator relationships.
186  * Assumes graph has been leveled.
187  */
188 static void
find_dom(root)
190 	struct block *root;
191 {
192 	int i;
193 	struct block *b;
194 	u_long *x;
195 
196 	/*
197 	 * Initialize sets to contain all nodes.
198 	 */
199 	x = all_dom_sets;
200 	i = n_blocks * nodewords;
201 	while (--i >= 0)
202 		*x++ = ~0;
203 	/* Root starts off empty. */
204 	for (i = nodewords; --i >= 0;)
205 		root->dom[i] = 0;
206 
	/* root->level is the highest level number found. */
208 	for (i = root->level; i >= 0; --i) {
209 		for (b = levels[i]; b; b = b->link) {
210 			SET_INSERT(b->dom, b->id);
211 			if (JT(b) == 0)
212 				continue;
213 			SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
214 			SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
215 		}
216 	}
217 }
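/*
 * Worked example (added): with the graph from the find_levels() note,
 * every dom set starts as "all blocks" except the root's, which starts
 * empty.  Visiting A gives A->dom = {A} and intersects it into B and C,
 * so after their own inserts B->dom = {A,B} and C->dom = {A,C}.  Both
 * are intersected into D->dom, leaving {A}, and D's own insert then
 * gives D->dom = {A,D}: only A and D dominate D, since D can be reached
 * around either B or C.
 */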
218 
219 static void
propedom(ep)
221 	struct edge *ep;
222 {
223 	SET_INSERT(ep->edom, ep->id);
224 	if (ep->succ) {
225 		SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
226 		SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
227 	}
228 }
229 
230 /*
231  * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
233  */
234 static void
find_edom(root)
236 	struct block *root;
237 {
238 	int i;
239 	uset x;
240 	struct block *b;
241 
242 	x = all_edge_sets;
243 	for (i = n_edges * edgewords; --i >= 0; )
244 		x[i] = ~0;
245 
	/* root->level is the highest level number found. */
247 	bzero(root->et.edom, edgewords * sizeof(*(uset)0));
248 	bzero(root->ef.edom, edgewords * sizeof(*(uset)0));
249 	for (i = root->level; i >= 0; --i) {
250 		for (b = levels[i]; b != 0; b = b->link) {
251 			propedom(&b->et);
252 			propedom(&b->ef);
253 		}
254 	}
255 }
256 
257 /*
258  * Find the backwards transitive closure of the flow graph.  These sets
259  * are backwards in the sense that we find the set of nodes that reach
260  * a given node, not the set of nodes that can be reached by a node.
261  *
262  * Assumes graph has been leveled.
263  */
264 static void
find_closure(root)
266 	struct block *root;
267 {
268 	int i;
269 	struct block *b;
270 
271 	/*
272 	 * Initialize sets to contain no nodes.
273 	 */
274 	bzero((char *)all_closure_sets,
275 	      n_blocks * nodewords * sizeof(*all_closure_sets));
276 
	/* root->level is the highest level number found. */
278 	for (i = root->level; i >= 0; --i) {
279 		for (b = levels[i]; b; b = b->link) {
280 			SET_INSERT(b->closure, b->id);
281 			if (JT(b) == 0)
282 				continue;
283 			SET_UNION(JT(b)->closure, b->closure, nodewords);
284 			SET_UNION(JF(b)->closure, b->closure, nodewords);
285 		}
286 	}
287 }
288 
289 /*
290  * Return the register number that is used by s.  If A and X are both
291  * used, return AX_ATOM.  If no register is used, return -1.
292  *
293  * The implementation should probably change to an array access.
294  */
295 static int
atomuse(s)
297 	struct stmt *s;
298 {
299 	register int c = s->code;
300 
301 	if (c == NOP)
302 		return -1;
303 
304 	switch (BPF_CLASS(c)) {
305 
306 	case BPF_RET:
307 		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
308 			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
309 
310 	case BPF_LD:
311 	case BPF_LDX:
312 		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
313 			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;
314 
315 	case BPF_ST:
316 		return A_ATOM;
317 
318 	case BPF_STX:
319 		return X_ATOM;
320 
321 	case BPF_JMP:
322 	case BPF_ALU:
323 		if (BPF_SRC(c) == BPF_X)
324 			return AX_ATOM;
325 		return A_ATOM;
326 
327 	case BPF_MISC:
328 		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
329 	}
330 	abort();
331 	/* NOTREACHED */
332 }
333 
334 /*
335  * Return the register number that is defined by 's'.  We assume that
336  * a single stmt cannot define more than one register.  If no register
337  * is defined, return -1.
338  *
339  * The implementation should probably change to an array access.
340  */
341 static int
atomdef(s)
343 	struct stmt *s;
344 {
345 	if (s->code == NOP)
346 		return -1;
347 
348 	switch (BPF_CLASS(s->code)) {
349 
350 	case BPF_LD:
351 	case BPF_ALU:
352 		return A_ATOM;
353 
354 	case BPF_LDX:
355 		return X_ATOM;
356 
357 	case BPF_ST:
358 	case BPF_STX:
359 		return s->k;
360 
361 	case BPF_MISC:
362 		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
363 	}
364 	return -1;
365 }
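/*
 * Example (added): for "add #4" the code is BPF_ALU|BPF_ADD|BPF_K, so
 * atomuse() returns A_ATOM (a BPF_K source means X is not read) and
 * atomdef() also returns A_ATOM.  For "st M[3]", atomuse() returns
 * A_ATOM and atomdef() returns s->k == 3; the scratch memory cells are
 * treated as just more atoms.
 */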
366 
367 static void
compute_local_ud(b)
369 	struct block *b;
370 {
371 	struct slist *s;
372 	atomset def = 0, use = 0, kill = 0;
373 	int atom;
374 
375 	for (s = b->stmts; s; s = s->next) {
376 		if (s->s.code == NOP)
377 			continue;
378 		atom = atomuse(&s->s);
379 		if (atom >= 0) {
380 			if (atom == AX_ATOM) {
381 				if (!ATOMELEM(def, X_ATOM))
382 					use |= ATOMMASK(X_ATOM);
383 				if (!ATOMELEM(def, A_ATOM))
384 					use |= ATOMMASK(A_ATOM);
385 			}
386 			else if (atom < N_ATOMS) {
387 				if (!ATOMELEM(def, atom))
388 					use |= ATOMMASK(atom);
389 			}
390 			else
391 				abort();
392 		}
393 		atom = atomdef(&s->s);
394 		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
396 				kill |= ATOMMASK(atom);
397 			def |= ATOMMASK(atom);
398 		}
399 	}
400 	if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
401 		use |= ATOMMASK(A_ATOM);
402 
403 	b->def = def;
404 	b->kill = kill;
405 	b->in_use = use;
406 }
407 
408 /*
409  * Assume graph is already leveled.
410  */
411 static void
find_ud(root)
413 	struct block *root;
414 {
415 	int i, maxlevel;
416 	struct block *p;
417 
418 	/*
	 * root->level is the highest level number found;
420 	 * count down from there.
421 	 */
422 	maxlevel = root->level;
423 	for (i = maxlevel; i >= 0; --i)
424 		for (p = levels[i]; p; p = p->link) {
425 			compute_local_ud(p);
426 			p->out_use = 0;
427 		}
428 
429 	for (i = 1; i <= maxlevel; ++i) {
430 		for (p = levels[i]; p; p = p->link) {
431 			p->out_use |= JT(p)->in_use | JF(p)->in_use;
432 			p->in_use |= p->out_use &~ p->kill;
433 		}
434 	}
435 }
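/*
 * Worked example (added): suppose B1 branches to B2, and B2 defines
 * nothing before ending in "jeq #6", so compute_local_ud() leaves
 * B2->in_use = {A}.  The second loop walks the levels from the leaves
 * toward the root, so B2 is final when B1 is processed:
 * B1->out_use |= B2->in_use, and B1->in_use picks up everything in
 * out_use that B1 does not itself define first (its 'kill' set).
 */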
436 
437 /*
 * These data structures are used in a Cocke and Schwartz style
439  * value numbering scheme.  Since the flowgraph is acyclic,
440  * exit values can be propagated from a node's predecessors
441  * provided it is uniquely defined.
442  */
443 struct valnode {
444 	int code;
445 	long v0, v1;
446 	long val;
447 	struct valnode *next;
448 };
449 
450 #define MODULUS 213
451 static struct valnode *hashtbl[MODULUS];
452 static int curval;
453 static int maxval;
454 
455 /* Integer constants mapped with the load immediate opcode. */
456 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
457 
458 struct vmapinfo {
459 	int is_const;
460 	long const_val;
461 };
462 
463 struct vmapinfo *vmap;
464 struct valnode *vnode_base;
465 struct valnode *next_vnode;
466 
467 static void
init_val()
469 {
470 	curval = 0;
471 	next_vnode = vnode_base;
472 	bzero((char *)vmap, maxval * sizeof(*vmap));
473 	bzero((char *)hashtbl, sizeof hashtbl);
474 }
475 
476 /* Because we really don't have an IR, this stuff is a little messy. */
477 static long
F(code, v0, v1)
479 	int code;
480 	long v0, v1;
481 {
482 	u_int hash;
483 	int val;
484 	struct valnode *p;
485 
486 	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
487 	hash %= MODULUS;
488 
489 	for (p = hashtbl[hash]; p; p = p->next)
490 		if (p->code == code && p->v0 == v0 && p->v1 == v1)
491 			return p->val;
492 
493 	val = ++curval;
494 	if (BPF_MODE(code) == BPF_IMM &&
495 	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
496 		vmap[val].const_val = v0;
497 		vmap[val].is_const = 1;
498 	}
499 	p = next_vnode++;
500 	p->val = val;
501 	p->code = code;
502 	p->v0 = v0;
503 	p->v1 = v1;
504 	p->next = hashtbl[hash];
505 	hashtbl[hash] = p;
506 
507 	return val;
508 }
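/*
 * Example (added): two occurrences of "ld [14]" feed the same triple
 * (BPF_LD|BPF_ABS|BPF_W, 14, 0) to F() and therefore get the same
 * value number; vstore() later turns the second load into a NOP when
 * the accumulator already holds that value.  Loads of constants also
 * get a vmap[] entry with is_const set, which is what enables the
 * folding done in fold_op() and opt_stmt().
 */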
509 
510 static inline void
vstore(s, valp, newval, alter)
512 	struct stmt *s;
513 	long *valp;
514 	long newval;
515 	int alter;
516 {
517 	if (alter && *valp == newval)
518 		s->code = NOP;
519 	else
520 		*valp = newval;
521 }
522 
523 static void
fold_op(s, v0, v1)
525 	struct stmt *s;
526 	long v0, v1;
527 {
528 	long a, b;
529 
530 	a = vmap[v0].const_val;
531 	b = vmap[v1].const_val;
532 
533 	switch (BPF_OP(s->code)) {
534 	case BPF_ADD:
535 		a += b;
536 		break;
537 
538 	case BPF_SUB:
539 		a -= b;
540 		break;
541 
542 	case BPF_MUL:
543 		a *= b;
544 		break;
545 
546 	case BPF_DIV:
547 		if (b == 0)
548 			error("division by zero");
549 		a /= b;
550 		break;
551 
552 	case BPF_AND:
553 		a &= b;
554 		break;
555 
556 	case BPF_OR:
557 		a |= b;
558 		break;
559 
560 	case BPF_LSH:
561 		a <<= b;
562 		break;
563 
564 	case BPF_RSH:
565 		a >>= b;
566 		break;
567 
568 	case BPF_NEG:
569 		a = -a;
570 		break;
571 
572 	default:
573 		abort();
574 	}
575 	s->k = a;
576 	s->code = BPF_LD|BPF_IMM;
577 	done = 0;
578 }
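/*
 * Example (added): for "ld #2; ldx #3; addx", opt_stmt() sees that
 * both A and X hold known constants and calls fold_op(), which
 * rewrites the addx in place as "ld #5" and clears 'done' so another
 * pass runs over the simplified code.
 */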
579 
580 static inline struct slist *
this_op(s)
582 	struct slist *s;
583 {
584 	while (s != 0 && s->s.code == NOP)
585 		s = s->next;
586 	return s;
587 }
588 
589 static void
opt_not(b)
591 	struct block *b;
592 {
593 	struct block *tmp = JT(b);
594 
595 	JT(b) = JF(b);
596 	JF(b) = tmp;
597 }
598 
599 static void
opt_peep(b)
601 	struct block *b;
602 {
603 	struct slist *s;
604 	struct slist *next, *last;
605 	int val;
606 	long v;
607 
608 	s = b->stmts;
609 	if (s == 0)
610 		return;
611 
612 	last = s;
613 	while (1) {
614 		s = this_op(s);
615 		if (s == 0)
616 			break;
617 		next = this_op(s->next);
618 		if (next == 0)
619 			break;
620 		last = next;
621 
622 		/*
623 		 * st  M[k]	-->	st  M[k]
624 		 * ldx M[k]		tax
625 		 */
626 		if (s->s.code == BPF_ST &&
627 		    next->s.code == (BPF_LDX|BPF_MEM) &&
628 		    s->s.k == next->s.k) {
629 			done = 0;
630 			next->s.code = BPF_MISC|BPF_TAX;
631 		}
632 		/*
633 		 * ld  #k	-->	ldx  #k
634 		 * tax			txa
635 		 */
636 		if (s->s.code == (BPF_LD|BPF_IMM) &&
637 		    next->s.code == (BPF_MISC|BPF_TAX)) {
638 			s->s.code = BPF_LDX|BPF_IMM;
639 			next->s.code = BPF_MISC|BPF_TXA;
640 			done = 0;
641 		}
642 		/*
643 		 * This is an ugly special case, but it happens
644 		 * when you say tcp[k] or udp[k] where k is a constant.
645 		 */
646 		if (s->s.code == (BPF_LD|BPF_IMM)) {
647 			struct slist *add, *tax, *ild;
648 
649 			/*
650 			 * Check that X isn't used on exit from this
651 			 * block (which the optimizer might cause).
652 			 * We know the code generator won't generate
653 			 * any local dependencies.
654 			 */
655 			if (ATOMELEM(b->out_use, X_ATOM))
656 				break;
657 
658 			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
659 				add = next;
660 			else
661 				add = this_op(next->next);
662 			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
663 				break;
664 
665 			tax = this_op(add->next);
666 			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
667 				break;
668 
669 			ild = this_op(tax->next);
670 			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
671 			    BPF_MODE(ild->s.code) != BPF_IND)
672 				break;
673 			/*
674 			 * XXX We need to check that X is not
675 			 * subsequently used.  We know we can eliminate the
676 			 * accumulator modifications since it is defined
677 			 * by the last stmt of this sequence.
678 			 *
679 			 * We want to turn this sequence:
680 			 *
681 			 * (004) ldi     #0x2		{s}
682 			 * (005) ldxms   [14]		{next}  -- optional
683 			 * (006) addx			{add}
684 			 * (007) tax			{tax}
685 			 * (008) ild     [x+0]		{ild}
686 			 *
687 			 * into this sequence:
688 			 *
689 			 * (004) nop
690 			 * (005) ldxms   [14]
691 			 * (006) nop
692 			 * (007) nop
693 			 * (008) ild     [x+2]
694 			 *
695 			 */
696 			ild->s.k += s->s.k;
697 			s->s.code = NOP;
698 			add->s.code = NOP;
699 			tax->s.code = NOP;
700 			done = 0;
701 		}
702 		s = next;
703 	}
704 	/*
	 * If we have a subtract to do a comparison, and the X register
706 	 * is a known constant, we can merge this value into the
707 	 * comparison.
708 	 */
709 	if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
710 	    !ATOMELEM(b->out_use, A_ATOM)) {
711 		val = b->val[X_ATOM];
712 		if (vmap[val].is_const) {
713 			b->s.k += vmap[val].const_val;
714 			last->s.code = NOP;
715 			done = 0;
716 		} else if (b->s.k == 0) {
717 			/*
718 			 * sub x  ->	nop
719 			 * j  #0	j  x
720 			 */
721 			last->s.code = NOP;
722 			b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
723 				BPF_X;
724 			done = 0;
725 		}
726 	}
727 	/*
728 	 * Likewise, a constant subtract can be simplified.
729 	 */
730 	else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
731 		 !ATOMELEM(b->out_use, A_ATOM)) {
732 		b->s.k += last->s.k;
733 		last->s.code = NOP;
734 		done = 0;
735 	}
736 	/*
737 	 * and #k	nop
738 	 * jeq #0  ->	jset #k
739 	 */
740 	if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
741 	    !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
742 		b->s.k = last->s.k;
743 		b->s.code = BPF_JMP|BPF_K|BPF_JSET;
744 		last->s.code = NOP;
745 		done = 0;
746 		opt_not(b);
747 	}
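	/*
	 * Example of the rewrite just above (added): "and #0xf; jeq #0"
	 * becomes "jset #0xf".  A jset branches true when any masked bit
	 * is set, which is the opposite sense of the original jeq #0, so
	 * opt_not() swaps the true and false edges to compensate.
	 */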
748 	/*
749 	 * If the accumulator is a known constant, we can compute the
750 	 * comparison result.
751 	 */
752 	val = b->val[A_ATOM];
753 	if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
754 		v = vmap[val].const_val;
755 		switch (BPF_OP(b->s.code)) {
756 
757 		case BPF_JEQ:
758 			v = v == b->s.k;
759 			break;
760 
761 		case BPF_JGT:
762 			v = v > b->s.k;
763 			break;
764 
765 		case BPF_JGE:
766 			v = v >= b->s.k;
767 			break;
768 
769 		case BPF_JSET:
770 			v &= b->s.k;
771 			break;
772 
773 		default:
774 			abort();
775 		}
776 		if (JF(b) != JT(b))
777 			done = 0;
778 		if (v)
779 			JF(b) = JT(b);
780 		else
781 			JT(b) = JF(b);
782 	}
783 }
784 
785 /*
 * Compute the symbolic value of the expression in 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
790  */
791 static void
opt_stmt(s, val, alter)
793 	struct stmt *s;
794 	long val[];
795 	int alter;
796 {
797 	int op;
798 	long v;
799 
800 	switch (s->code) {
801 
802 	case BPF_LD|BPF_ABS|BPF_W:
803 	case BPF_LD|BPF_ABS|BPF_H:
804 	case BPF_LD|BPF_ABS|BPF_B:
805 		v = F(s->code, s->k, 0L);
806 		vstore(s, &val[A_ATOM], v, alter);
807 		break;
808 
809 	case BPF_LD|BPF_IND|BPF_W:
810 	case BPF_LD|BPF_IND|BPF_H:
811 	case BPF_LD|BPF_IND|BPF_B:
812 		v = val[X_ATOM];
813 		if (alter && vmap[v].is_const) {
814 			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
815 			s->k += vmap[v].const_val;
816 			v = F(s->code, s->k, 0L);
817 			done = 0;
818 		}
819 		else
820 			v = F(s->code, s->k, v);
821 		vstore(s, &val[A_ATOM], v, alter);
822 		break;
823 
824 	case BPF_LD|BPF_LEN:
825 		v = F(s->code, 0L, 0L);
826 		vstore(s, &val[A_ATOM], v, alter);
827 		break;
828 
829 	case BPF_LD|BPF_IMM:
830 		v = K(s->k);
831 		vstore(s, &val[A_ATOM], v, alter);
832 		break;
833 
834 	case BPF_LDX|BPF_IMM:
835 		v = K(s->k);
836 		vstore(s, &val[X_ATOM], v, alter);
837 		break;
838 
839 	case BPF_LDX|BPF_MSH|BPF_B:
840 		v = F(s->code, s->k, 0L);
841 		vstore(s, &val[X_ATOM], v, alter);
842 		break;
843 
844 	case BPF_ALU|BPF_NEG:
845 		if (alter && vmap[val[A_ATOM]].is_const) {
846 			s->code = BPF_LD|BPF_IMM;
847 			s->k = -vmap[val[A_ATOM]].const_val;
848 			val[A_ATOM] = K(s->k);
849 		}
850 		else
851 			val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
852 		break;
853 
854 	case BPF_ALU|BPF_ADD|BPF_K:
855 	case BPF_ALU|BPF_SUB|BPF_K:
856 	case BPF_ALU|BPF_MUL|BPF_K:
857 	case BPF_ALU|BPF_DIV|BPF_K:
858 	case BPF_ALU|BPF_AND|BPF_K:
859 	case BPF_ALU|BPF_OR|BPF_K:
860 	case BPF_ALU|BPF_LSH|BPF_K:
861 	case BPF_ALU|BPF_RSH|BPF_K:
862 		op = BPF_OP(s->code);
863 		if (alter) {
864 			if (s->k == 0) {
865 				if (op == BPF_ADD || op == BPF_SUB ||
866 				    op == BPF_LSH || op == BPF_RSH ||
867 				    op == BPF_OR) {
868 					s->code = NOP;
869 					break;
870 				}
871 				if (op == BPF_MUL || op == BPF_AND) {
872 					s->code = BPF_LD|BPF_IMM;
873 					val[A_ATOM] = K(s->k);
874 					break;
875 				}
876 			}
877 			if (vmap[val[A_ATOM]].is_const) {
878 				fold_op(s, val[A_ATOM], K(s->k));
879 				val[A_ATOM] = K(s->k);
880 				break;
881 			}
882 		}
883 		val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
884 		break;
885 
886 	case BPF_ALU|BPF_ADD|BPF_X:
887 	case BPF_ALU|BPF_SUB|BPF_X:
888 	case BPF_ALU|BPF_MUL|BPF_X:
889 	case BPF_ALU|BPF_DIV|BPF_X:
890 	case BPF_ALU|BPF_AND|BPF_X:
891 	case BPF_ALU|BPF_OR|BPF_X:
892 	case BPF_ALU|BPF_LSH|BPF_X:
893 	case BPF_ALU|BPF_RSH|BPF_X:
894 		op = BPF_OP(s->code);
895 		if (alter && vmap[val[X_ATOM]].is_const) {
896 			if (vmap[val[A_ATOM]].is_const) {
897 				fold_op(s, val[A_ATOM], val[X_ATOM]);
898 				val[A_ATOM] = K(s->k);
899 			}
900 			else {
901 				s->code = BPF_ALU|BPF_K|op;
902 				s->k = vmap[val[X_ATOM]].const_val;
903 				done = 0;
904 				val[A_ATOM] =
905 					F(s->code, val[A_ATOM], K(s->k));
906 			}
907 			break;
908 		}
909 		/*
910 		 * Check if we're doing something to an accumulator
911 		 * that is 0, and simplify.  This may not seem like
912 		 * much of a simplification but it could open up further
913 		 * optimizations.
914 		 * XXX We could also check for mul by 1, and -1, etc.
915 		 */
916 		if (alter && vmap[val[A_ATOM]].is_const
917 		    && vmap[val[A_ATOM]].const_val == 0) {
918 			if (op == BPF_ADD || op == BPF_OR ||
919 			    op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
920 				s->code = BPF_MISC|BPF_TXA;
921 				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
922 				break;
923 			}
924 			else if (op == BPF_MUL || op == BPF_DIV ||
925 				 op == BPF_AND) {
926 				s->code = BPF_LD|BPF_IMM;
927 				s->k = 0;
928 				vstore(s, &val[A_ATOM], K(s->k), alter);
929 				break;
930 			}
931 			else if (op == BPF_NEG) {
932 				s->code = NOP;
933 				break;
934 			}
935 		}
936 		val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
937 		break;
938 
939 	case BPF_MISC|BPF_TXA:
940 		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
941 		break;
942 
943 	case BPF_LD|BPF_MEM:
944 		v = val[s->k];
945 		if (alter && vmap[v].is_const) {
946 			s->code = BPF_LD|BPF_IMM;
947 			s->k = vmap[v].const_val;
948 			done = 0;
949 		}
950 		vstore(s, &val[A_ATOM], v, alter);
951 		break;
952 
953 	case BPF_MISC|BPF_TAX:
954 		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
955 		break;
956 
957 	case BPF_LDX|BPF_MEM:
958 		v = val[s->k];
959 		if (alter && vmap[v].is_const) {
960 			s->code = BPF_LDX|BPF_IMM;
961 			s->k = vmap[v].const_val;
962 			done = 0;
963 		}
964 		vstore(s, &val[X_ATOM], v, alter);
965 		break;
966 
967 	case BPF_ST:
968 		vstore(s, &val[s->k], val[A_ATOM], alter);
969 		break;
970 
971 	case BPF_STX:
972 		vstore(s, &val[s->k], val[X_ATOM], alter);
973 		break;
974 	}
975 }
976 
977 static void
deadstmt(s, last)
979 	register struct stmt *s;
980 	register struct stmt *last[];
981 {
982 	register int atom;
983 
984 	atom = atomuse(s);
985 	if (atom >= 0) {
986 		if (atom == AX_ATOM) {
987 			last[X_ATOM] = 0;
988 			last[A_ATOM] = 0;
989 		}
990 		else
991 			last[atom] = 0;
992 	}
993 	atom = atomdef(s);
994 	if (atom >= 0) {
995 		if (last[atom]) {
996 			done = 0;
997 			last[atom]->code = NOP;
998 		}
999 		last[atom] = s;
1000 	}
1001 }
1002 
1003 static void
opt_deadstores(b)
1005 	register struct block *b;
1006 {
1007 	register struct slist *s;
1008 	register int atom;
1009 	struct stmt *last[N_ATOMS];
1010 
1011 	bzero((char *)last, sizeof last);
1012 
1013 	for (s = b->stmts; s != 0; s = s->next)
1014 		deadstmt(&s->s, last);
1015 	deadstmt(&b->s, last);
1016 
1017 	for (atom = 0; atom < N_ATOMS; ++atom)
1018 		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1019 			last[atom]->code = NOP;
1020 			done = 0;
1021 		}
1022 }
1023 
1024 static void
opt_blk(b, do_stmts)
	struct block *b;
	int do_stmts;
{
1028 	struct slist *s;
1029 	struct edge *p;
1030 	int i;
1031 	long aval;
1032 
1033 	/*
1034 	 * Initialize the atom values.
1035 	 * If we have no predecessors, everything is undefined.
	 * Otherwise, we inherit our values from our predecessors.
1037 	 * If any register has an ambiguous value (i.e. control paths are
1038 	 * merging) give it the undefined value of 0.
1039 	 */
1040 	p = b->in_edges;
1041 	if (p == 0)
1042 		bzero((char *)b->val, sizeof(b->val));
1043 	else {
1044 		bcopy((char *)p->pred->val, (char *)b->val, sizeof(b->val));
		while ((p = p->next) != 0) {
1046 			for (i = 0; i < N_ATOMS; ++i)
1047 				if (b->val[i] != p->pred->val[i])
1048 					b->val[i] = 0;
1049 		}
1050 	}
1051 	aval = b->val[A_ATOM];
1052 	for (s = b->stmts; s; s = s->next)
1053 		opt_stmt(&s->s, b->val, do_stmts);
1054 
1055 	/*
1056 	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator with a value that is
1058 	 * already there, eliminate all the statements.
1059 	 */
1060 	if (do_stmts && b->out_use == 0 && aval != 0 &&
1061 	    b->val[A_ATOM] == aval)
1062 		b->stmts = 0;
1063 	else {
1064 		opt_peep(b);
1065 		opt_deadstores(b);
1066 	}
1067 	/*
1068 	 * Set up values for branch optimizer.
1069 	 */
1070 	if (BPF_SRC(b->s.code) == BPF_K)
1071 		b->oval = K(b->s.k);
1072 	else
1073 		b->oval = b->val[X_ATOM];
1074 	b->et.code = b->s.code;
1075 	b->ef.code = -b->s.code;
1076 }
1077 
1078 /*
1079  * Return true if any register that is used on exit from 'succ', has
1080  * an exit value that is different from the corresponding exit value
1081  * from 'b'.
1082  */
1083 static int
use_conflict(b, succ)
1085 	struct block *b, *succ;
1086 {
1087 	int atom;
1088 	atomset use = succ->out_use;
1089 
1090 	if (use == 0)
1091 		return 0;
1092 
1093 	for (atom = 0; atom < N_ATOMS; ++atom)
1094 		if (ATOMELEM(use, atom))
1095 			if (b->val[atom] != succ->val[atom])
1096 				return 1;
1097 	return 0;
1098 }
1099 
1100 struct block *
fold_edge(child, ep)
1102 	struct block *child;
1103 	struct edge *ep;
1104 {
1105 	int sense;
1106 	int aval0, aval1, oval0, oval1;
1107 	int code = ep->code;
1108 
1109 	if (code < 0) {
1110 		code = -code;
1111 		sense = 0;
1112 	} else
1113 		sense = 1;
1114 
1115 	if (child->s.code != code)
1116 		return 0;
1117 
1118 	aval0 = child->val[A_ATOM];
1119 	oval0 = child->oval;
1120 	aval1 = ep->pred->val[A_ATOM];
1121 	oval1 = ep->pred->oval;
1122 
1123 	if (aval0 != aval1)
1124 		return 0;
1125 
1126 	if (oval0 == oval1)
1127 		/*
1128 		 * The operands are identical, so the
1129 		 * result is true if a true branch was
1130 		 * taken to get here, otherwise false.
1131 		 */
1132 		return sense ? JT(child) : JF(child);
1133 
1134 	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1135 		/*
1136 		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
1138 		 * comparison with a constant.  We rely on the fact that
1139 		 * distinct constants have distinct value numbers.
1140 		 */
1141 		return JF(child);
1142 
1143 	return 0;
1144 }
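/*
 * Example (added): a filter like "tcp src port 80 or tcp dst port 80"
 * tests the IP protocol byte on more than one path.  When a dominating
 * edge has already performed the identical "jeq #6" on the same value
 * number in the accumulator, fold_edge() returns the grandchild that
 * the known outcome implies, and opt_j() retargets the edge there,
 * skipping the repeated comparison.
 */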
1145 
1146 static void
opt_j(ep)
1148 	struct edge *ep;
1149 {
1150 	register int i, k;
1151 	register struct block *target;
1152 
1153 	if (JT(ep->succ) == 0)
1154 		return;
1155 
1156 	if (JT(ep->succ) == JF(ep->succ)) {
1157 		/*
1158 		 * Common branch targets can be eliminated, provided
1159 		 * there is no data dependency.
1160 		 */
1161 		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1162 			done = 0;
1163 			ep->succ = JT(ep->succ);
1164 		}
1165 	}
1166 	/*
1167 	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
1171 	 * efficient loop.
1172 	 */
1173  top:
1174 	for (i = 0; i < edgewords; ++i) {
1175 		register u_long x = ep->edom[i];
1176 
1177 		while (x != 0) {
1178 			k = ffs(x) - 1;
1179 			x &=~ 1 << k;
1180 			k += i * BITS_PER_WORD;
1181 
1182 			target = fold_edge(ep->succ, edges[k]);
1183 			/*
1184 			 * Check that there is no data dependency between
1185 			 * nodes that will be violated if we move the edge.
1186 			 */
1187 			if (target != 0 && !use_conflict(ep->pred, target)) {
1188 				done = 0;
1189 				ep->succ = target;
1190 				if (JT(target) != 0)
1191 					/*
1192 					 * Start over unless we hit a leaf.
1193 					 */
1194 					goto top;
1195 				return;
1196 			}
1197 		}
1198 	}
1199 }
1200 
1201 
1202 static void
or_pullup(b)
1204 	struct block *b;
1205 {
1206 	int val, at_top;
1207 	struct block *pull;
1208 	struct block **diffp, **samep;
1209 	struct edge *ep;
1210 
1211 	ep = b->in_edges;
1212 	if (ep == 0)
1213 		return;
1214 
1215 	/*
1216 	 * Make sure each predecessor loads the same value.
1217 	 * XXX why?
1218 	 */
1219 	val = ep->pred->val[A_ATOM];
1220 	for (ep = ep->next; ep != 0; ep = ep->next)
1221 		if (val != ep->pred->val[A_ATOM])
1222 			return;
1223 
1224 	if (JT(b->in_edges->pred) == b)
1225 		diffp = &JT(b->in_edges->pred);
1226 	else
1227 		diffp = &JF(b->in_edges->pred);
1228 
1229 	at_top = 1;
1230 	while (1) {
1231 		if (*diffp == 0)
1232 			return;
1233 
1234 		if (JT(*diffp) != JT(b))
1235 			return;
1236 
1237 		if (!SET_MEMBER((*diffp)->dom, b->id))
1238 			return;
1239 
1240 		if ((*diffp)->val[A_ATOM] != val)
1241 			break;
1242 
1243 		diffp = &JF(*diffp);
1244 		at_top = 0;
1245 	}
1246 	samep = &JF(*diffp);
1247 	while (1) {
1248 		if (*samep == 0)
1249 			return;
1250 
1251 		if (JT(*samep) != JT(b))
1252 			return;
1253 
1254 		if (!SET_MEMBER((*samep)->dom, b->id))
1255 			return;
1256 
1257 		if ((*samep)->val[A_ATOM] == val)
1258 			break;
1259 
		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
1263 		samep = &JF(*samep);
1264 	}
1265 #ifdef notdef
1266 	/* XXX This doesn't cover everything. */
1267 	for (i = 0; i < N_ATOMS; ++i)
1268 		if ((*samep)->val[i] != pred->val[i])
1269 			return;
1270 #endif
1271 	/* Pull up the node. */
1272 	pull = *samep;
1273 	*samep = JF(pull);
1274 	JF(pull) = *diffp;
1275 
1276 	/*
1277 	 * At the top of the chain, each predecessor needs to point at the
1278 	 * pulled up node.  Inside the chain, there is only one predecessor
1279 	 * to worry about.
1280 	 */
1281 	if (at_top) {
1282 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
1283 			if (JT(ep->pred) == b)
1284 				JT(ep->pred) = pull;
1285 			else
1286 				JF(ep->pred) = pull;
1287 		}
1288 	}
1289 	else
1290 		*diffp = pull;
1291 
1292 	done = 0;
1293 }
1294 
1295 static void
and_pullup(b)
1297 	struct block *b;
1298 {
1299 	int val, at_top;
1300 	struct block *pull;
1301 	struct block **diffp, **samep;
1302 	struct edge *ep;
1303 
1304 	ep = b->in_edges;
1305 	if (ep == 0)
1306 		return;
1307 
1308 	/*
1309 	 * Make sure each predecessor loads the same value.
1310 	 */
1311 	val = ep->pred->val[A_ATOM];
1312 	for (ep = ep->next; ep != 0; ep = ep->next)
1313 		if (val != ep->pred->val[A_ATOM])
1314 			return;
1315 
1316 	if (JT(b->in_edges->pred) == b)
1317 		diffp = &JT(b->in_edges->pred);
1318 	else
1319 		diffp = &JF(b->in_edges->pred);
1320 
1321 	at_top = 1;
1322 	while (1) {
1323 		if (*diffp == 0)
1324 			return;
1325 
1326 		if (JF(*diffp) != JF(b))
1327 			return;
1328 
1329 		if (!SET_MEMBER((*diffp)->dom, b->id))
1330 			return;
1331 
1332 		if ((*diffp)->val[A_ATOM] != val)
1333 			break;
1334 
1335 		diffp = &JT(*diffp);
1336 		at_top = 0;
1337 	}
1338 	samep = &JT(*diffp);
1339 	while (1) {
1340 		if (*samep == 0)
1341 			return;
1342 
1343 		if (JF(*samep) != JF(b))
1344 			return;
1345 
1346 		if (!SET_MEMBER((*samep)->dom, b->id))
1347 			return;
1348 
1349 		if ((*samep)->val[A_ATOM] == val)
1350 			break;
1351 
1352 		/* XXX Need to check that there are no data dependencies
1353 		   between diffp and samep.  Currently, the code generator
1354 		   will not produce such dependencies. */
1355 		samep = &JT(*samep);
1356 	}
1357 #ifdef notdef
1358 	/* XXX This doesn't cover everything. */
1359 	for (i = 0; i < N_ATOMS; ++i)
1360 		if ((*samep)->val[i] != pred->val[i])
1361 			return;
1362 #endif
1363 	/* Pull up the node. */
1364 	pull = *samep;
1365 	*samep = JT(pull);
1366 	JT(pull) = *diffp;
1367 
1368 	/*
1369 	 * At the top of the chain, each predecessor needs to point at the
1370 	 * pulled up node.  Inside the chain, there is only one predecessor
1371 	 * to worry about.
1372 	 */
1373 	if (at_top) {
1374 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
1375 			if (JT(ep->pred) == b)
1376 				JT(ep->pred) = pull;
1377 			else
1378 				JF(ep->pred) = pull;
1379 		}
1380 	}
1381 	else
1382 		*diffp = pull;
1383 
1384 	done = 0;
1385 }
1386 
1387 static void
opt_blks(root, do_stmts)
	struct block *root;
	int do_stmts;
{
1391 	int i, maxlevel;
1392 	struct block *p;
1393 
1394 	init_val();
1395 	maxlevel = root->level;
1396 	for (i = maxlevel; i >= 0; --i)
1397 		for (p = levels[i]; p; p = p->link)
1398 			opt_blk(p, do_stmts);
1399 
1400 	if (do_stmts)
1401 		/*
1402 		 * No point trying to move branches; it can't possibly
1403 		 * make a difference at this point.
1404 		 */
1405 		return;
1406 
1407 	for (i = 1; i <= maxlevel; ++i) {
1408 		for (p = levels[i]; p; p = p->link) {
1409 			opt_j(&p->et);
1410 			opt_j(&p->ef);
1411 		}
1412 	}
1413 	for (i = 1; i <= maxlevel; ++i) {
1414 		for (p = levels[i]; p; p = p->link) {
1415 			or_pullup(p);
1416 			and_pullup(p);
1417 		}
1418 	}
1419 }
1420 
1421 static inline void
link_inedge(parent, child)
1423 	struct edge *parent;
1424 	struct block *child;
1425 {
1426 	parent->next = child->in_edges;
1427 	child->in_edges = parent;
1428 }
1429 
1430 static void
find_inedges(root)
1432 	struct block *root;
1433 {
1434 	int i;
1435 	struct block *b;
1436 	struct edge *ep;
1437 
1438 	for (i = 0; i < n_blocks; ++i)
1439 		blocks[i]->in_edges = 0;
1440 
1441 	/*
1442 	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
1444 	 */
1445 	for (i = root->level; i > 0; --i) {
1446 		for (b = levels[i]; b != 0; b = b->link) {
1447 			link_inedge(&b->et, JT(b));
1448 			link_inedge(&b->ef, JF(b));
1449 		}
1450 	}
1451 }
1452 
1453 static void
opt_root(b)
1455 	struct block **b;
1456 {
1457 	struct slist *tmp, *s;
1458 
1459 	s = (*b)->stmts;
1460 	(*b)->stmts = 0;
1461 	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1462 		*b = JT(*b);
1463 
1464 	tmp = (*b)->stmts;
1465 	if (tmp != 0)
1466 		sappend(s, tmp);
1467 	(*b)->stmts = s;
1468 }
1469 
1470 static void
opt_loop(root, do_stmts)
1472 	struct block *root;
1473 	int do_stmts;
1474 {
1475 	int passes = 0;
1476 #ifdef BDEBUG
1477 	if (dflag > 1)
1478 		opt_dump(root);
1479 #endif
1480 	do {
1481 		done = 1;
1482 		find_levels(root);
1483 		find_dom(root);
1484 		find_closure(root);
1485 		find_inedges(root);
1486 		find_ud(root);
1487 		find_edom(root);
1488 		opt_blks(root, do_stmts);
1489 #ifdef BDEBUG
1490 		if (dflag > 1)
1491 			opt_dump(root);
1492 #endif
1493 	} while (!done);
1494 }
1495 
1496 /*
1497  * Optimize the filter code in its dag representation.
1498  */
1499 void
optimize(rootp)
1501 	struct block **rootp;
1502 {
1503 	struct block *root;
1504 
1505 	root = *rootp;
1506 
1507 	opt_init(root);
1508 	opt_loop(root, 0);
1509 	opt_loop(root, 1);
1510 	intern_blocks(root);
1511 	opt_root(rootp);
1512 	opt_cleanup();
1513 }
1514 
1515 static void
make_marks(p)
1517 	struct block *p;
1518 {
1519 	if (!isMarked(p)) {
1520 		Mark(p);
1521 		if (BPF_CLASS(p->s.code) != BPF_RET) {
1522 			make_marks(JT(p));
1523 			make_marks(JF(p));
1524 		}
1525 	}
1526 }
1527 
1528 /*
1529  * Mark code array such that isMarked(i) is true
1530  * only for nodes that are alive.
1531  */
1532 static void
mark_code(p)
1534 	struct block *p;
1535 {
1536 	cur_mark += 1;
1537 	make_marks(p);
1538 }
1539 
1540 /*
1541  * True iff the two stmt lists load the same value from the packet into
1542  * the accumulator.
1543  */
1544 static int
eq_slist(x, y)
1546 	struct slist *x, *y;
1547 {
1548 	while (1) {
1549 		while (x && x->s.code == NOP)
1550 			x = x->next;
1551 		while (y && y->s.code == NOP)
1552 			y = y->next;
1553 		if (x == 0)
1554 			return y == 0;
1555 		if (y == 0)
1556 			return x == 0;
1557 		if (x->s.code != y->s.code || x->s.k != y->s.k)
1558 			return 0;
1559 		x = x->next;
1560 		y = y->next;
1561 	}
1562 }
1563 
1564 static inline int
eq_blk(b0, b1)
1566 	struct block *b0, *b1;
1567 {
1568 	if (b0->s.code == b1->s.code &&
1569 	    b0->s.k == b1->s.k &&
1570 	    b0->et.succ == b1->et.succ &&
1571 	    b0->ef.succ == b1->ef.succ)
1572 		return eq_slist(b0->stmts, b1->stmts);
1573 	return 0;
1574 }
1575 
1576 static void
intern_blocks(root)
1578 	struct block *root;
1579 {
1580 	struct block *p;
1581 	int i, j;
1582 	int done;
1583  top:
1584 	done = 1;
1585 	for (i = 0; i < n_blocks; ++i)
1586 		blocks[i]->link = 0;
1587 
1588 	mark_code(root);
1589 
1590 	for (i = n_blocks - 1; --i >= 0; ) {
1591 		if (!isMarked(blocks[i]))
1592 			continue;
1593 		for (j = i + 1; j < n_blocks; ++j) {
1594 			if (!isMarked(blocks[j]))
1595 				continue;
1596 			if (eq_blk(blocks[i], blocks[j])) {
1597 				blocks[i]->link = blocks[j]->link ?
1598 					blocks[j]->link : blocks[j];
1599 				break;
1600 			}
1601 		}
1602 	}
1603 	for (i = 0; i < n_blocks; ++i) {
1604 		p = blocks[i];
1605 		if (JT(p) == 0)
1606 			continue;
1607 		if (JT(p)->link) {
1608 			done = 0;
1609 			JT(p) = JT(p)->link;
1610 		}
1611 		if (JF(p)->link) {
1612 			done = 0;
1613 			JF(p) = JF(p)->link;
1614 		}
1615 	}
1616 	if (!done)
1617 		goto top;
1618 }
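/*
 * Example (added): template-driven code generation can emit the same
 * load-and-compare block, or the same return block, on more than one
 * path.  eq_blk() treats two blocks as equal when they have the same
 * opcode and operand, the same successors, and NOP-insensitive equal
 * statement lists, so every reference is redirected to a single
 * representative through the 'link' field, repeating until no block
 * is re-linked.
 */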
1619 
1620 static void
opt_cleanup()
1622 {
1623 	free((void *)vnode_base);
1624 	free((void *)vmap);
1625 	free((void *)edges);
1626 	free((void *)space);
1627 	free((void *)levels);
1628 	free((void *)blocks);
1629 }
1630 
1631 /*
1632  * Return the number of stmts in 's'.
1633  */
1634 static int
slength(s)
1636 	struct slist *s;
1637 {
1638 	int n = 0;
1639 
1640 	for (; s; s = s->next)
1641 		if (s->s.code != NOP)
1642 			++n;
1643 	return n;
1644 }
1645 
1646 /*
1647  * Return the number of nodes reachable by 'p'.
1648  * All nodes should be initially unmarked.
1649  */
1650 static int
count_blocks(p)
1652 	struct block *p;
1653 {
1654 	if (p == 0 || isMarked(p))
1655 		return 0;
1656 	Mark(p);
1657 	return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1658 }
1659 
1660 /*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
1663  */
1664 static void
number_blks_r(p)
1666 	struct block *p;
1667 {
1668 	int n;
1669 
1670 	if (p == 0 || isMarked(p))
1671 		return;
1672 
1673 	Mark(p);
1674 	n = n_blocks++;
1675 	p->id = n;
1676 	blocks[n] = p;
1677 
1678 	number_blks_r(JT(p));
1679 	number_blks_r(JF(p));
1680 }
1681 
1682 /*
1683  * Return the number of stmts in the flowgraph reachable by 'p'.
1684  * The nodes should be unmarked before calling.
1685  */
1686 static int
count_stmts(p)
1688 	struct block *p;
1689 {
1690 	int n;
1691 
1692 	if (p == 0 || isMarked(p))
1693 		return 0;
1694 	Mark(p);
1695 	n = count_stmts(JT(p)) + count_stmts(JF(p));
1696 	return slength(p->stmts) + n + 1;
1697 }
1698 
1699 /*
1700  * Allocate memory.  All allocation is done before optimization
1701  * is begun.  A linear bound on the size of all data structures is computed
1702  * from the total number of blocks and/or statements.
1703  */
1704 static void
opt_init(root)
1706 	struct block *root;
1707 {
1708 	u_long *p;
1709 	int i, n, max_stmts;
1710 
1711 	/*
1712 	 * First, count the blocks, so we can malloc an array to map
1713 	 * block number to block.  Then, put the blocks into the array.
1714 	 */
1715 	unMarkAll();
1716 	n = count_blocks(root);
1717 	blocks = (struct block **)malloc(n * sizeof(*blocks));
1718 	unMarkAll();
1719 	n_blocks = 0;
1720 	number_blks_r(root);
1721 
1722 	n_edges = 2 * n_blocks;
1723 	edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1724 
1725 	/*
1726 	 * The number of levels is bounded by the number of nodes.
1727 	 */
1728 	levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1729 
1730 	edgewords = n_edges / (8 * sizeof(u_long)) + 1;
1731 	nodewords = n_blocks / (8 * sizeof(u_long)) + 1;
1732 
1733 	/* XXX */
1734 	space = (u_long *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1735 				 + n_edges * edgewords * sizeof(*space));
1736 	p = space;
1737 	all_dom_sets = p;
1738 	for (i = 0; i < n; ++i) {
1739 		blocks[i]->dom = p;
1740 		p += nodewords;
1741 	}
1742 	all_closure_sets = p;
1743 	for (i = 0; i < n; ++i) {
1744 		blocks[i]->closure = p;
1745 		p += nodewords;
1746 	}
1747 	all_edge_sets = p;
1748 	for (i = 0; i < n; ++i) {
1749 		register struct block *b = blocks[i];
1750 
1751 		b->et.edom = p;
1752 		p += edgewords;
1753 		b->ef.edom = p;
1754 		p += edgewords;
1755 		b->et.id = i;
1756 		edges[i] = &b->et;
1757 		b->ef.id = n_blocks + i;
1758 		edges[n_blocks + i] = &b->ef;
1759 		b->et.pred = b;
1760 		b->ef.pred = b;
1761 	}
1762 	max_stmts = 0;
1763 	for (i = 0; i < n; ++i)
1764 		max_stmts += slength(blocks[i]->stmts) + 1;
1765 	/*
1766 	 * We allocate at most 3 value numbers per statement,
1767 	 * so this is an upper bound on the number of valnodes
1768 	 * we'll need.
1769 	 */
1770 	maxval = 3 * max_stmts;
1771 	vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
	vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
1773 }
1774 
1775 /*
1776  * Some pointers used to convert the basic block form of the code,
1777  * into the array form that BPF requires.  'fstart' will point to
1778  * the malloc'd array while 'ftail' is used during the recursive traversal.
1779  */
1780 static struct bpf_insn *fstart;
1781 static struct bpf_insn *ftail;
1782 
1783 #ifdef BDEBUG
1784 int bids[1000];
1785 #endif
1786 
1787 static void
convert_code_r(p)
1789 	struct block *p;
1790 {
1791 	struct bpf_insn *dst;
1792 	struct slist *src;
1793 	int slen;
1794 	u_int off;
1795 
1796 	if (p == 0 || isMarked(p))
1797 		return;
1798 	Mark(p);
1799 
1800 	convert_code_r(JF(p));
1801 	convert_code_r(JT(p));
1802 
1803 	slen = slength(p->stmts);
1804 	dst = ftail -= slen + 1;
1805 
1806 	p->offset = dst - fstart;
1807 
1808 	for (src = p->stmts; src; src = src->next) {
1809 		if (src->s.code == NOP)
1810 			continue;
1811 		dst->code = (u_short)src->s.code;
1812 		dst->k = src->s.k;
1813 		++dst;
1814 	}
1815 #ifdef BDEBUG
1816 	bids[dst - fstart] = p->id + 1;
1817 #endif
1818 	dst->code = (u_short)p->s.code;
1819 	dst->k = p->s.k;
1820 	if (JT(p)) {
1821 		off = JT(p)->offset - (p->offset + slen) - 1;
1822 		if (off >= 256)
1823 			error("long jumps not supported");
1824 		dst->jt = off;
1825 		off = JF(p)->offset - (p->offset + slen) - 1;
1826 		if (off >= 256)
1827 			error("long jumps not supported");
1828 		dst->jf = off;
1829 	}
1830 }
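/*
 * Example (added): convert_code_r() lays blocks out back to front, so
 * a block's successors already have their 'offset' assigned when its
 * branch is written.  If a block starts at offset 10 with 3 real stmts
 * plus the branch, and its true successor starts at offset 20, then
 * jt = 20 - (10 + 3) - 1 = 6: the number of instructions skipped,
 * counted from the instruction after the branch.
 */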
1831 
1832 
1833 /*
1834  * Convert flowgraph intermediate representation to the
1835  * BPF array representation.  Set *lenp to the number of instructions.
1836  */
1837 struct bpf_insn *
icode_to_fcode(root, lenp)
1839 	struct block *root;
1840 	int *lenp;
1841 {
1842 	int n;
1843 	struct bpf_insn *fp;
1844 
1845 	unMarkAll();
1846 	n = *lenp = count_stmts(root);
1847 
1848 	fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
1849 	bzero((char *)fp, sizeof(*fp) * n);
1850 	fstart = fp;
1851 	ftail = fp + n;
1852 
1853 	unMarkAll();
1854 	convert_code_r(root);
1855 
1856 	return fp;
1857 }
1858 
1859 #ifdef BDEBUG
1860 opt_dump(root)
1861 	struct block *root;
1862 {
1863 	struct bpf_program f;
1864 
1865 	bzero(bids, sizeof bids);
1866 	f.bf_insns = icode_to_fcode(root, &f.bf_len);
1867 	bpf_dump(&f, 1);
1868 	putchar('\n');
1869 	free((char *)f.bf_insns);
1870 }
1871 #endif
1872