xref: /freebsd/contrib/libpcap/optimize.c (revision dd744a89)
18cf6c252SPaul Traina /*
28cf6c252SPaul Traina  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
38cf6c252SPaul Traina  *	The Regents of the University of California.  All rights reserved.
48cf6c252SPaul Traina  *
58cf6c252SPaul Traina  * Redistribution and use in source and binary forms, with or without
68cf6c252SPaul Traina  * modification, are permitted provided that: (1) source code distributions
78cf6c252SPaul Traina  * retain the above copyright notice and this paragraph in its entirety, (2)
88cf6c252SPaul Traina  * distributions including binary code include the above copyright notice and
98cf6c252SPaul Traina  * this paragraph in its entirety in the documentation or other materials
108cf6c252SPaul Traina  * provided with the distribution, and (3) all advertising materials mentioning
118cf6c252SPaul Traina  * features or use of this software display the following acknowledgement:
128cf6c252SPaul Traina  * ``This product includes software developed by the University of California,
138cf6c252SPaul Traina  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
148cf6c252SPaul Traina  * the University nor the names of its contributors may be used to endorse
158cf6c252SPaul Traina  * or promote products derived from this software without specific prior
168cf6c252SPaul Traina  * written permission.
178cf6c252SPaul Traina  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
188cf6c252SPaul Traina  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
198cf6c252SPaul Traina  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
208cf6c252SPaul Traina  *
21b00ab754SHans Petter Selasky  *  Optimization module for BPF code intermediate representation.
228cf6c252SPaul Traina  */
23dc2c7305SBill Fenner 
24dc2c7305SBill Fenner #ifdef HAVE_CONFIG_H
25b00ab754SHans Petter Selasky #include <config.h>
268cf6c252SPaul Traina #endif
278cf6c252SPaul Traina 
28b00ab754SHans Petter Selasky #include <pcap-types.h>
29a0ee43a1SRui Paulo 
308cf6c252SPaul Traina #include <stdio.h>
318cf6c252SPaul Traina #include <stdlib.h>
328cf6c252SPaul Traina #include <memory.h>
3357e22627SCy Schubert #include <setjmp.h>
3404fb2745SSam Leffler #include <string.h>
356f9cba8fSJoseph Mingrone #include <limits.h> /* for SIZE_MAX */
36dc2c7305SBill Fenner #include <errno.h>
37dc2c7305SBill Fenner 
388cf6c252SPaul Traina #include "pcap-int.h"
398cf6c252SPaul Traina 
408cf6c252SPaul Traina #include "gencode.h"
41b00ab754SHans Petter Selasky #include "optimize.h"
426f9cba8fSJoseph Mingrone #include "diag-control.h"
438cf6c252SPaul Traina 
448cf6c252SPaul Traina #ifdef HAVE_OS_PROTO_H
458cf6c252SPaul Traina #include "os-proto.h"
468cf6c252SPaul Traina #endif
478cf6c252SPaul Traina 
488cf6c252SPaul Traina #ifdef BDEBUG
49ada6f083SXin LI /*
50b00ab754SHans Petter Selasky  * The internal "debug printout" flag for the filter expression optimizer.
51b00ab754SHans Petter Selasky  * The code to print that stuff is present only if BDEBUG is defined, so
52b00ab754SHans Petter Selasky  * the flag, and the routine to set it, are defined only if BDEBUG is
53b00ab754SHans Petter Selasky  * defined.
54ada6f083SXin LI  */
55b00ab754SHans Petter Selasky static int pcap_optimizer_debug;
56b00ab754SHans Petter Selasky 
57ada6f083SXin LI /*
58b00ab754SHans Petter Selasky  * Routine to set that flag.
59ada6f083SXin LI  *
60b00ab754SHans Petter Selasky  * This is intended for libpcap developers, not for general use.
61b00ab754SHans Petter Selasky  * If you want to set these in a program, you'll have to declare this
62b00ab754SHans Petter Selasky  * routine yourself, with the appropriate DLL import attribute on Windows;
63b00ab754SHans Petter Selasky  * it's not declared in any header file, and won't be declared in any
64b00ab754SHans Petter Selasky  * header file provided by libpcap.
65b00ab754SHans Petter Selasky  */
66b00ab754SHans Petter Selasky PCAP_API void pcap_set_optimizer_debug(int value);
67b00ab754SHans Petter Selasky 
68b00ab754SHans Petter Selasky PCAP_API_DEF void
69b00ab754SHans Petter Selasky pcap_set_optimizer_debug(int value)
70b00ab754SHans Petter Selasky {
71b00ab754SHans Petter Selasky 	pcap_optimizer_debug = value;
72b00ab754SHans Petter Selasky }
73b00ab754SHans Petter Selasky 
74b00ab754SHans Petter Selasky /*
75b00ab754SHans Petter Selasky  * The internal "print dot graph" flag for the filter expression optimizer.
76b00ab754SHans Petter Selasky  * The code to print that stuff is present only if BDEBUG is defined, so
77b00ab754SHans Petter Selasky  * the flag, and the routine to set it, are defined only if BDEBUG is
78b00ab754SHans Petter Selasky  * defined.
79b00ab754SHans Petter Selasky  */
80b00ab754SHans Petter Selasky static int pcap_print_dot_graph;
81b00ab754SHans Petter Selasky 
82b00ab754SHans Petter Selasky /*
83b00ab754SHans Petter Selasky  * Routine to set that flag.
84b00ab754SHans Petter Selasky  *
85b00ab754SHans Petter Selasky  * This is intended for libpcap developers, not for general use.
86b00ab754SHans Petter Selasky  * If you want to set these in a program, you'll have to declare this
87b00ab754SHans Petter Selasky  * routine yourself, with the appropriate DLL import attribute on Windows;
88b00ab754SHans Petter Selasky  * it's not declared in any header file, and won't be declared in any
89b00ab754SHans Petter Selasky  * header file provided by libpcap.
90b00ab754SHans Petter Selasky  */
91b00ab754SHans Petter Selasky PCAP_API void pcap_set_print_dot_graph(int value);
92b00ab754SHans Petter Selasky 
93b00ab754SHans Petter Selasky PCAP_API_DEF void
94b00ab754SHans Petter Selasky pcap_set_print_dot_graph(int value)
95b00ab754SHans Petter Selasky {
96b00ab754SHans Petter Selasky 	pcap_print_dot_graph = value;
97b00ab754SHans Petter Selasky }
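/*
 * Illustrative usage sketch (added for this writeup, not part of libpcap):
 * a developer's test program would declare these routines by hand, since
 * no libpcap header declares them, and call them before compiling a
 * filter, e.g.
 *
 *	extern void pcap_set_optimizer_debug(int value);
 *	extern void pcap_set_print_dot_graph(int value);
 *
 *	pcap_set_optimizer_debug(1);	-- print each optimizer pass
 *	pcap_set_print_dot_graph(1);	-- emit the CFG in dot format
 *
 * On Windows the declarations would also need the appropriate DLL import
 * attribute, as noted above.
 */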
98b00ab754SHans Petter Selasky 
99b00ab754SHans Petter Selasky #endif
100b00ab754SHans Petter Selasky 
101b00ab754SHans Petter Selasky /*
102b00ab754SHans Petter Selasky  * lowest_set_bit().
103b00ab754SHans Petter Selasky  *
104b00ab754SHans Petter Selasky  * Takes a 32-bit integer as an argument.
105b00ab754SHans Petter Selasky  *
106b00ab754SHans Petter Selasky  * If handed a non-zero value, returns the index of the lowest set bit,
1076f9cba8fSJoseph Mingrone  * counting upwards from zero.
108b00ab754SHans Petter Selasky  *
109b00ab754SHans Petter Selasky  * If handed zero, the results are platform- and compiler-dependent.
110b00ab754SHans Petter Selasky  * Keep it out of the light, don't give it any water, don't feed it
111b00ab754SHans Petter Selasky  * after midnight, and don't pass zero to it.
112b00ab754SHans Petter Selasky  *
113b00ab754SHans Petter Selasky  * This is the same as the count of trailing zeroes in the word.
114b00ab754SHans Petter Selasky  */
115b00ab754SHans Petter Selasky #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
116b00ab754SHans Petter Selasky   /*
117b00ab754SHans Petter Selasky    * GCC 3.4 and later; we have __builtin_ctz().
118b00ab754SHans Petter Selasky    */
1196f9cba8fSJoseph Mingrone   #define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask))
120b00ab754SHans Petter Selasky #elif defined(_MSC_VER)
121b00ab754SHans Petter Selasky   /*
122b00ab754SHans Petter Selasky    * Visual Studio; we support only 2005 and later, so use
123b00ab754SHans Petter Selasky    * _BitScanForward().
124b00ab754SHans Petter Selasky    */
125b00ab754SHans Petter Selasky #include <intrin.h>
126b00ab754SHans Petter Selasky 
127b00ab754SHans Petter Selasky #ifndef __clang__
128b00ab754SHans Petter Selasky #pragma intrinsic(_BitScanForward)
129b00ab754SHans Petter Selasky #endif
130b00ab754SHans Petter Selasky 
1316f9cba8fSJoseph Mingrone static __forceinline u_int
132b00ab754SHans Petter Selasky lowest_set_bit(int mask)
133b00ab754SHans Petter Selasky {
134b00ab754SHans Petter Selasky 	unsigned long bit;
135b00ab754SHans Petter Selasky 
136b00ab754SHans Petter Selasky 	/*
137b00ab754SHans Petter Selasky 	 * Don't sign-extend mask if long is longer than int.
138b00ab754SHans Petter Selasky 	 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
139b00ab754SHans Petter Selasky 	 */
140b00ab754SHans Petter Selasky 	if (_BitScanForward(&bit, (unsigned int)mask) == 0)
1416f9cba8fSJoseph Mingrone 		abort();	/* mask is zero */
1426f9cba8fSJoseph Mingrone 	return (u_int)bit;
143b00ab754SHans Petter Selasky }
144b00ab754SHans Petter Selasky #elif defined(MSDOS) && defined(__DJGPP__)
145b00ab754SHans Petter Selasky   /*
146b00ab754SHans Petter Selasky    * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
147b00ab754SHans Petter Selasky    * we've already included.
148b00ab754SHans Petter Selasky    */
1496f9cba8fSJoseph Mingrone   #define lowest_set_bit(mask)	((u_int)(ffs((mask)) - 1))
150b00ab754SHans Petter Selasky #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
151b00ab754SHans Petter Selasky   /*
152b00ab754SHans Petter Selasky    * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
153b00ab754SHans Petter Selasky    * or some other platform (UN*X conforming to a sufficient recent version
153b00ab754SHans Petter Selasky    * or some other platform (UN*X conforming to a sufficiently recent version
155b00ab754SHans Petter Selasky    */
156b00ab754SHans Petter Selasky   #include <strings.h>
1576f9cba8fSJoseph Mingrone   #define lowest_set_bit(mask)	(u_int)((ffs((mask)) - 1))
158b00ab754SHans Petter Selasky #else
159b00ab754SHans Petter Selasky /*
160b00ab754SHans Petter Selasky  * None of the above.
161b00ab754SHans Petter Selasky  * Use a perfect-hash-function-based function.
162ada6f083SXin LI  */
1636f9cba8fSJoseph Mingrone static u_int
164b00ab754SHans Petter Selasky lowest_set_bit(int mask)
165ada6f083SXin LI {
166b00ab754SHans Petter Selasky 	unsigned int v = (unsigned int)mask;
167ada6f083SXin LI 
1686f9cba8fSJoseph Mingrone 	static const u_int MultiplyDeBruijnBitPosition[32] = {
169b00ab754SHans Petter Selasky 		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
170b00ab754SHans Petter Selasky 		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
171b00ab754SHans Petter Selasky 	};
172b00ab754SHans Petter Selasky 
173b00ab754SHans Petter Selasky 	/*
174b00ab754SHans Petter Selasky 	 * We strip off all but the lowermost set bit (v & -v),
175b00ab754SHans Petter Selasky 	 * and perform a minimal perfect hash on it to look up the
176b00ab754SHans Petter Selasky 	 * number of low-order zero bits in a table.
177b00ab754SHans Petter Selasky 	 *
178b00ab754SHans Petter Selasky 	 * See:
179b00ab754SHans Petter Selasky 	 *
180b00ab754SHans Petter Selasky 	 *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
181b00ab754SHans Petter Selasky 	 *
182b00ab754SHans Petter Selasky 	 *	http://supertech.csail.mit.edu/papers/debruijn.pdf
183b00ab754SHans Petter Selasky 	 */
184b00ab754SHans Petter Selasky 	return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
185ada6f083SXin LI }
186a8e07101SRui Paulo #endif
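/*
 * Worked example for the De Bruijn fallback above (added for illustration):
 * for mask = 0x28 (binary 101000), v & -v isolates the lowest set bit,
 * giving 0x8; 0x8 * 0x077CB531U == 0x3BE5A988, and shifting that right
 * by 27 yields 7; MultiplyDeBruijnBitPosition[7] is 3, so
 * lowest_set_bit(0x28) == 3, matching what the __builtin_ctz() and
 * _BitScanForward() branches would return.
 */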
187a8e07101SRui Paulo 
18804fb2745SSam Leffler /*
18904fb2745SSam Leffler  * Represents a deleted instruction.
19004fb2745SSam Leffler  */
19104fb2745SSam Leffler #define NOP -1
19204fb2745SSam Leffler 
19304fb2745SSam Leffler /*
19404fb2745SSam Leffler  * Register numbers for use-def values.
19504fb2745SSam Leffler  * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
19604fb2745SSam Leffler  * location.  A_ATOM is the accumulator and X_ATOM is the index
19704fb2745SSam Leffler  * register.
19804fb2745SSam Leffler  */
1998cf6c252SPaul Traina #define A_ATOM BPF_MEMWORDS
2008cf6c252SPaul Traina #define X_ATOM (BPF_MEMWORDS+1)
2018cf6c252SPaul Traina 
2028cf6c252SPaul Traina /*
2038cf6c252SPaul Traina  * This define is used to represent *both* the accumulator and
2048cf6c252SPaul Traina  * x register in use-def computations.
2058cf6c252SPaul Traina  * Currently, the use-def code assumes only one definition per instruction.
2068cf6c252SPaul Traina  */
2078cf6c252SPaul Traina #define AX_ATOM N_ATOMS
2088cf6c252SPaul Traina 
2098cf6c252SPaul Traina /*
210ada6f083SXin LI  * These data structures are used in a Cocke and Schwartz style
211ada6f083SXin LI  * value numbering scheme.  Since the flowgraph is acyclic,
212ada6f083SXin LI  * exit values can be propagated from a node's predecessors
213ada6f083SXin LI  * provided it is uniquely defined.
214ada6f083SXin LI  */
215ada6f083SXin LI struct valnode {
216ada6f083SXin LI 	int code;
2176f9cba8fSJoseph Mingrone 	bpf_u_int32 v0, v1;
2186f9cba8fSJoseph Mingrone 	int val;		/* the value number */
219ada6f083SXin LI 	struct valnode *next;
220ada6f083SXin LI };
221ada6f083SXin LI 
222ada6f083SXin LI /* Integer constants mapped with the load immediate opcode. */
2236f9cba8fSJoseph Mingrone #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0U)
224ada6f083SXin LI 
225ada6f083SXin LI struct vmapinfo {
226ada6f083SXin LI 	int is_const;
2276f9cba8fSJoseph Mingrone 	bpf_u_int32 const_val;
228ada6f083SXin LI };
229ada6f083SXin LI 
230b00ab754SHans Petter Selasky typedef struct {
231ada6f083SXin LI 	/*
23257e22627SCy Schubert 	 * Place to longjmp to on an error.
23357e22627SCy Schubert 	 */
23457e22627SCy Schubert 	jmp_buf top_ctx;
23557e22627SCy Schubert 
23657e22627SCy Schubert 	/*
23757e22627SCy Schubert 	 * The buffer into which to put error message.
23757e22627SCy Schubert 	 * The buffer into which to put the error message.
23957e22627SCy Schubert 	char *errbuf;
24057e22627SCy Schubert 
24157e22627SCy Schubert 	/*
2428cf6c252SPaul Traina 	 * A flag to indicate that further optimization is needed.
2438cf6c252SPaul Traina 	 * Iterative passes are continued until a given pass yields no
2446f9cba8fSJoseph Mingrone 	 * code simplification or branch movement.
2458cf6c252SPaul Traina 	 */
246ada6f083SXin LI 	int done;
2478cf6c252SPaul Traina 
2486f9cba8fSJoseph Mingrone 	/*
2496f9cba8fSJoseph Mingrone 	 * XXX - detect loops that do nothing but repeated AND/OR pullups
2506f9cba8fSJoseph Mingrone 	 * and edge moves.
2516f9cba8fSJoseph Mingrone 	 * If 100 passes in a row do nothing but that, treat that as a
2526f9cba8fSJoseph Mingrone 	 * sign that we're stuck in a cycle in which each pass just
2536f9cba8fSJoseph Mingrone 	 * shuffles the code around and we eventually
2546f9cba8fSJoseph Mingrone 	 * get back to the original configuration.
2556f9cba8fSJoseph Mingrone 	 *
2566f9cba8fSJoseph Mingrone 	 * XXX - we need a non-heuristic way of detecting, or preventing,
2576f9cba8fSJoseph Mingrone 	 * such a cycle.
2586f9cba8fSJoseph Mingrone 	 */
2596f9cba8fSJoseph Mingrone 	int non_branch_movement_performed;
2606f9cba8fSJoseph Mingrone 
2616f9cba8fSJoseph Mingrone 	u_int n_blocks;		/* number of blocks in the CFG; guaranteed to be > 0, as it's a RET instruction at a minimum */
2628cf6c252SPaul Traina 	struct block **blocks;
2636f9cba8fSJoseph Mingrone 	u_int n_edges;		/* twice n_blocks, so guaranteed to be > 0 */
2648cf6c252SPaul Traina 	struct edge **edges;
2658cf6c252SPaul Traina 
2668cf6c252SPaul Traina 	/*
2678cf6c252SPaul Traina 	 * A bit vector set representation of the dominators.
2688cf6c252SPaul Traina 	 * We round up the set size to the next power of two.
2698cf6c252SPaul Traina 	 */
2706f9cba8fSJoseph Mingrone 	u_int nodewords;	/* number of 32-bit words for a bit vector of "number of nodes" bits; guaranteed to be > 0 */
2716f9cba8fSJoseph Mingrone 	u_int edgewords;	/* number of 32-bit words for a bit vector of "number of edges" bits; guaranteed to be > 0 */
2728cf6c252SPaul Traina 	struct block **levels;
2738cf6c252SPaul Traina 	bpf_u_int32 *space;
274ada6f083SXin LI 
2758cf6c252SPaul Traina #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
2768cf6c252SPaul Traina /*
2778cf6c252SPaul Traina  * True if a is in uset {p}
2788cf6c252SPaul Traina  */
2798cf6c252SPaul Traina #define SET_MEMBER(p, a) \
28057e22627SCy Schubert ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
2818cf6c252SPaul Traina 
2828cf6c252SPaul Traina /*
2838cf6c252SPaul Traina  * Add 'a' to uset p.
2848cf6c252SPaul Traina  */
2858cf6c252SPaul Traina #define SET_INSERT(p, a) \
28657e22627SCy Schubert (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
2878cf6c252SPaul Traina 
2888cf6c252SPaul Traina /*
2898cf6c252SPaul Traina  * Delete 'a' from uset p.
2908cf6c252SPaul Traina  */
2918cf6c252SPaul Traina #define SET_DELETE(p, a) \
29257e22627SCy Schubert (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
2938cf6c252SPaul Traina 
2948cf6c252SPaul Traina /*
2958cf6c252SPaul Traina  * a := a intersect b
2966f9cba8fSJoseph Mingrone  * n must be guaranteed to be > 0
2978cf6c252SPaul Traina  */
2988cf6c252SPaul Traina #define SET_INTERSECT(a, b, n)\
2998cf6c252SPaul Traina {\
3008cf6c252SPaul Traina 	register bpf_u_int32 *_x = a, *_y = b;\
3016f9cba8fSJoseph Mingrone 	register u_int _n = n;\
3026f9cba8fSJoseph Mingrone 	do *_x++ &= *_y++; while (--_n != 0);\
3038cf6c252SPaul Traina }
3048cf6c252SPaul Traina 
3058cf6c252SPaul Traina /*
3068cf6c252SPaul Traina  * a := a - b
3076f9cba8fSJoseph Mingrone  * n must be guaranteed to be > 0
3088cf6c252SPaul Traina  */
3098cf6c252SPaul Traina #define SET_SUBTRACT(a, b, n)\
3108cf6c252SPaul Traina {\
3118cf6c252SPaul Traina 	register bpf_u_int32 *_x = a, *_y = b;\
3126f9cba8fSJoseph Mingrone 	register u_int _n = n;\
3136f9cba8fSJoseph Mingrone 	do *_x++ &=~ *_y++; while (--_n != 0);\
3148cf6c252SPaul Traina }
3158cf6c252SPaul Traina 
3168cf6c252SPaul Traina /*
3178cf6c252SPaul Traina  * a := a union b
3186f9cba8fSJoseph Mingrone  * n must be guaranteed to be > 0
3198cf6c252SPaul Traina  */
3208cf6c252SPaul Traina #define SET_UNION(a, b, n)\
3218cf6c252SPaul Traina {\
3228cf6c252SPaul Traina 	register bpf_u_int32 *_x = a, *_y = b;\
3236f9cba8fSJoseph Mingrone 	register u_int _n = n;\
3246f9cba8fSJoseph Mingrone 	do *_x++ |= *_y++; while (--_n != 0);\
3258cf6c252SPaul Traina }
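/*
 * Illustrative example of the uset macros above (added for clarity, not
 * original code): with BITS_PER_WORD == 32, SET_INSERT(p, 37) sets bit
 * 37 % 32 == 5 of word p[37 / 32] == p[1], SET_MEMBER(p, 37) then tests
 * that same bit, and SET_DELETE(p, 37) clears it again.
 */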
3268cf6c252SPaul Traina 
327ada6f083SXin LI 	uset all_dom_sets;
328ada6f083SXin LI 	uset all_closure_sets;
329ada6f083SXin LI 	uset all_edge_sets;
330ada6f083SXin LI 
331ada6f083SXin LI #define MODULUS 213
332ada6f083SXin LI 	struct valnode *hashtbl[MODULUS];
3336f9cba8fSJoseph Mingrone 	bpf_u_int32 curval;
3346f9cba8fSJoseph Mingrone 	bpf_u_int32 maxval;
335ada6f083SXin LI 
336ada6f083SXin LI 	struct vmapinfo *vmap;
337ada6f083SXin LI 	struct valnode *vnode_base;
338ada6f083SXin LI 	struct valnode *next_vnode;
339b00ab754SHans Petter Selasky } opt_state_t;
340ada6f083SXin LI 
341ada6f083SXin LI typedef struct {
342ada6f083SXin LI 	/*
34357e22627SCy Schubert 	 * Place to longjmp to on an error.
34457e22627SCy Schubert 	 */
34557e22627SCy Schubert 	jmp_buf top_ctx;
34657e22627SCy Schubert 
34757e22627SCy Schubert 	/*
34857e22627SCy Schubert 	 * The buffer into which to put error message.
34857e22627SCy Schubert 	 * The buffer into which to put the error message.
35057e22627SCy Schubert 	char *errbuf;
35157e22627SCy Schubert 
35257e22627SCy Schubert 	/*
353ada6f083SXin LI 	 * Some pointers used to convert the basic block form of the code,
354ada6f083SXin LI 	 * into the array form that BPF requires.  'fstart' will point to
355ada6f083SXin LI 	 * the malloc'd array while 'ftail' is used during the recursive
356ada6f083SXin LI 	 * traversal.
357ada6f083SXin LI 	 */
358ada6f083SXin LI 	struct bpf_insn *fstart;
359ada6f083SXin LI 	struct bpf_insn *ftail;
360ada6f083SXin LI } conv_state_t;
361ada6f083SXin LI 
36257e22627SCy Schubert static void opt_init(opt_state_t *, struct icode *);
363ada6f083SXin LI static void opt_cleanup(opt_state_t *);
36457e22627SCy Schubert static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
36557e22627SCy Schubert     PCAP_PRINTFLIKE(2, 3);
366ada6f083SXin LI 
367ada6f083SXin LI static void intern_blocks(opt_state_t *, struct icode *);
368ada6f083SXin LI 
369ada6f083SXin LI static void find_inedges(opt_state_t *, struct block *);
370ada6f083SXin LI #ifdef BDEBUG
37157e22627SCy Schubert static void opt_dump(opt_state_t *, struct icode *);
372ada6f083SXin LI #endif
3738cf6c252SPaul Traina 
3748cf6c252SPaul Traina #ifndef MAX
3758cf6c252SPaul Traina #define MAX(a,b) ((a)>(b)?(a):(b))
3768cf6c252SPaul Traina #endif
3778cf6c252SPaul Traina 
3788cf6c252SPaul Traina static void
379ada6f083SXin LI find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
3808cf6c252SPaul Traina {
3818cf6c252SPaul Traina 	int level;
3828cf6c252SPaul Traina 
383ada6f083SXin LI 	if (isMarked(ic, b))
3848cf6c252SPaul Traina 		return;
3858cf6c252SPaul Traina 
386ada6f083SXin LI 	Mark(ic, b);
3878cf6c252SPaul Traina 	b->link = 0;
3888cf6c252SPaul Traina 
3898cf6c252SPaul Traina 	if (JT(b)) {
390ada6f083SXin LI 		find_levels_r(opt_state, ic, JT(b));
391ada6f083SXin LI 		find_levels_r(opt_state, ic, JF(b));
3928cf6c252SPaul Traina 		level = MAX(JT(b)->level, JF(b)->level) + 1;
3938cf6c252SPaul Traina 	} else
3948cf6c252SPaul Traina 		level = 0;
3958cf6c252SPaul Traina 	b->level = level;
396ada6f083SXin LI 	b->link = opt_state->levels[level];
397ada6f083SXin LI 	opt_state->levels[level] = b;
3988cf6c252SPaul Traina }
3998cf6c252SPaul Traina 
4008cf6c252SPaul Traina /*
4018cf6c252SPaul Traina  * Level graph.  The levels go from 0 at the leaves to
402ada6f083SXin LI  * N_LEVELS at the root.  The opt_state->levels[] array points to the
4038cf6c252SPaul Traina  * first node of the level list, whose elements are linked
4048cf6c252SPaul Traina  * with the 'link' field of the struct block.
4058cf6c252SPaul Traina  */
4068cf6c252SPaul Traina static void
407ada6f083SXin LI find_levels(opt_state_t *opt_state, struct icode *ic)
4088cf6c252SPaul Traina {
409ada6f083SXin LI 	memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
410ada6f083SXin LI 	unMarkAll(ic);
411ada6f083SXin LI 	find_levels_r(opt_state, ic, ic->root);
4128cf6c252SPaul Traina }
4138cf6c252SPaul Traina 
4148cf6c252SPaul Traina /*
4158cf6c252SPaul Traina  * Find dominator relationships.
4168cf6c252SPaul Traina  * Assumes graph has been leveled.
4178cf6c252SPaul Traina  */
4188cf6c252SPaul Traina static void
419ada6f083SXin LI find_dom(opt_state_t *opt_state, struct block *root)
4208cf6c252SPaul Traina {
4216f9cba8fSJoseph Mingrone 	u_int i;
4226f9cba8fSJoseph Mingrone 	int level;
4238cf6c252SPaul Traina 	struct block *b;
4248cf6c252SPaul Traina 	bpf_u_int32 *x;
4258cf6c252SPaul Traina 
4268cf6c252SPaul Traina 	/*
4278cf6c252SPaul Traina 	 * Initialize sets to contain all nodes.
4288cf6c252SPaul Traina 	 */
429ada6f083SXin LI 	x = opt_state->all_dom_sets;
4306f9cba8fSJoseph Mingrone 	/*
4316f9cba8fSJoseph Mingrone 	 * In opt_init(), we've made sure the product doesn't overflow.
4326f9cba8fSJoseph Mingrone 	 */
433ada6f083SXin LI 	i = opt_state->n_blocks * opt_state->nodewords;
4346f9cba8fSJoseph Mingrone 	while (i != 0) {
4356f9cba8fSJoseph Mingrone 		--i;
436b00ab754SHans Petter Selasky 		*x++ = 0xFFFFFFFFU;
4376f9cba8fSJoseph Mingrone 	}
4388cf6c252SPaul Traina 	/* Root starts off empty. */
4396f9cba8fSJoseph Mingrone 	for (i = opt_state->nodewords; i != 0;) {
4406f9cba8fSJoseph Mingrone 		--i;
4418cf6c252SPaul Traina 		root->dom[i] = 0;
4426f9cba8fSJoseph Mingrone 	}
4438cf6c252SPaul Traina 
4448cf6c252SPaul Traina 	/* root->level is the highest level number found. */
4456f9cba8fSJoseph Mingrone 	for (level = root->level; level >= 0; --level) {
4466f9cba8fSJoseph Mingrone 		for (b = opt_state->levels[level]; b; b = b->link) {
4478cf6c252SPaul Traina 			SET_INSERT(b->dom, b->id);
4488cf6c252SPaul Traina 			if (JT(b) == 0)
4498cf6c252SPaul Traina 				continue;
450ada6f083SXin LI 			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
451ada6f083SXin LI 			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
4528cf6c252SPaul Traina 		}
4538cf6c252SPaul Traina 	}
4548cf6c252SPaul Traina }
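/*
 * Illustrative example (added, hypothetical diamond-shaped CFG): if root
 * branches to b1 and b2, both of which branch to b3, then find_dom()
 * leaves b3->dom as {root, b3}: the intersection of b1's dominator set
 * {root, b1} and b2's dominator set {root, b2}, plus b3 itself.
 */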
4558cf6c252SPaul Traina 
4568cf6c252SPaul Traina static void
457ada6f083SXin LI propedom(opt_state_t *opt_state, struct edge *ep)
4588cf6c252SPaul Traina {
4598cf6c252SPaul Traina 	SET_INSERT(ep->edom, ep->id);
4608cf6c252SPaul Traina 	if (ep->succ) {
461ada6f083SXin LI 		SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
462ada6f083SXin LI 		SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
4638cf6c252SPaul Traina 	}
4648cf6c252SPaul Traina }
4658cf6c252SPaul Traina 
4668cf6c252SPaul Traina /*
4678cf6c252SPaul Traina  * Compute edge dominators.
4688cf6c252SPaul Traina  * Assumes graph has been leveled and predecessors established.
4698cf6c252SPaul Traina  */
4708cf6c252SPaul Traina static void
471ada6f083SXin LI find_edom(opt_state_t *opt_state, struct block *root)
4728cf6c252SPaul Traina {
4736f9cba8fSJoseph Mingrone 	u_int i;
4748cf6c252SPaul Traina 	uset x;
4756f9cba8fSJoseph Mingrone 	int level;
4768cf6c252SPaul Traina 	struct block *b;
4778cf6c252SPaul Traina 
478ada6f083SXin LI 	x = opt_state->all_edge_sets;
4796f9cba8fSJoseph Mingrone 	/*
4806f9cba8fSJoseph Mingrone 	 * In opt_init(), we've made sure the product doesn't overflow.
4816f9cba8fSJoseph Mingrone 	 */
4826f9cba8fSJoseph Mingrone 	for (i = opt_state->n_edges * opt_state->edgewords; i != 0; ) {
4836f9cba8fSJoseph Mingrone 		--i;
484b00ab754SHans Petter Selasky 		x[i] = 0xFFFFFFFFU;
4856f9cba8fSJoseph Mingrone 	}
4868cf6c252SPaul Traina 
4878cf6c252SPaul Traina 	/* root->level is the highest level number found. */
488ada6f083SXin LI 	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
489ada6f083SXin LI 	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
4906f9cba8fSJoseph Mingrone 	for (level = root->level; level >= 0; --level) {
4916f9cba8fSJoseph Mingrone 		for (b = opt_state->levels[level]; b != 0; b = b->link) {
492ada6f083SXin LI 			propedom(opt_state, &b->et);
493ada6f083SXin LI 			propedom(opt_state, &b->ef);
4948cf6c252SPaul Traina 		}
4958cf6c252SPaul Traina 	}
4968cf6c252SPaul Traina }
4978cf6c252SPaul Traina 
4988cf6c252SPaul Traina /*
4998cf6c252SPaul Traina  * Find the backwards transitive closure of the flow graph.  These sets
5008cf6c252SPaul Traina  * are backwards in the sense that we find the set of nodes that reach
5018cf6c252SPaul Traina  * a given node, not the set of nodes that can be reached by a node.
5028cf6c252SPaul Traina  *
5038cf6c252SPaul Traina  * Assumes graph has been leveled.
5048cf6c252SPaul Traina  */
5058cf6c252SPaul Traina static void
506ada6f083SXin LI find_closure(opt_state_t *opt_state, struct block *root)
5078cf6c252SPaul Traina {
5086f9cba8fSJoseph Mingrone 	int level;
5098cf6c252SPaul Traina 	struct block *b;
5108cf6c252SPaul Traina 
5118cf6c252SPaul Traina 	/*
5128cf6c252SPaul Traina 	 * Initialize sets to contain no nodes.
5138cf6c252SPaul Traina 	 */
514ada6f083SXin LI 	memset((char *)opt_state->all_closure_sets, 0,
515ada6f083SXin LI 	      opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
5168cf6c252SPaul Traina 
5178cf6c252SPaul Traina 	/* root->level is the highest level number found. */
5186f9cba8fSJoseph Mingrone 	for (level = root->level; level >= 0; --level) {
5196f9cba8fSJoseph Mingrone 		for (b = opt_state->levels[level]; b; b = b->link) {
5208cf6c252SPaul Traina 			SET_INSERT(b->closure, b->id);
5218cf6c252SPaul Traina 			if (JT(b) == 0)
5228cf6c252SPaul Traina 				continue;
523ada6f083SXin LI 			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
524ada6f083SXin LI 			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
5258cf6c252SPaul Traina 		}
5268cf6c252SPaul Traina 	}
5278cf6c252SPaul Traina }
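/*
 * Illustrative example (added, hypothetical CFG): if root branches to b1
 * and b2, and both of those branch to b3, then after find_closure()
 * b3->closure contains root, b1, b2 and b3 itself, i.e. every node from
 * which b3 can be reached.
 */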
5288cf6c252SPaul Traina 
5298cf6c252SPaul Traina /*
5306f9cba8fSJoseph Mingrone  * Return the register number that is used by s.
5316f9cba8fSJoseph Mingrone  *
5326f9cba8fSJoseph Mingrone  * Returns A_ATOM if A is used, X_ATOM if X is used, AX_ATOM if both A and X
5336f9cba8fSJoseph Mingrone  * are used, the scratch memory location's number if a scratch memory
5346f9cba8fSJoseph Mingrone  * location is used (e.g., 0 for M[0]), or -1 if none of those are used.
5358cf6c252SPaul Traina  *
5368cf6c252SPaul Traina  * The implementation should probably change to an array access.
5378cf6c252SPaul Traina  */
5388cf6c252SPaul Traina static int
539edc89b24SXin LI atomuse(struct stmt *s)
5408cf6c252SPaul Traina {
5418cf6c252SPaul Traina 	register int c = s->code;
5428cf6c252SPaul Traina 
5438cf6c252SPaul Traina 	if (c == NOP)
5448cf6c252SPaul Traina 		return -1;
5458cf6c252SPaul Traina 
5468cf6c252SPaul Traina 	switch (BPF_CLASS(c)) {
5478cf6c252SPaul Traina 
5488cf6c252SPaul Traina 	case BPF_RET:
5498cf6c252SPaul Traina 		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
5508cf6c252SPaul Traina 			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
5518cf6c252SPaul Traina 
5528cf6c252SPaul Traina 	case BPF_LD:
5538cf6c252SPaul Traina 	case BPF_LDX:
5546f9cba8fSJoseph Mingrone 		/*
5556f9cba8fSJoseph Mingrone 		 * As there are fewer than 2^31 memory locations,
5566f9cba8fSJoseph Mingrone 		 * s->k should be convertible to int without problems.
5576f9cba8fSJoseph Mingrone 		 */
5588cf6c252SPaul Traina 		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
5596f9cba8fSJoseph Mingrone 			(BPF_MODE(c) == BPF_MEM) ? (int)s->k : -1;
5608cf6c252SPaul Traina 
5618cf6c252SPaul Traina 	case BPF_ST:
5628cf6c252SPaul Traina 		return A_ATOM;
5638cf6c252SPaul Traina 
5648cf6c252SPaul Traina 	case BPF_STX:
5658cf6c252SPaul Traina 		return X_ATOM;
5668cf6c252SPaul Traina 
5678cf6c252SPaul Traina 	case BPF_JMP:
5688cf6c252SPaul Traina 	case BPF_ALU:
5698cf6c252SPaul Traina 		if (BPF_SRC(c) == BPF_X)
5708cf6c252SPaul Traina 			return AX_ATOM;
5718cf6c252SPaul Traina 		return A_ATOM;
5728cf6c252SPaul Traina 
5738cf6c252SPaul Traina 	case BPF_MISC:
5748cf6c252SPaul Traina 		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
5758cf6c252SPaul Traina 	}
5768cf6c252SPaul Traina 	abort();
5778cf6c252SPaul Traina 	/* NOTREACHED */
5788cf6c252SPaul Traina }
5798cf6c252SPaul Traina 
5808cf6c252SPaul Traina /*
5818cf6c252SPaul Traina  * Return the register number that is defined by 's'.  We assume that
5828cf6c252SPaul Traina  * a single stmt cannot define more than one register.  If no register
5838cf6c252SPaul Traina  * is defined, return -1.
5848cf6c252SPaul Traina  *
5858cf6c252SPaul Traina  * The implementation should probably change to an array access.
5868cf6c252SPaul Traina  */
5878cf6c252SPaul Traina static int
588edc89b24SXin LI atomdef(struct stmt *s)
5898cf6c252SPaul Traina {
5908cf6c252SPaul Traina 	if (s->code == NOP)
5918cf6c252SPaul Traina 		return -1;
5928cf6c252SPaul Traina 
5938cf6c252SPaul Traina 	switch (BPF_CLASS(s->code)) {
5948cf6c252SPaul Traina 
5958cf6c252SPaul Traina 	case BPF_LD:
5968cf6c252SPaul Traina 	case BPF_ALU:
5978cf6c252SPaul Traina 		return A_ATOM;
5988cf6c252SPaul Traina 
5998cf6c252SPaul Traina 	case BPF_LDX:
6008cf6c252SPaul Traina 		return X_ATOM;
6018cf6c252SPaul Traina 
6028cf6c252SPaul Traina 	case BPF_ST:
6038cf6c252SPaul Traina 	case BPF_STX:
6048cf6c252SPaul Traina 		return s->k;
6058cf6c252SPaul Traina 
6068cf6c252SPaul Traina 	case BPF_MISC:
6078cf6c252SPaul Traina 		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
6088cf6c252SPaul Traina 	}
6098cf6c252SPaul Traina 	return -1;
6108cf6c252SPaul Traina }
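/*
 * Illustrative example for atomuse()/atomdef() (added, not original code):
 * for the statement "ldx M[3]", atomuse() returns 3 (the scratch memory
 * location it reads) and atomdef() returns X_ATOM (the register it
 * writes); for "add x" (BPF_ALU|BPF_ADD|BPF_X), atomuse() returns AX_ATOM
 * and atomdef() returns A_ATOM.
 */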
6118cf6c252SPaul Traina 
61204fb2745SSam Leffler /*
61304fb2745SSam Leffler  * Compute the sets of registers used, defined, and killed by 'b'.
61404fb2745SSam Leffler  *
61504fb2745SSam Leffler  * "Used" means that a statement in 'b' uses the register before any
61604fb2745SSam Leffler  * statement in 'b' defines it, i.e. it uses the value left in
61704fb2745SSam Leffler  * that register by a predecessor block of this block.
61804fb2745SSam Leffler  * "Defined" means that a statement in 'b' defines it.
61904fb2745SSam Leffler  * "Killed" means that a statement in 'b' defines it before any
62004fb2745SSam Leffler  * statement in 'b' uses it, i.e. it kills the value left in that
62104fb2745SSam Leffler  * register by a predecessor block of this block.
62204fb2745SSam Leffler  */
6238cf6c252SPaul Traina static void
624edc89b24SXin LI compute_local_ud(struct block *b)
6258cf6c252SPaul Traina {
6268cf6c252SPaul Traina 	struct slist *s;
627ada6f083SXin LI 	atomset def = 0, use = 0, killed = 0;
6288cf6c252SPaul Traina 	int atom;
6298cf6c252SPaul Traina 
6308cf6c252SPaul Traina 	for (s = b->stmts; s; s = s->next) {
6318cf6c252SPaul Traina 		if (s->s.code == NOP)
6328cf6c252SPaul Traina 			continue;
6338cf6c252SPaul Traina 		atom = atomuse(&s->s);
6348cf6c252SPaul Traina 		if (atom >= 0) {
6358cf6c252SPaul Traina 			if (atom == AX_ATOM) {
6368cf6c252SPaul Traina 				if (!ATOMELEM(def, X_ATOM))
6378cf6c252SPaul Traina 					use |= ATOMMASK(X_ATOM);
6388cf6c252SPaul Traina 				if (!ATOMELEM(def, A_ATOM))
6398cf6c252SPaul Traina 					use |= ATOMMASK(A_ATOM);
6408cf6c252SPaul Traina 			}
6418cf6c252SPaul Traina 			else if (atom < N_ATOMS) {
6428cf6c252SPaul Traina 				if (!ATOMELEM(def, atom))
6438cf6c252SPaul Traina 					use |= ATOMMASK(atom);
6448cf6c252SPaul Traina 			}
6458cf6c252SPaul Traina 			else
6468cf6c252SPaul Traina 				abort();
6478cf6c252SPaul Traina 		}
6488cf6c252SPaul Traina 		atom = atomdef(&s->s);
6498cf6c252SPaul Traina 		if (atom >= 0) {
6508cf6c252SPaul Traina 			if (!ATOMELEM(use, atom))
651ada6f083SXin LI 				killed |= ATOMMASK(atom);
6528cf6c252SPaul Traina 			def |= ATOMMASK(atom);
6538cf6c252SPaul Traina 		}
6548cf6c252SPaul Traina 	}
65504fb2745SSam Leffler 	if (BPF_CLASS(b->s.code) == BPF_JMP) {
65604fb2745SSam Leffler 		/*
65704fb2745SSam Leffler 		 * XXX - what about RET?
65804fb2745SSam Leffler 		 */
65904fb2745SSam Leffler 		atom = atomuse(&b->s);
66004fb2745SSam Leffler 		if (atom >= 0) {
66104fb2745SSam Leffler 			if (atom == AX_ATOM) {
66204fb2745SSam Leffler 				if (!ATOMELEM(def, X_ATOM))
66304fb2745SSam Leffler 					use |= ATOMMASK(X_ATOM);
66404fb2745SSam Leffler 				if (!ATOMELEM(def, A_ATOM))
6658cf6c252SPaul Traina 					use |= ATOMMASK(A_ATOM);
66604fb2745SSam Leffler 			}
66704fb2745SSam Leffler 			else if (atom < N_ATOMS) {
66804fb2745SSam Leffler 				if (!ATOMELEM(def, atom))
66904fb2745SSam Leffler 					use |= ATOMMASK(atom);
67004fb2745SSam Leffler 			}
67104fb2745SSam Leffler 			else
67204fb2745SSam Leffler 				abort();
67304fb2745SSam Leffler 		}
67404fb2745SSam Leffler 	}
6758cf6c252SPaul Traina 
6768cf6c252SPaul Traina 	b->def = def;
677ada6f083SXin LI 	b->kill = killed;
6788cf6c252SPaul Traina 	b->in_use = use;
6798cf6c252SPaul Traina }
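/*
 * Illustrative example (added, hypothetical block, ignoring the block's
 * final branch): for a block whose statements are "ld M[0]" followed by
 * "st M[1]", the load uses M[0] and defines A, and the store uses A
 * (already defined within the block) and defines M[1]; so in_use is
 * {M[0]}, def is {A, M[1]} and kill is {A, M[1]}, since both A and M[1]
 * are written before any use of the values they held on entry.
 */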
6808cf6c252SPaul Traina 
6818cf6c252SPaul Traina /*
6828cf6c252SPaul Traina  * Assume graph is already leveled.
6838cf6c252SPaul Traina  */
6848cf6c252SPaul Traina static void
685ada6f083SXin LI find_ud(opt_state_t *opt_state, struct block *root)
6868cf6c252SPaul Traina {
6878cf6c252SPaul Traina 	int i, maxlevel;
6888cf6c252SPaul Traina 	struct block *p;
6898cf6c252SPaul Traina 
6908cf6c252SPaul Traina 	/*
6918cf6c252SPaul Traina 	 * root->level is the highest level number found;
6928cf6c252SPaul Traina 	 * count down from there.
6938cf6c252SPaul Traina 	 */
6948cf6c252SPaul Traina 	maxlevel = root->level;
6958cf6c252SPaul Traina 	for (i = maxlevel; i >= 0; --i)
696ada6f083SXin LI 		for (p = opt_state->levels[i]; p; p = p->link) {
6978cf6c252SPaul Traina 			compute_local_ud(p);
6988cf6c252SPaul Traina 			p->out_use = 0;
6998cf6c252SPaul Traina 		}
7008cf6c252SPaul Traina 
7018cf6c252SPaul Traina 	for (i = 1; i <= maxlevel; ++i) {
702ada6f083SXin LI 		for (p = opt_state->levels[i]; p; p = p->link) {
7038cf6c252SPaul Traina 			p->out_use |= JT(p)->in_use | JF(p)->in_use;
7048cf6c252SPaul Traina 			p->in_use |= p->out_use &~ p->kill;
7058cf6c252SPaul Traina 		}
7068cf6c252SPaul Traina 	}
7078cf6c252SPaul Traina }
7088cf6c252SPaul Traina static void
709ada6f083SXin LI init_val(opt_state_t *opt_state)
7108cf6c252SPaul Traina {
711ada6f083SXin LI 	opt_state->curval = 0;
712ada6f083SXin LI 	opt_state->next_vnode = opt_state->vnode_base;
713ada6f083SXin LI 	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
714ada6f083SXin LI 	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
7158cf6c252SPaul Traina }
7168cf6c252SPaul Traina 
7176f9cba8fSJoseph Mingrone /*
7186f9cba8fSJoseph Mingrone  * Because we really don't have an IR, this stuff is a little messy.
7196f9cba8fSJoseph Mingrone  *
7206f9cba8fSJoseph Mingrone  * This routine looks in the table of existing value numbers for a value
7216f9cba8fSJoseph Mingrone  * generated from an operation with the specified opcode and
7226f9cba8fSJoseph Mingrone  * the specified values.  If it finds it, it returns its value number,
7236f9cba8fSJoseph Mingrone  * otherwise it makes a new entry in the table and returns the
7246f9cba8fSJoseph Mingrone  * value number of that entry.
7256f9cba8fSJoseph Mingrone  */
7266f9cba8fSJoseph Mingrone static bpf_u_int32
7276f9cba8fSJoseph Mingrone F(opt_state_t *opt_state, int code, bpf_u_int32 v0, bpf_u_int32 v1)
7288cf6c252SPaul Traina {
7298cf6c252SPaul Traina 	u_int hash;
7306f9cba8fSJoseph Mingrone 	bpf_u_int32 val;
7318cf6c252SPaul Traina 	struct valnode *p;
7328cf6c252SPaul Traina 
7336f9cba8fSJoseph Mingrone 	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
7348cf6c252SPaul Traina 	hash %= MODULUS;
7358cf6c252SPaul Traina 
736ada6f083SXin LI 	for (p = opt_state->hashtbl[hash]; p; p = p->next)
7378cf6c252SPaul Traina 		if (p->code == code && p->v0 == v0 && p->v1 == v1)
7388cf6c252SPaul Traina 			return p->val;
7398cf6c252SPaul Traina 
7406f9cba8fSJoseph Mingrone 	/*
7416f9cba8fSJoseph Mingrone 	 * Not found.  Allocate a new value, and assign it a new
7426f9cba8fSJoseph Mingrone 	 * value number.
7436f9cba8fSJoseph Mingrone 	 *
7446f9cba8fSJoseph Mingrone 	 * opt_state->curval starts out as 0, which means VAL_UNKNOWN; we
7456f9cba8fSJoseph Mingrone 	 * increment it before using it as the new value number, which
7466f9cba8fSJoseph Mingrone 	 * means we never assign VAL_UNKNOWN.
7476f9cba8fSJoseph Mingrone 	 *
7486f9cba8fSJoseph Mingrone 	 * XXX - unless we overflow, but we probably won't have 2^32-1
7496f9cba8fSJoseph Mingrone 	 * values; we treat 32 bits as effectively infinite.
7506f9cba8fSJoseph Mingrone 	 */
751ada6f083SXin LI 	val = ++opt_state->curval;
7528cf6c252SPaul Traina 	if (BPF_MODE(code) == BPF_IMM &&
7538cf6c252SPaul Traina 	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
754ada6f083SXin LI 		opt_state->vmap[val].const_val = v0;
755ada6f083SXin LI 		opt_state->vmap[val].is_const = 1;
7568cf6c252SPaul Traina 	}
757ada6f083SXin LI 	p = opt_state->next_vnode++;
7588cf6c252SPaul Traina 	p->val = val;
7598cf6c252SPaul Traina 	p->code = code;
7608cf6c252SPaul Traina 	p->v0 = v0;
7618cf6c252SPaul Traina 	p->v1 = v1;
762ada6f083SXin LI 	p->next = opt_state->hashtbl[hash];
763ada6f083SXin LI 	opt_state->hashtbl[hash] = p;
7648cf6c252SPaul Traina 
7658cf6c252SPaul Traina 	return val;
7668cf6c252SPaul Traina }
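/*
 * Illustrative example of the value-numbering table (added, not original
 * code): the first call to K(2), i.e. F(opt_state, BPF_LD|BPF_IMM|BPF_W,
 * 2, 0U), allocates a fresh value number, marks it as the constant 2 in
 * opt_state->vmap[], and hashes the entry; any later K(2) in the same
 * pass finds that entry and returns the same value number, which is what
 * lets identical loads of the same constant be recognized as equal.
 */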
7678cf6c252SPaul Traina 
7688cf6c252SPaul Traina static inline void
7696f9cba8fSJoseph Mingrone vstore(struct stmt *s, bpf_u_int32 *valp, bpf_u_int32 newval, int alter)
7708cf6c252SPaul Traina {
771b00ab754SHans Petter Selasky 	if (alter && newval != VAL_UNKNOWN && *valp == newval)
7728cf6c252SPaul Traina 		s->code = NOP;
7738cf6c252SPaul Traina 	else
7748cf6c252SPaul Traina 		*valp = newval;
7758cf6c252SPaul Traina }
7768cf6c252SPaul Traina 
777681ed54cSXin LI /*
778681ed54cSXin LI  * Do constant-folding on binary operators.
779681ed54cSXin LI  * (Unary operators are handled elsewhere.)
780681ed54cSXin LI  */
7818cf6c252SPaul Traina static void
7826f9cba8fSJoseph Mingrone fold_op(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 v0, bpf_u_int32 v1)
7838cf6c252SPaul Traina {
784ef96d74fSMax Laier 	bpf_u_int32 a, b;
7858cf6c252SPaul Traina 
786ada6f083SXin LI 	a = opt_state->vmap[v0].const_val;
787ada6f083SXin LI 	b = opt_state->vmap[v1].const_val;
7888cf6c252SPaul Traina 
7898cf6c252SPaul Traina 	switch (BPF_OP(s->code)) {
7908cf6c252SPaul Traina 	case BPF_ADD:
7918cf6c252SPaul Traina 		a += b;
7928cf6c252SPaul Traina 		break;
7938cf6c252SPaul Traina 
7948cf6c252SPaul Traina 	case BPF_SUB:
7958cf6c252SPaul Traina 		a -= b;
7968cf6c252SPaul Traina 		break;
7978cf6c252SPaul Traina 
7988cf6c252SPaul Traina 	case BPF_MUL:
7998cf6c252SPaul Traina 		a *= b;
8008cf6c252SPaul Traina 		break;
8018cf6c252SPaul Traina 
8028cf6c252SPaul Traina 	case BPF_DIV:
8038cf6c252SPaul Traina 		if (b == 0)
80457e22627SCy Schubert 			opt_error(opt_state, "division by zero");
8058cf6c252SPaul Traina 		a /= b;
8068cf6c252SPaul Traina 		break;
8078cf6c252SPaul Traina 
808681ed54cSXin LI 	case BPF_MOD:
809681ed54cSXin LI 		if (b == 0)
81057e22627SCy Schubert 			opt_error(opt_state, "modulus by zero");
811681ed54cSXin LI 		a %= b;
812681ed54cSXin LI 		break;
813681ed54cSXin LI 
8148cf6c252SPaul Traina 	case BPF_AND:
8158cf6c252SPaul Traina 		a &= b;
8168cf6c252SPaul Traina 		break;
8178cf6c252SPaul Traina 
8188cf6c252SPaul Traina 	case BPF_OR:
8198cf6c252SPaul Traina 		a |= b;
8208cf6c252SPaul Traina 		break;
8218cf6c252SPaul Traina 
822681ed54cSXin LI 	case BPF_XOR:
823681ed54cSXin LI 		a ^= b;
824681ed54cSXin LI 		break;
825681ed54cSXin LI 
8268cf6c252SPaul Traina 	case BPF_LSH:
82757e22627SCy Schubert 		/*
82857e22627SCy Schubert 		 * A left shift of more than the width of the type
82957e22627SCy Schubert 		 * is undefined in C; we'll just treat it as shifting
83057e22627SCy Schubert 		 * all the bits out.
83157e22627SCy Schubert 		 *
83257e22627SCy Schubert 		 * XXX - the BPF interpreter doesn't check for this,
83357e22627SCy Schubert 		 * so its behavior is dependent on the behavior of
83457e22627SCy Schubert 		 * the processor on which it's running.  There are
83557e22627SCy Schubert 		 * processors on which it shifts all the bits out
83657e22627SCy Schubert 		 * and processors on which it does no shift.
83757e22627SCy Schubert 		 */
83857e22627SCy Schubert 		if (b < 32)
8398cf6c252SPaul Traina 			a <<= b;
84057e22627SCy Schubert 		else
84157e22627SCy Schubert 			a = 0;
8428cf6c252SPaul Traina 		break;
8438cf6c252SPaul Traina 
8448cf6c252SPaul Traina 	case BPF_RSH:
84557e22627SCy Schubert 		/*
84657e22627SCy Schubert 		 * A right shift of more than the width of the type
84757e22627SCy Schubert 		 * is undefined in C; we'll just treat it as shifting
84857e22627SCy Schubert 		 * all the bits out.
84957e22627SCy Schubert 		 *
85057e22627SCy Schubert 		 * XXX - the BPF interpreter doesn't check for this,
85157e22627SCy Schubert 		 * so its behavior is dependent on the behavior of
85257e22627SCy Schubert 		 * the processor on which it's running.  There are
85357e22627SCy Schubert 		 * processors on which it shifts all the bits out
85457e22627SCy Schubert 		 * and processors on which it does no shift.
85557e22627SCy Schubert 		 */
85657e22627SCy Schubert 		if (b < 32)
8578cf6c252SPaul Traina 			a >>= b;
85857e22627SCy Schubert 		else
85957e22627SCy Schubert 			a = 0;
8608cf6c252SPaul Traina 		break;
8618cf6c252SPaul Traina 
8628cf6c252SPaul Traina 	default:
8638cf6c252SPaul Traina 		abort();
8648cf6c252SPaul Traina 	}
8658cf6c252SPaul Traina 	s->k = a;
8668cf6c252SPaul Traina 	s->code = BPF_LD|BPF_IMM;
8676f9cba8fSJoseph Mingrone 	/*
8686f9cba8fSJoseph Mingrone 	 * XXX - optimizer loop detection.
8696f9cba8fSJoseph Mingrone 	 */
8706f9cba8fSJoseph Mingrone 	opt_state->non_branch_movement_performed = 1;
871ada6f083SXin LI 	opt_state->done = 0;
8728cf6c252SPaul Traina }
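/*
 * Illustrative example of constant folding (added, hypothetical values):
 * if v0 and v1 are the value numbers of the constants 20 and 5 and s is
 * a BPF_DIV ALU statement, fold_op() rewrites s in place as "ld #4"
 * (BPF_LD|BPF_IMM with k == 4) and clears opt_state->done so another
 * pass runs; dividing or taking a modulus by a constant 0 instead
 * reports an error via opt_error().
 */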
8738cf6c252SPaul Traina 
8748cf6c252SPaul Traina static inline struct slist *
875edc89b24SXin LI this_op(struct slist *s)
8768cf6c252SPaul Traina {
8778cf6c252SPaul Traina 	while (s != 0 && s->s.code == NOP)
8788cf6c252SPaul Traina 		s = s->next;
8798cf6c252SPaul Traina 	return s;
8808cf6c252SPaul Traina }
8818cf6c252SPaul Traina 
8828cf6c252SPaul Traina static void
883edc89b24SXin LI opt_not(struct block *b)
8848cf6c252SPaul Traina {
8858cf6c252SPaul Traina 	struct block *tmp = JT(b);
8868cf6c252SPaul Traina 
8878cf6c252SPaul Traina 	JT(b) = JF(b);
8888cf6c252SPaul Traina 	JF(b) = tmp;
8898cf6c252SPaul Traina }
8908cf6c252SPaul Traina 
8918cf6c252SPaul Traina static void
892ada6f083SXin LI opt_peep(opt_state_t *opt_state, struct block *b)
8938cf6c252SPaul Traina {
8948cf6c252SPaul Traina 	struct slist *s;
8958cf6c252SPaul Traina 	struct slist *next, *last;
8966f9cba8fSJoseph Mingrone 	bpf_u_int32 val;
8978cf6c252SPaul Traina 
8988cf6c252SPaul Traina 	s = b->stmts;
8998cf6c252SPaul Traina 	if (s == 0)
9008cf6c252SPaul Traina 		return;
9018cf6c252SPaul Traina 
9028cf6c252SPaul Traina 	last = s;
90304fb2745SSam Leffler 	for (/*empty*/; /*empty*/; s = next) {
90404fb2745SSam Leffler 		/*
90504fb2745SSam Leffler 		 * Skip over nops.
90604fb2745SSam Leffler 		 */
9078cf6c252SPaul Traina 		s = this_op(s);
9088cf6c252SPaul Traina 		if (s == 0)
90904fb2745SSam Leffler 			break;	/* nothing left in the block */
91004fb2745SSam Leffler 
91104fb2745SSam Leffler 		/*
91204fb2745SSam Leffler 		 * Find the next real instruction after that one
91304fb2745SSam Leffler 		 * (skipping nops).
91404fb2745SSam Leffler 		 */
9158cf6c252SPaul Traina 		next = this_op(s->next);
9168cf6c252SPaul Traina 		if (next == 0)
91704fb2745SSam Leffler 			break;	/* no next instruction */
9188cf6c252SPaul Traina 		last = next;
9198cf6c252SPaul Traina 
9208cf6c252SPaul Traina 		/*
9218cf6c252SPaul Traina 		 * st  M[k]	-->	st  M[k]
9228cf6c252SPaul Traina 		 * ldx M[k]		tax
9238cf6c252SPaul Traina 		 */
9248cf6c252SPaul Traina 		if (s->s.code == BPF_ST &&
9258cf6c252SPaul Traina 		    next->s.code == (BPF_LDX|BPF_MEM) &&
9268cf6c252SPaul Traina 		    s->s.k == next->s.k) {
9276f9cba8fSJoseph Mingrone 			/*
9286f9cba8fSJoseph Mingrone 			 * XXX - optimizer loop detection.
9296f9cba8fSJoseph Mingrone 			 */
9306f9cba8fSJoseph Mingrone 			opt_state->non_branch_movement_performed = 1;
931ada6f083SXin LI 			opt_state->done = 0;
9328cf6c252SPaul Traina 			next->s.code = BPF_MISC|BPF_TAX;
9338cf6c252SPaul Traina 		}
9348cf6c252SPaul Traina 		/*
9358cf6c252SPaul Traina 		 * ld  #k	-->	ldx  #k
9368cf6c252SPaul Traina 		 * tax			txa
9378cf6c252SPaul Traina 		 */
9388cf6c252SPaul Traina 		if (s->s.code == (BPF_LD|BPF_IMM) &&
9398cf6c252SPaul Traina 		    next->s.code == (BPF_MISC|BPF_TAX)) {
9408cf6c252SPaul Traina 			s->s.code = BPF_LDX|BPF_IMM;
9418cf6c252SPaul Traina 			next->s.code = BPF_MISC|BPF_TXA;
9426f9cba8fSJoseph Mingrone 			/*
9436f9cba8fSJoseph Mingrone 			 * XXX - optimizer loop detection.
9446f9cba8fSJoseph Mingrone 			 */
9456f9cba8fSJoseph Mingrone 			opt_state->non_branch_movement_performed = 1;
946ada6f083SXin LI 			opt_state->done = 0;
9478cf6c252SPaul Traina 		}
9488cf6c252SPaul Traina 		/*
9498cf6c252SPaul Traina 		 * This is an ugly special case, but it happens
9508cf6c252SPaul Traina 		 * when you say tcp[k] or udp[k] where k is a constant.
9518cf6c252SPaul Traina 		 */
9528cf6c252SPaul Traina 		if (s->s.code == (BPF_LD|BPF_IMM)) {
9538cf6c252SPaul Traina 			struct slist *add, *tax, *ild;
9548cf6c252SPaul Traina 
9558cf6c252SPaul Traina 			/*
9568cf6c252SPaul Traina 			 * Check that X isn't used on exit from this
9578cf6c252SPaul Traina 			 * block (which the optimizer might cause).
9588cf6c252SPaul Traina 			 * We know the code generator won't generate
9598cf6c252SPaul Traina 			 * any local dependencies.
9608cf6c252SPaul Traina 			 */
9618cf6c252SPaul Traina 			if (ATOMELEM(b->out_use, X_ATOM))
96204fb2745SSam Leffler 				continue;
9638cf6c252SPaul Traina 
96404fb2745SSam Leffler 			/*
96504fb2745SSam Leffler 			 * Check that the instruction following the ldi
96604fb2745SSam Leffler 			 * is an addx, or it's an ldxms with an addx
96704fb2745SSam Leffler 			 * following it (with 0 or more nops between the
96804fb2745SSam Leffler 			 * ldxms and addx).
96904fb2745SSam Leffler 			 */
9708cf6c252SPaul Traina 			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
9718cf6c252SPaul Traina 				add = next;
9728cf6c252SPaul Traina 			else
9738cf6c252SPaul Traina 				add = this_op(next->next);
9748cf6c252SPaul Traina 			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
97504fb2745SSam Leffler 				continue;
9768cf6c252SPaul Traina 
97704fb2745SSam Leffler 			/*
97804fb2745SSam Leffler 			 * Check that a tax follows that (with 0 or more
97904fb2745SSam Leffler 			 * nops between them).
98004fb2745SSam Leffler 			 */
9818cf6c252SPaul Traina 			tax = this_op(add->next);
9828cf6c252SPaul Traina 			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
98304fb2745SSam Leffler 				continue;
9848cf6c252SPaul Traina 
98504fb2745SSam Leffler 			/*
98604fb2745SSam Leffler 			 * Check that an ild follows that (with 0 or more
98704fb2745SSam Leffler 			 * nops between them).
98804fb2745SSam Leffler 			 */
9898cf6c252SPaul Traina 			ild = this_op(tax->next);
9908cf6c252SPaul Traina 			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
9918cf6c252SPaul Traina 			    BPF_MODE(ild->s.code) != BPF_IND)
99204fb2745SSam Leffler 				continue;
9938cf6c252SPaul Traina 			/*
9948cf6c252SPaul Traina 			 * We want to turn this sequence:
9958cf6c252SPaul Traina 			 *
9968cf6c252SPaul Traina 			 * (004) ldi     #0x2		{s}
9978cf6c252SPaul Traina 			 * (005) ldxms   [14]		{next}  -- optional
9988cf6c252SPaul Traina 			 * (006) addx			{add}
9998cf6c252SPaul Traina 			 * (007) tax			{tax}
10008cf6c252SPaul Traina 			 * (008) ild     [x+0]		{ild}
10018cf6c252SPaul Traina 			 *
10028cf6c252SPaul Traina 			 * into this sequence:
10038cf6c252SPaul Traina 			 *
10048cf6c252SPaul Traina 			 * (004) nop
10058cf6c252SPaul Traina 			 * (005) ldxms   [14]
10068cf6c252SPaul Traina 			 * (006) nop
10078cf6c252SPaul Traina 			 * (007) nop
10088cf6c252SPaul Traina 			 * (008) ild     [x+2]
10098cf6c252SPaul Traina 			 *
101004fb2745SSam Leffler 			 * XXX We need to check that X is not
101104fb2745SSam Leffler 			 * subsequently used, because we want to change
101204fb2745SSam Leffler 			 * what'll be in it after this sequence.
101304fb2745SSam Leffler 			 *
101404fb2745SSam Leffler 			 * We know we can eliminate the accumulator
101504fb2745SSam Leffler 			 * modifications earlier in the sequence since
101604fb2745SSam Leffler 			 * it is defined by the last stmt of this sequence
101704fb2745SSam Leffler 			 * (i.e., the last statement of the sequence loads
101804fb2745SSam Leffler 			 * a value into the accumulator, so we can eliminate
101904fb2745SSam Leffler 			 * earlier operations on the accumulator).
10208cf6c252SPaul Traina 			 */
10218cf6c252SPaul Traina 			ild->s.k += s->s.k;
10228cf6c252SPaul Traina 			s->s.code = NOP;
10238cf6c252SPaul Traina 			add->s.code = NOP;
10248cf6c252SPaul Traina 			tax->s.code = NOP;
10256f9cba8fSJoseph Mingrone 			/*
10266f9cba8fSJoseph Mingrone 			 * XXX - optimizer loop detection.
10276f9cba8fSJoseph Mingrone 			 */
10286f9cba8fSJoseph Mingrone 			opt_state->non_branch_movement_performed = 1;
1029ada6f083SXin LI 			opt_state->done = 0;
10308cf6c252SPaul Traina 		}
10318cf6c252SPaul Traina 	}
10328cf6c252SPaul Traina 	/*
103304fb2745SSam Leffler 	 * If the comparison at the end of a block is an equality
103404fb2745SSam Leffler 	 * comparison against a constant, and nobody uses the value
103504fb2745SSam Leffler 	 * we leave in the A register at the end of a block, and
103604fb2745SSam Leffler 	 * the operation preceding the comparison is an arithmetic
103704fb2745SSam Leffler 	 * operation, we can sometimes optimize it away.
10388cf6c252SPaul Traina 	 */
103904fb2745SSam Leffler 	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
10408cf6c252SPaul Traina 	    !ATOMELEM(b->out_use, A_ATOM)) {
104104fb2745SSam Leffler 		/*
104204fb2745SSam Leffler 		 * We can optimize away certain subtractions of the
104304fb2745SSam Leffler 		 * X register.
104404fb2745SSam Leffler 		 */
104504fb2745SSam Leffler 		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
10468cf6c252SPaul Traina 			val = b->val[X_ATOM];
1047ada6f083SXin LI 			if (opt_state->vmap[val].is_const) {
1048feb4ecdbSBruce M Simpson 				/*
104904fb2745SSam Leffler 				 * If we have a subtract to do a comparison,
105004fb2745SSam Leffler 				 * and the X register is a known constant,
105104fb2745SSam Leffler 				 * we can merge this value into the
105204fb2745SSam Leffler 				 * comparison:
105304fb2745SSam Leffler 				 *
1054feb4ecdbSBruce M Simpson 				 * sub x  ->	nop
1055feb4ecdbSBruce M Simpson 				 * jeq #y	jeq #(x+y)
1056feb4ecdbSBruce M Simpson 				 */
1057ada6f083SXin LI 				b->s.k += opt_state->vmap[val].const_val;
10588cf6c252SPaul Traina 				last->s.code = NOP;
10596f9cba8fSJoseph Mingrone 				/*
10606f9cba8fSJoseph Mingrone 				 * XXX - optimizer loop detection.
10616f9cba8fSJoseph Mingrone 				 */
10626f9cba8fSJoseph Mingrone 				opt_state->non_branch_movement_performed = 1;
1063ada6f083SXin LI 				opt_state->done = 0;
10648cf6c252SPaul Traina 			} else if (b->s.k == 0) {
10658cf6c252SPaul Traina 				/*
106604fb2745SSam Leffler 				 * If the X register isn't a constant,
106704fb2745SSam Leffler 				 * and the comparison in the test is
106804fb2745SSam Leffler 				 * against 0, we can compare with the
106904fb2745SSam Leffler 				 * X register, instead:
107004fb2745SSam Leffler 				 *
107104fb2745SSam Leffler 				 * sub x  ->	nop
107204fb2745SSam Leffler 				 * jeq #0	jeq x
10738cf6c252SPaul Traina 				 */
10748cf6c252SPaul Traina 				last->s.code = NOP;
107504fb2745SSam Leffler 				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
10766f9cba8fSJoseph Mingrone 				/*
10776f9cba8fSJoseph Mingrone 				 * XXX - optimizer loop detection.
10786f9cba8fSJoseph Mingrone 				 */
10796f9cba8fSJoseph Mingrone 				opt_state->non_branch_movement_performed = 1;
1080ada6f083SXin LI 				opt_state->done = 0;
10818cf6c252SPaul Traina 			}
10828cf6c252SPaul Traina 		}
10838cf6c252SPaul Traina 		/*
108404fb2745SSam Leffler 		 * Likewise, a constant subtract can be simplified:
108504fb2745SSam Leffler 		 *
108604fb2745SSam Leffler 		 * sub #x ->	nop
108704fb2745SSam Leffler 		 * jeq #y ->	jeq #(x+y)
10888cf6c252SPaul Traina 		 */
108904fb2745SSam Leffler 		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
10908cf6c252SPaul Traina 			last->s.code = NOP;
1091feb4ecdbSBruce M Simpson 			b->s.k += last->s.k;
10926f9cba8fSJoseph Mingrone 			/*
10936f9cba8fSJoseph Mingrone 			 * XXX - optimizer loop detection.
10946f9cba8fSJoseph Mingrone 			 */
10956f9cba8fSJoseph Mingrone 			opt_state->non_branch_movement_performed = 1;
1096ada6f083SXin LI 			opt_state->done = 0;
10978cf6c252SPaul Traina 		}
10988cf6c252SPaul Traina 		/*
109904fb2745SSam Leffler 		 * And, similarly, a constant AND can be simplified
110004fb2745SSam Leffler 		 * if we're testing against 0, i.e.:
110104fb2745SSam Leffler 		 *
11028cf6c252SPaul Traina 		 * and #k	nop
11038cf6c252SPaul Traina 		 * jeq #0  ->	jset #k
11048cf6c252SPaul Traina 		 */
110504fb2745SSam Leffler 		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
110604fb2745SSam Leffler 		    b->s.k == 0) {
11078cf6c252SPaul Traina 			b->s.k = last->s.k;
11088cf6c252SPaul Traina 			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
11098cf6c252SPaul Traina 			last->s.code = NOP;
11106f9cba8fSJoseph Mingrone 			/*
11116f9cba8fSJoseph Mingrone 			 * XXX - optimizer loop detection.
11126f9cba8fSJoseph Mingrone 			 */
11136f9cba8fSJoseph Mingrone 			opt_state->non_branch_movement_performed = 1;
1114ada6f083SXin LI 			opt_state->done = 0;
11158cf6c252SPaul Traina 			opt_not(b);
11168cf6c252SPaul Traina 		}
111704fb2745SSam Leffler 	}
11188cf6c252SPaul Traina 	/*
1119feb4ecdbSBruce M Simpson 	 * jset #0        ->   never
1120feb4ecdbSBruce M Simpson 	 * jset #ffffffff ->   always
1121feb4ecdbSBruce M Simpson 	 */
1122feb4ecdbSBruce M Simpson 	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
1123feb4ecdbSBruce M Simpson 		if (b->s.k == 0)
1124feb4ecdbSBruce M Simpson 			JT(b) = JF(b);
11256f9cba8fSJoseph Mingrone 		if (b->s.k == 0xffffffffU)
1126feb4ecdbSBruce M Simpson 			JF(b) = JT(b);
1127feb4ecdbSBruce M Simpson 	}
1128feb4ecdbSBruce M Simpson 	/*
1129a8e07101SRui Paulo 	 * If we're comparing against the index register, and the index
1130a8e07101SRui Paulo 	 * register is a known constant, we can just compare against that
1131a8e07101SRui Paulo 	 * constant.
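	 *
	 * For example (illustrative), if X is known to be 20 at
	 * this point, "jeq x" can be rewritten as "jeq #20", which
	 * may in turn let the constant-accumulator case below
	 * resolve the branch entirely.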
1132a8e07101SRui Paulo 	 */
1133a8e07101SRui Paulo 	val = b->val[X_ATOM];
1134ada6f083SXin LI 	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
11356f9cba8fSJoseph Mingrone 		bpf_u_int32 v = opt_state->vmap[val].const_val;
1136a8e07101SRui Paulo 		b->s.code &= ~BPF_X;
1137a8e07101SRui Paulo 		b->s.k = v;
1138a8e07101SRui Paulo 	}
1139a8e07101SRui Paulo 	/*
11408cf6c252SPaul Traina 	 * If the accumulator is a known constant, we can compute the
11418cf6c252SPaul Traina 	 * comparison result.
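	 *
	 * For example (illustrative), if A is known to be 0x800
	 * and the test is "jeq #0x806", the test can never succeed,
	 * so the true edge is redirected to the false edge's target
	 * and the branch becomes unconditional.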
11428cf6c252SPaul Traina 	 */
11438cf6c252SPaul Traina 	val = b->val[A_ATOM];
1144ada6f083SXin LI 	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
11456f9cba8fSJoseph Mingrone 		bpf_u_int32 v = opt_state->vmap[val].const_val;
11468cf6c252SPaul Traina 		switch (BPF_OP(b->s.code)) {
11478cf6c252SPaul Traina 
11488cf6c252SPaul Traina 		case BPF_JEQ:
11498cf6c252SPaul Traina 			v = v == b->s.k;
11508cf6c252SPaul Traina 			break;
11518cf6c252SPaul Traina 
11528cf6c252SPaul Traina 		case BPF_JGT:
11536f9cba8fSJoseph Mingrone 			v = v > b->s.k;
11548cf6c252SPaul Traina 			break;
11558cf6c252SPaul Traina 
11568cf6c252SPaul Traina 		case BPF_JGE:
11576f9cba8fSJoseph Mingrone 			v = v >= b->s.k;
11588cf6c252SPaul Traina 			break;
11598cf6c252SPaul Traina 
11608cf6c252SPaul Traina 		case BPF_JSET:
11618cf6c252SPaul Traina 			v &= b->s.k;
11628cf6c252SPaul Traina 			break;
11638cf6c252SPaul Traina 
11648cf6c252SPaul Traina 		default:
11658cf6c252SPaul Traina 			abort();
11668cf6c252SPaul Traina 		}
11676f9cba8fSJoseph Mingrone 		if (JF(b) != JT(b)) {
11686f9cba8fSJoseph Mingrone 			/*
11696f9cba8fSJoseph Mingrone 			 * XXX - optimizer loop detection.
11706f9cba8fSJoseph Mingrone 			 */
11716f9cba8fSJoseph Mingrone 			opt_state->non_branch_movement_performed = 1;
1172ada6f083SXin LI 			opt_state->done = 0;
11736f9cba8fSJoseph Mingrone 		}
11748cf6c252SPaul Traina 		if (v)
11758cf6c252SPaul Traina 			JF(b) = JT(b);
11768cf6c252SPaul Traina 		else
11778cf6c252SPaul Traina 			JT(b) = JF(b);
11788cf6c252SPaul Traina 	}
11798cf6c252SPaul Traina }
11808cf6c252SPaul Traina 
11818cf6c252SPaul Traina /*
11828cf6c252SPaul Traina  * Compute the symbolic value of the expression in 's', and update
11838cf6c252SPaul Traina  * anything it defines in the value table 'val'.  If 'alter' is true,
11848cf6c252SPaul Traina  * do various optimizations.  This code would be cleaner if symbolic
11858cf6c252SPaul Traina  * evaluation and code transformations weren't folded together.
11868cf6c252SPaul Traina  */
11878cf6c252SPaul Traina static void
11886f9cba8fSJoseph Mingrone opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter)
11898cf6c252SPaul Traina {
11908cf6c252SPaul Traina 	int op;
11916f9cba8fSJoseph Mingrone 	bpf_u_int32 v;
11928cf6c252SPaul Traina 
11938cf6c252SPaul Traina 	switch (s->code) {
11948cf6c252SPaul Traina 
11958cf6c252SPaul Traina 	case BPF_LD|BPF_ABS|BPF_W:
11968cf6c252SPaul Traina 	case BPF_LD|BPF_ABS|BPF_H:
11978cf6c252SPaul Traina 	case BPF_LD|BPF_ABS|BPF_B:
1198ada6f083SXin LI 		v = F(opt_state, s->code, s->k, 0L);
11998cf6c252SPaul Traina 		vstore(s, &val[A_ATOM], v, alter);
12008cf6c252SPaul Traina 		break;
12018cf6c252SPaul Traina 
12028cf6c252SPaul Traina 	case BPF_LD|BPF_IND|BPF_W:
12038cf6c252SPaul Traina 	case BPF_LD|BPF_IND|BPF_H:
12048cf6c252SPaul Traina 	case BPF_LD|BPF_IND|BPF_B:
12058cf6c252SPaul Traina 		v = val[X_ATOM];
1206ada6f083SXin LI 		if (alter && opt_state->vmap[v].is_const) {
12078cf6c252SPaul Traina 			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1208ada6f083SXin LI 			s->k += opt_state->vmap[v].const_val;
1209ada6f083SXin LI 			v = F(opt_state, s->code, s->k, 0L);
12106f9cba8fSJoseph Mingrone 			/*
12116f9cba8fSJoseph Mingrone 			 * XXX - optimizer loop detection.
12126f9cba8fSJoseph Mingrone 			 */
12136f9cba8fSJoseph Mingrone 			opt_state->non_branch_movement_performed = 1;
1214ada6f083SXin LI 			opt_state->done = 0;
12158cf6c252SPaul Traina 		}
12168cf6c252SPaul Traina 		else
1217ada6f083SXin LI 			v = F(opt_state, s->code, s->k, v);
12188cf6c252SPaul Traina 		vstore(s, &val[A_ATOM], v, alter);
12198cf6c252SPaul Traina 		break;
12208cf6c252SPaul Traina 
12218cf6c252SPaul Traina 	case BPF_LD|BPF_LEN:
1222ada6f083SXin LI 		v = F(opt_state, s->code, 0L, 0L);
12238cf6c252SPaul Traina 		vstore(s, &val[A_ATOM], v, alter);
12248cf6c252SPaul Traina 		break;
12258cf6c252SPaul Traina 
12268cf6c252SPaul Traina 	case BPF_LD|BPF_IMM:
12278cf6c252SPaul Traina 		v = K(s->k);
12288cf6c252SPaul Traina 		vstore(s, &val[A_ATOM], v, alter);
12298cf6c252SPaul Traina 		break;
12308cf6c252SPaul Traina 
12318cf6c252SPaul Traina 	case BPF_LDX|BPF_IMM:
12328cf6c252SPaul Traina 		v = K(s->k);
12338cf6c252SPaul Traina 		vstore(s, &val[X_ATOM], v, alter);
12348cf6c252SPaul Traina 		break;
12358cf6c252SPaul Traina 
12368cf6c252SPaul Traina 	case BPF_LDX|BPF_MSH|BPF_B:
1237ada6f083SXin LI 		v = F(opt_state, s->code, s->k, 0L);
12388cf6c252SPaul Traina 		vstore(s, &val[X_ATOM], v, alter);
12398cf6c252SPaul Traina 		break;
12408cf6c252SPaul Traina 
12418cf6c252SPaul Traina 	case BPF_ALU|BPF_NEG:
1242ada6f083SXin LI 		if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
12438cf6c252SPaul Traina 			s->code = BPF_LD|BPF_IMM;
124457e22627SCy Schubert 			/*
124557e22627SCy Schubert 			 * Do this negation as unsigned arithmetic; that's
124657e22627SCy Schubert 			 * what modern BPF engines do, and it guarantees
124757e22627SCy Schubert 			 * that all possible values can be negated.  (Yeah,
124857e22627SCy Schubert 			 * negating 0x80000000, the minimum signed 32-bit
124957e22627SCy Schubert 			 * two's-complement value, results in 0x80000000,
125057e22627SCy Schubert 			 * so it's still negative, but we *should* be doing
125157e22627SCy Schubert 			 * all unsigned arithmetic here, to match what
125257e22627SCy Schubert 			 * modern BPF engines do.)
125357e22627SCy Schubert 			 *
125457e22627SCy Schubert 			 * Express it as 0U - (unsigned value) so that we
125557e22627SCy Schubert 			 * don't get compiler warnings about negating an
125657e22627SCy Schubert 			 * unsigned value and don't get UBSan warnings
125757e22627SCy Schubert 			 * about the result of negating 0x80000000 being
125857e22627SCy Schubert 			 * undefined.
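			 *
			 * For example (illustrative), if A is known
			 * to be 5, the "neg" is replaced by
			 * "ld #0xfffffffb", the 32-bit two's-complement
			 * representation of -5.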
125957e22627SCy Schubert 			 */
12606f9cba8fSJoseph Mingrone 			s->k = 0U - opt_state->vmap[val[A_ATOM]].const_val;
12618cf6c252SPaul Traina 			val[A_ATOM] = K(s->k);
12628cf6c252SPaul Traina 		}
12638cf6c252SPaul Traina 		else
1264ada6f083SXin LI 			val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
12658cf6c252SPaul Traina 		break;
12668cf6c252SPaul Traina 
12678cf6c252SPaul Traina 	case BPF_ALU|BPF_ADD|BPF_K:
12688cf6c252SPaul Traina 	case BPF_ALU|BPF_SUB|BPF_K:
12698cf6c252SPaul Traina 	case BPF_ALU|BPF_MUL|BPF_K:
12708cf6c252SPaul Traina 	case BPF_ALU|BPF_DIV|BPF_K:
1271681ed54cSXin LI 	case BPF_ALU|BPF_MOD|BPF_K:
12728cf6c252SPaul Traina 	case BPF_ALU|BPF_AND|BPF_K:
12738cf6c252SPaul Traina 	case BPF_ALU|BPF_OR|BPF_K:
1274681ed54cSXin LI 	case BPF_ALU|BPF_XOR|BPF_K:
12758cf6c252SPaul Traina 	case BPF_ALU|BPF_LSH|BPF_K:
12768cf6c252SPaul Traina 	case BPF_ALU|BPF_RSH|BPF_K:
12778cf6c252SPaul Traina 		op = BPF_OP(s->code);
12788cf6c252SPaul Traina 		if (alter) {
12798cf6c252SPaul Traina 			if (s->k == 0) {
128057e22627SCy Schubert 				/*
128157e22627SCy Schubert 				 * Optimize operations where the constant
128257e22627SCy Schubert 				 * is zero.
128357e22627SCy Schubert 				 *
128457e22627SCy Schubert 				 * Don't optimize away "sub #0"
12850a94d38fSBill Fenner 				 * as it may be needed later to
128657e22627SCy Schubert 				 * fix up the generated math code.
128757e22627SCy Schubert 				 *
128857e22627SCy Schubert 				 * Fail if we're dividing by zero or taking
128957e22627SCy Schubert 				 * a modulus by zero.
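				 *
				 * For example (illustrative): "or #0"
				 * and "lsh #0" become NOPs, while
				 * "mul #0" and "and #0" become "ld #0",
				 * since their result no longer depends
				 * on A.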
129057e22627SCy Schubert 				 */
12910a94d38fSBill Fenner 				if (op == BPF_ADD ||
12928cf6c252SPaul Traina 				    op == BPF_LSH || op == BPF_RSH ||
1293681ed54cSXin LI 				    op == BPF_OR || op == BPF_XOR) {
12948cf6c252SPaul Traina 					s->code = NOP;
12958cf6c252SPaul Traina 					break;
12968cf6c252SPaul Traina 				}
12978cf6c252SPaul Traina 				if (op == BPF_MUL || op == BPF_AND) {
12988cf6c252SPaul Traina 					s->code = BPF_LD|BPF_IMM;
12998cf6c252SPaul Traina 					val[A_ATOM] = K(s->k);
13008cf6c252SPaul Traina 					break;
13018cf6c252SPaul Traina 				}
130257e22627SCy Schubert 				if (op == BPF_DIV)
130357e22627SCy Schubert 					opt_error(opt_state,
130457e22627SCy Schubert 					    "division by zero");
130557e22627SCy Schubert 				if (op == BPF_MOD)
130657e22627SCy Schubert 					opt_error(opt_state,
130757e22627SCy Schubert 					    "modulus by zero");
13088cf6c252SPaul Traina 			}
1309ada6f083SXin LI 			if (opt_state->vmap[val[A_ATOM]].is_const) {
131057e22627SCy Schubert 				fold_op(opt_state, s, val[A_ATOM], K(s->k));
13118cf6c252SPaul Traina 				val[A_ATOM] = K(s->k);
13128cf6c252SPaul Traina 				break;
13138cf6c252SPaul Traina 			}
13148cf6c252SPaul Traina 		}
1315ada6f083SXin LI 		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
13168cf6c252SPaul Traina 		break;
13178cf6c252SPaul Traina 
13188cf6c252SPaul Traina 	case BPF_ALU|BPF_ADD|BPF_X:
13198cf6c252SPaul Traina 	case BPF_ALU|BPF_SUB|BPF_X:
13208cf6c252SPaul Traina 	case BPF_ALU|BPF_MUL|BPF_X:
13218cf6c252SPaul Traina 	case BPF_ALU|BPF_DIV|BPF_X:
1322681ed54cSXin LI 	case BPF_ALU|BPF_MOD|BPF_X:
13238cf6c252SPaul Traina 	case BPF_ALU|BPF_AND|BPF_X:
13248cf6c252SPaul Traina 	case BPF_ALU|BPF_OR|BPF_X:
1325681ed54cSXin LI 	case BPF_ALU|BPF_XOR|BPF_X:
13268cf6c252SPaul Traina 	case BPF_ALU|BPF_LSH|BPF_X:
13278cf6c252SPaul Traina 	case BPF_ALU|BPF_RSH|BPF_X:
13288cf6c252SPaul Traina 		op = BPF_OP(s->code);
1329ada6f083SXin LI 		if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1330ada6f083SXin LI 			if (opt_state->vmap[val[A_ATOM]].is_const) {
133157e22627SCy Schubert 				fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
13328cf6c252SPaul Traina 				val[A_ATOM] = K(s->k);
13338cf6c252SPaul Traina 			}
13348cf6c252SPaul Traina 			else {
13358cf6c252SPaul Traina 				s->code = BPF_ALU|BPF_K|op;
1336ada6f083SXin LI 				s->k = opt_state->vmap[val[X_ATOM]].const_val;
133757e22627SCy Schubert 				if ((op == BPF_LSH || op == BPF_RSH) &&
13386f9cba8fSJoseph Mingrone 				    s->k > 31)
133957e22627SCy Schubert 					opt_error(opt_state,
134057e22627SCy Schubert 					    "shift by more than 31 bits");
13416f9cba8fSJoseph Mingrone 				/*
13426f9cba8fSJoseph Mingrone 				 * XXX - optimizer loop detection.
13436f9cba8fSJoseph Mingrone 				 */
13446f9cba8fSJoseph Mingrone 				opt_state->non_branch_movement_performed = 1;
1345ada6f083SXin LI 				opt_state->done = 0;
13468cf6c252SPaul Traina 				val[A_ATOM] =
1347ada6f083SXin LI 					F(opt_state, s->code, val[A_ATOM], K(s->k));
13488cf6c252SPaul Traina 			}
13498cf6c252SPaul Traina 			break;
13508cf6c252SPaul Traina 		}
13518cf6c252SPaul Traina 		/*
13528cf6c252SPaul Traina 		 * Check if we're doing something to an accumulator
13538cf6c252SPaul Traina 		 * that is 0, and simplify.  This may not seem like
13548cf6c252SPaul Traina 		 * much of a simplification but it could open up further
13558cf6c252SPaul Traina 		 * optimizations.
1356feb4ecdbSBruce M Simpson 		 * XXX We could also check for mul by 1, etc.
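		 *
		 * For example (illustrative): if A is known to be 0,
		 * "add x" is equivalent to "txa" (copy X into A) and
		 * "and x" is equivalent to "ld #0".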
13578cf6c252SPaul Traina 		 */
1358ada6f083SXin LI 		if (alter && opt_state->vmap[val[A_ATOM]].is_const
1359ada6f083SXin LI 		    && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1360681ed54cSXin LI 			if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
13618cf6c252SPaul Traina 				s->code = BPF_MISC|BPF_TXA;
13628cf6c252SPaul Traina 				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
13638cf6c252SPaul Traina 				break;
13648cf6c252SPaul Traina 			}
1365681ed54cSXin LI 			else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1366feb4ecdbSBruce M Simpson 				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
13678cf6c252SPaul Traina 				s->code = BPF_LD|BPF_IMM;
13688cf6c252SPaul Traina 				s->k = 0;
13698cf6c252SPaul Traina 				vstore(s, &val[A_ATOM], K(s->k), alter);
13708cf6c252SPaul Traina 				break;
13718cf6c252SPaul Traina 			}
13728cf6c252SPaul Traina 			else if (op == BPF_NEG) {
13738cf6c252SPaul Traina 				s->code = NOP;
13748cf6c252SPaul Traina 				break;
13758cf6c252SPaul Traina 			}
13768cf6c252SPaul Traina 		}
1377ada6f083SXin LI 		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
13788cf6c252SPaul Traina 		break;
13798cf6c252SPaul Traina 
13808cf6c252SPaul Traina 	case BPF_MISC|BPF_TXA:
13818cf6c252SPaul Traina 		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
13828cf6c252SPaul Traina 		break;
13838cf6c252SPaul Traina 
13848cf6c252SPaul Traina 	case BPF_LD|BPF_MEM:
13858cf6c252SPaul Traina 		v = val[s->k];
1386ada6f083SXin LI 		if (alter && opt_state->vmap[v].is_const) {
13878cf6c252SPaul Traina 			s->code = BPF_LD|BPF_IMM;
1388ada6f083SXin LI 			s->k = opt_state->vmap[v].const_val;
13896f9cba8fSJoseph Mingrone 			/*
13906f9cba8fSJoseph Mingrone 			 * XXX - optimizer loop detection.
13916f9cba8fSJoseph Mingrone 			 */
13926f9cba8fSJoseph Mingrone 			opt_state->non_branch_movement_performed = 1;
1393ada6f083SXin LI 			opt_state->done = 0;
13948cf6c252SPaul Traina 		}
13958cf6c252SPaul Traina 		vstore(s, &val[A_ATOM], v, alter);
13968cf6c252SPaul Traina 		break;
13978cf6c252SPaul Traina 
13988cf6c252SPaul Traina 	case BPF_MISC|BPF_TAX:
13998cf6c252SPaul Traina 		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
14008cf6c252SPaul Traina 		break;
14018cf6c252SPaul Traina 
14028cf6c252SPaul Traina 	case BPF_LDX|BPF_MEM:
14038cf6c252SPaul Traina 		v = val[s->k];
1404ada6f083SXin LI 		if (alter && opt_state->vmap[v].is_const) {
14058cf6c252SPaul Traina 			s->code = BPF_LDX|BPF_IMM;
1406ada6f083SXin LI 			s->k = opt_state->vmap[v].const_val;
14076f9cba8fSJoseph Mingrone 			/*
14086f9cba8fSJoseph Mingrone 			 * XXX - optimizer loop detection.
14096f9cba8fSJoseph Mingrone 			 */
14106f9cba8fSJoseph Mingrone 			opt_state->non_branch_movement_performed = 1;
1411ada6f083SXin LI 			opt_state->done = 0;
14128cf6c252SPaul Traina 		}
14138cf6c252SPaul Traina 		vstore(s, &val[X_ATOM], v, alter);
14148cf6c252SPaul Traina 		break;
14158cf6c252SPaul Traina 
14168cf6c252SPaul Traina 	case BPF_ST:
14178cf6c252SPaul Traina 		vstore(s, &val[s->k], val[A_ATOM], alter);
14188cf6c252SPaul Traina 		break;
14198cf6c252SPaul Traina 
14208cf6c252SPaul Traina 	case BPF_STX:
14218cf6c252SPaul Traina 		vstore(s, &val[s->k], val[X_ATOM], alter);
14228cf6c252SPaul Traina 		break;
14238cf6c252SPaul Traina 	}
14248cf6c252SPaul Traina }
14258cf6c252SPaul Traina 
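/*
 * Dead-store bookkeeping for one statement: last[atom] points to the
 * most recent store to that atom that has not been used yet.  A use
 * of an atom clears its entry; a new store while an unused one is
 * still pending turns the pending store into a NOP.  AX_ATOM means
 * the statement uses both the accumulator and the index register.
 */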
14268cf6c252SPaul Traina static void
1427ada6f083SXin LI deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
14288cf6c252SPaul Traina {
14298cf6c252SPaul Traina 	register int atom;
14308cf6c252SPaul Traina 
14318cf6c252SPaul Traina 	atom = atomuse(s);
14328cf6c252SPaul Traina 	if (atom >= 0) {
14338cf6c252SPaul Traina 		if (atom == AX_ATOM) {
14348cf6c252SPaul Traina 			last[X_ATOM] = 0;
14358cf6c252SPaul Traina 			last[A_ATOM] = 0;
14368cf6c252SPaul Traina 		}
14378cf6c252SPaul Traina 		else
14388cf6c252SPaul Traina 			last[atom] = 0;
14398cf6c252SPaul Traina 	}
14408cf6c252SPaul Traina 	atom = atomdef(s);
14418cf6c252SPaul Traina 	if (atom >= 0) {
14428cf6c252SPaul Traina 		if (last[atom]) {
14436f9cba8fSJoseph Mingrone 			/*
14446f9cba8fSJoseph Mingrone 			 * XXX - optimizer loop detection.
14456f9cba8fSJoseph Mingrone 			 */
14466f9cba8fSJoseph Mingrone 			opt_state->non_branch_movement_performed = 1;
1447ada6f083SXin LI 			opt_state->done = 0;
14488cf6c252SPaul Traina 			last[atom]->code = NOP;
14498cf6c252SPaul Traina 		}
14508cf6c252SPaul Traina 		last[atom] = s;
14518cf6c252SPaul Traina 	}
14528cf6c252SPaul Traina }
14538cf6c252SPaul Traina 
14548cf6c252SPaul Traina static void
1455ada6f083SXin LI opt_deadstores(opt_state_t *opt_state, register struct block *b)
14568cf6c252SPaul Traina {
14578cf6c252SPaul Traina 	register struct slist *s;
14588cf6c252SPaul Traina 	register int atom;
14598cf6c252SPaul Traina 	struct stmt *last[N_ATOMS];
14608cf6c252SPaul Traina 
14618cf6c252SPaul Traina 	memset((char *)last, 0, sizeof last);
14628cf6c252SPaul Traina 
14638cf6c252SPaul Traina 	for (s = b->stmts; s != 0; s = s->next)
1464ada6f083SXin LI 		deadstmt(opt_state, &s->s, last);
1465ada6f083SXin LI 	deadstmt(opt_state, &b->s, last);
14668cf6c252SPaul Traina 
14678cf6c252SPaul Traina 	for (atom = 0; atom < N_ATOMS; ++atom)
14688cf6c252SPaul Traina 		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
14698cf6c252SPaul Traina 			last[atom]->code = NOP;
14706f9cba8fSJoseph Mingrone 			/*
14716f9cba8fSJoseph Mingrone 			 * XXX - optimizer loop detection.
14726f9cba8fSJoseph Mingrone 			 */
14736f9cba8fSJoseph Mingrone 			opt_state->non_branch_movement_performed = 1;
1474ada6f083SXin LI 			opt_state->done = 0;
14758cf6c252SPaul Traina 		}
14768cf6c252SPaul Traina }
14778cf6c252SPaul Traina 
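/*
 * Optimize a single block: seed its entry value numbers from its
 * predecessors, run opt_stmt() over its statements, and either
 * discard all of its statements (when the special case below applies)
 * or run opt_peep() and opt_deadstores() on it; finally record in
 * b->oval the operand its branch tests, for use by the branch
 * optimizer.
 */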
14788cf6c252SPaul Traina static void
147957e22627SCy Schubert opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
14808cf6c252SPaul Traina {
14818cf6c252SPaul Traina 	struct slist *s;
14828cf6c252SPaul Traina 	struct edge *p;
14838cf6c252SPaul Traina 	int i;
14846f9cba8fSJoseph Mingrone 	bpf_u_int32 aval, xval;
14858cf6c252SPaul Traina 
14868751327cSBill Fenner #if 0
14878751327cSBill Fenner 	for (s = b->stmts; s && s->next; s = s->next)
14888751327cSBill Fenner 		if (BPF_CLASS(s->s.code) == BPF_JMP) {
14898751327cSBill Fenner 			do_stmts = 0;
14908751327cSBill Fenner 			break;
14918751327cSBill Fenner 		}
14928751327cSBill Fenner #endif
14938751327cSBill Fenner 
14948cf6c252SPaul Traina 	/*
14958cf6c252SPaul Traina 	 * Initialize the atom values.
14968cf6c252SPaul Traina 	 */
14978cf6c252SPaul Traina 	p = b->in_edges;
149804fb2745SSam Leffler 	if (p == 0) {
149904fb2745SSam Leffler 		/*
150004fb2745SSam Leffler 		 * We have no predecessors, so everything is undefined
150104fb2745SSam Leffler 		 * upon entry to this block.
150204fb2745SSam Leffler 		 */
15038cf6c252SPaul Traina 		memset((char *)b->val, 0, sizeof(b->val));
150404fb2745SSam Leffler 	} else {
150504fb2745SSam Leffler 		/*
150604fb2745SSam Leffler 		 * Inherit values from our predecessors.
150704fb2745SSam Leffler 		 *
150804fb2745SSam Leffler 		 * First, get the values from the predecessor along the
150904fb2745SSam Leffler 		 * first edge leading to this node.
151004fb2745SSam Leffler 		 */
15118cf6c252SPaul Traina 		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
151204fb2745SSam Leffler 		/*
151304fb2745SSam Leffler 		 * Now look at all the other nodes leading to this node.
151404fb2745SSam Leffler 		 * If, for the predecessor along that edge, a register
151504fb2745SSam Leffler 		 * has a different value from the one we have (i.e.,
151604fb2745SSam Leffler 		 * control paths are merging, and the merging paths
151704fb2745SSam Leffler 		 * assign different values to that register), give the
151804fb2745SSam Leffler 		 * register the undefined value of 0.
151904fb2745SSam Leffler 		 */
15208cf6c252SPaul Traina 		while ((p = p->next) != NULL) {
15218cf6c252SPaul Traina 			for (i = 0; i < N_ATOMS; ++i)
15228cf6c252SPaul Traina 				if (b->val[i] != p->pred->val[i])
15238cf6c252SPaul Traina 					b->val[i] = 0;
15248cf6c252SPaul Traina 		}
15258cf6c252SPaul Traina 	}
15268cf6c252SPaul Traina 	aval = b->val[A_ATOM];
152704fb2745SSam Leffler 	xval = b->val[X_ATOM];
15288cf6c252SPaul Traina 	for (s = b->stmts; s; s = s->next)
152957e22627SCy Schubert 		opt_stmt(opt_state, &s->s, b->val, do_stmts);
15308cf6c252SPaul Traina 
15318cf6c252SPaul Traina 	/*
15328cf6c252SPaul Traina 	 * This is a special case: if we don't use anything from this
153304fb2745SSam Leffler 	 * block, and we load the accumulator or index register with a
153404fb2745SSam Leffler 	 * value that is already there, or if this block is a return,
15358cf6c252SPaul Traina 	 * eliminate all the statements.
153604fb2745SSam Leffler 	 *
15376f9cba8fSJoseph Mingrone 	 * XXX - what if it does a store?  Presumably that falls under
15386f9cba8fSJoseph Mingrone 	 * the heading of "if we don't use anything from this block",
15396f9cba8fSJoseph Mingrone 	 * i.e., if we use any memory location set to a different
15406f9cba8fSJoseph Mingrone 	 * value by this block, then we use something from this block.
154104fb2745SSam Leffler 	 *
154204fb2745SSam Leffler 	 * XXX - why does it matter whether we use anything from this
154304fb2745SSam Leffler 	 * block?  If the accumulator or index register doesn't change
154404fb2745SSam Leffler 	 * its value, isn't that OK even if we use that value?
154504fb2745SSam Leffler 	 *
154604fb2745SSam Leffler 	 * XXX - if we load the accumulator with a different value,
154704fb2745SSam Leffler 	 * and the block ends with a conditional branch, we obviously
154804fb2745SSam Leffler 	 * can't eliminate it, as the branch depends on that value.
154904fb2745SSam Leffler 	 * For the index register, the conditional branch only depends
155004fb2745SSam Leffler 	 * on the index register value if the test is against the index
155104fb2745SSam Leffler 	 * register value rather than a constant; if nothing uses the
155204fb2745SSam Leffler 	 * value we put into the index register, and we're not testing
155304fb2745SSam Leffler 	 * against the index register's value, and there aren't any
155404fb2745SSam Leffler 	 * other problems that would keep us from eliminating this
155504fb2745SSam Leffler 	 * block, can we eliminate it?
15568cf6c252SPaul Traina 	 */
15578cf6c252SPaul Traina 	if (do_stmts &&
1558b00ab754SHans Petter Selasky 	    ((b->out_use == 0 &&
1559b00ab754SHans Petter Selasky 	      aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1560b00ab754SHans Petter Selasky 	      xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
15618cf6c252SPaul Traina 	     BPF_CLASS(b->s.code) == BPF_RET)) {
15628cf6c252SPaul Traina 		if (b->stmts != 0) {
15638cf6c252SPaul Traina 			b->stmts = 0;
15646f9cba8fSJoseph Mingrone 			/*
15656f9cba8fSJoseph Mingrone 			 * XXX - optimizer loop detection.
15666f9cba8fSJoseph Mingrone 			 */
15676f9cba8fSJoseph Mingrone 			opt_state->non_branch_movement_performed = 1;
1568ada6f083SXin LI 			opt_state->done = 0;
15698cf6c252SPaul Traina 		}
15708cf6c252SPaul Traina 	} else {
1571ada6f083SXin LI 		opt_peep(opt_state, b);
1572ada6f083SXin LI 		opt_deadstores(opt_state, b);
15738cf6c252SPaul Traina 	}
15748cf6c252SPaul Traina 	/*
15758cf6c252SPaul Traina 	 * Set up values for branch optimizer.
15768cf6c252SPaul Traina 	 */
15778cf6c252SPaul Traina 	if (BPF_SRC(b->s.code) == BPF_K)
15788cf6c252SPaul Traina 		b->oval = K(b->s.k);
15798cf6c252SPaul Traina 	else
15808cf6c252SPaul Traina 		b->oval = b->val[X_ATOM];
15818cf6c252SPaul Traina 	b->et.code = b->s.code;
15828cf6c252SPaul Traina 	b->ef.code = -b->s.code;
15838cf6c252SPaul Traina }
15848cf6c252SPaul Traina 
15858cf6c252SPaul Traina /*
15868cf6c252SPaul Traina  * Return true if any register that is used on exit from 'succ', has
15878cf6c252SPaul Traina  * an exit value that is different from the corresponding exit value
15888cf6c252SPaul Traina  * from 'b'.
15898cf6c252SPaul Traina  */
15908cf6c252SPaul Traina static int
1591edc89b24SXin LI use_conflict(struct block *b, struct block *succ)
15928cf6c252SPaul Traina {
15938cf6c252SPaul Traina 	int atom;
15948cf6c252SPaul Traina 	atomset use = succ->out_use;
15958cf6c252SPaul Traina 
15968cf6c252SPaul Traina 	if (use == 0)
15978cf6c252SPaul Traina 		return 0;
15988cf6c252SPaul Traina 
15998cf6c252SPaul Traina 	for (atom = 0; atom < N_ATOMS; ++atom)
16008cf6c252SPaul Traina 		if (ATOMELEM(use, atom))
16018cf6c252SPaul Traina 			if (b->val[atom] != succ->val[atom])
16028cf6c252SPaul Traina 				return 1;
16038cf6c252SPaul Traina 	return 0;
16048cf6c252SPaul Traina }
16058cf6c252SPaul Traina 
16066f9cba8fSJoseph Mingrone /*
16076f9cba8fSJoseph Mingrone  * Given a block that is the successor of an edge, and an edge that
16086f9cba8fSJoseph Mingrone  * dominates that edge, return a pointer to a child of that block
16096f9cba8fSJoseph Mingrone  * (a block to which that block jumps) if that child is a candidate
16106f9cba8fSJoseph Mingrone  * to replace the successor of the latter edge, or NULL if neither
16116f9cba8fSJoseph Mingrone  * of the first block's children is a candidate.
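 *
 * For example (illustrative): if the dominating edge was taken when a
 * "jeq #0x800" on some value of A succeeded, and the successor block
 * performs the same "jeq #0x800" on the same value of A, that test
 * must succeed again, so the successor's true-branch child is the
 * candidate.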
16126f9cba8fSJoseph Mingrone  */
16138cf6c252SPaul Traina static struct block *
1614edc89b24SXin LI fold_edge(struct block *child, struct edge *ep)
16158cf6c252SPaul Traina {
16168cf6c252SPaul Traina 	int sense;
16176f9cba8fSJoseph Mingrone 	bpf_u_int32 aval0, aval1, oval0, oval1;
16188cf6c252SPaul Traina 	int code = ep->code;
16198cf6c252SPaul Traina 
16208cf6c252SPaul Traina 	if (code < 0) {
16216f9cba8fSJoseph Mingrone 		/*
16226f9cba8fSJoseph Mingrone 		 * This edge is a "branch if false" edge.
16236f9cba8fSJoseph Mingrone 		 */
16248cf6c252SPaul Traina 		code = -code;
16258cf6c252SPaul Traina 		sense = 0;
16266f9cba8fSJoseph Mingrone 	} else {
16276f9cba8fSJoseph Mingrone 		/*
16286f9cba8fSJoseph Mingrone 		 * This edge is a "branch if true" edge.
16296f9cba8fSJoseph Mingrone 		 */
16308cf6c252SPaul Traina 		sense = 1;
16316f9cba8fSJoseph Mingrone 	}
16328cf6c252SPaul Traina 
16336f9cba8fSJoseph Mingrone 	/*
16346f9cba8fSJoseph Mingrone 	 * If the opcode for the branch at the end of the block we
16356f9cba8fSJoseph Mingrone 	 * were handed isn't the same as the opcode for the branch
16366f9cba8fSJoseph Mingrone 	 * to which the edge we were handed corresponds, the tests
16376f9cba8fSJoseph Mingrone 	 * for those branches aren't testing the same conditions,
16386f9cba8fSJoseph Mingrone 	 * so the blocks to which the first block branches aren't
16396f9cba8fSJoseph Mingrone 	 * candidates to replace the successor of the edge.
16406f9cba8fSJoseph Mingrone 	 */
16418cf6c252SPaul Traina 	if (child->s.code != code)
16428cf6c252SPaul Traina 		return 0;
16438cf6c252SPaul Traina 
16448cf6c252SPaul Traina 	aval0 = child->val[A_ATOM];
16458cf6c252SPaul Traina 	oval0 = child->oval;
16468cf6c252SPaul Traina 	aval1 = ep->pred->val[A_ATOM];
16478cf6c252SPaul Traina 	oval1 = ep->pred->oval;
16488cf6c252SPaul Traina 
16496f9cba8fSJoseph Mingrone 	/*
16506f9cba8fSJoseph Mingrone 	 * If the A register value on exit from the successor block
16516f9cba8fSJoseph Mingrone 	 * isn't the same as the A register value on exit from the
16526f9cba8fSJoseph Mingrone 	 * predecessor of the edge, the blocks to which the first
16536f9cba8fSJoseph Mingrone 	 * block branches aren't candidates to replace the successor
16546f9cba8fSJoseph Mingrone 	 * of the edge.
16556f9cba8fSJoseph Mingrone 	 */
16568cf6c252SPaul Traina 	if (aval0 != aval1)
16578cf6c252SPaul Traina 		return 0;
16588cf6c252SPaul Traina 
16598cf6c252SPaul Traina 	if (oval0 == oval1)
16608cf6c252SPaul Traina 		/*
166104fb2745SSam Leffler 		 * The operands of the branch instructions are
16626f9cba8fSJoseph Mingrone 		 * identical, so the branches are testing the
16636f9cba8fSJoseph Mingrone 		 * same condition, and the result is true if a true
166404fb2745SSam Leffler 		 * branch was taken to get here, otherwise false.
16658cf6c252SPaul Traina 		 */
16668cf6c252SPaul Traina 		return sense ? JT(child) : JF(child);
16678cf6c252SPaul Traina 
16688cf6c252SPaul Traina 	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
16698cf6c252SPaul Traina 		/*
16708cf6c252SPaul Traina 		 * At this point, we only know the comparison if we
16718cf6c252SPaul Traina 		 * came down the true branch, and it was an equality
167204fb2745SSam Leffler 		 * comparison with a constant.
167304fb2745SSam Leffler 		 *
167404fb2745SSam Leffler 		 * I.e., if we came down the true branch, and the branch
167504fb2745SSam Leffler 		 * was an equality comparison with a constant, we know the
167604fb2745SSam Leffler 		 * accumulator contains that constant.  If we came down
167704fb2745SSam Leffler 		 * the false branch, or the comparison wasn't with a
167804fb2745SSam Leffler 		 * constant, we don't know what was in the accumulator.
167904fb2745SSam Leffler 		 *
168004fb2745SSam Leffler 		 * We rely on the fact that distinct constants have distinct
168104fb2745SSam Leffler 		 * value numbers.
16828cf6c252SPaul Traina 		 */
16838cf6c252SPaul Traina 		return JF(child);
16848cf6c252SPaul Traina 
16858cf6c252SPaul Traina 	return 0;
16868cf6c252SPaul Traina }
16878cf6c252SPaul Traina 
16886f9cba8fSJoseph Mingrone /*
16896f9cba8fSJoseph Mingrone  * If we can make this edge go directly to a child of the edge's current
16906f9cba8fSJoseph Mingrone  * successor, do so.
16916f9cba8fSJoseph Mingrone  */
16928cf6c252SPaul Traina static void
1693ada6f083SXin LI opt_j(opt_state_t *opt_state, struct edge *ep)
16948cf6c252SPaul Traina {
16956f9cba8fSJoseph Mingrone 	register u_int i, k;
16968cf6c252SPaul Traina 	register struct block *target;
16978cf6c252SPaul Traina 
16986f9cba8fSJoseph Mingrone 	/*
16996f9cba8fSJoseph Mingrone 	 * Does this edge go to a block that's a leaf node
17006f9cba8fSJoseph Mingrone 	 * of the DAG, i.e. a return statement with neither
17016f9cba8fSJoseph Mingrone 	 * a true nor a false successor of its own?
17036f9cba8fSJoseph Mingrone 	 * If so, there's nothing to optimize.
17046f9cba8fSJoseph Mingrone 	 */
17058cf6c252SPaul Traina 	if (JT(ep->succ) == 0)
17068cf6c252SPaul Traina 		return;
17078cf6c252SPaul Traina 
17086f9cba8fSJoseph Mingrone 	/*
17096f9cba8fSJoseph Mingrone 	 * Does this edge go to a block that goes, in turn, to
17106f9cba8fSJoseph Mingrone 	 * the same block regardless of whether the test at the
17116f9cba8fSJoseph Mingrone 	 * end succeeds or fails?
17126f9cba8fSJoseph Mingrone 	 */
17138cf6c252SPaul Traina 	if (JT(ep->succ) == JF(ep->succ)) {
17148cf6c252SPaul Traina 		/*
17158cf6c252SPaul Traina 		 * Common branch targets can be eliminated, provided
17168cf6c252SPaul Traina 		 * there is no data dependency.
17176f9cba8fSJoseph Mingrone 		 *
17186f9cba8fSJoseph Mingrone 		 * Check whether any register used on exit from the
17196f9cba8fSJoseph Mingrone 		 * block to which the successor of this edge goes
17206f9cba8fSJoseph Mingrone 		 * has a value at that point that's different from
17216f9cba8fSJoseph Mingrone 		 * the value it has on exit from the predecessor of
17226f9cba8fSJoseph Mingrone 		 * this edge.  If not, the predecessor of this edge
17236f9cba8fSJoseph Mingrone 		 * can just go to the block to which the successor
17246f9cba8fSJoseph Mingrone 		 * of this edge goes, bypassing the successor of this
17256f9cba8fSJoseph Mingrone 		 * edge, as the successor of this edge isn't doing
17266f9cba8fSJoseph Mingrone 		 * any calculations whose results are different
17276f9cba8fSJoseph Mingrone 		 * from what the blocks before it did and isn't
17286f9cba8fSJoseph Mingrone 		 * doing any tests the results of which matter.
17298cf6c252SPaul Traina 		 */
17306f9cba8fSJoseph Mingrone 		if (!use_conflict(ep->pred, JT(ep->succ))) {
17316f9cba8fSJoseph Mingrone 			/*
17326f9cba8fSJoseph Mingrone 			 * No, there isn't.
17336f9cba8fSJoseph Mingrone 			 * Make this edge go to the block to
17346f9cba8fSJoseph Mingrone 			 * which the successor of that edge
17356f9cba8fSJoseph Mingrone 			 * goes.
17366f9cba8fSJoseph Mingrone 			 *
17376f9cba8fSJoseph Mingrone 			 * XXX - optimizer loop detection.
17386f9cba8fSJoseph Mingrone 			 */
17396f9cba8fSJoseph Mingrone 			opt_state->non_branch_movement_performed = 1;
1740ada6f083SXin LI 			opt_state->done = 0;
17418cf6c252SPaul Traina 			ep->succ = JT(ep->succ);
17428cf6c252SPaul Traina 		}
17438cf6c252SPaul Traina 	}
17448cf6c252SPaul Traina 	/*
17458cf6c252SPaul Traina 	 * For each edge dominator that matches the successor of this
17468cf6c252SPaul Traina 	 * edge, promote the edge successor to its grandchild.
17478cf6c252SPaul Traina 	 *
17488cf6c252SPaul Traina 	 * XXX We violate the set abstraction here in favor of a reasonably
17498cf6c252SPaul Traina 	 * efficient loop.
17508cf6c252SPaul Traina 	 */
17518cf6c252SPaul Traina  top:
1752ada6f083SXin LI 	for (i = 0; i < opt_state->edgewords; ++i) {
17536f9cba8fSJoseph Mingrone 		/* i'th word in the bitset of dominators */
17548cf6c252SPaul Traina 		register bpf_u_int32 x = ep->edom[i];
17558cf6c252SPaul Traina 
17568cf6c252SPaul Traina 		while (x != 0) {
17576f9cba8fSJoseph Mingrone 			/* Find the next dominator in that word and mark it as found */
1758b00ab754SHans Petter Selasky 			k = lowest_set_bit(x);
175957e22627SCy Schubert 			x &=~ ((bpf_u_int32)1 << k);
17608cf6c252SPaul Traina 			k += i * BITS_PER_WORD;
17618cf6c252SPaul Traina 
1762ada6f083SXin LI 			target = fold_edge(ep->succ, opt_state->edges[k]);
17638cf6c252SPaul Traina 			/*
17646f9cba8fSJoseph Mingrone 			 * We have a candidate to replace the successor
17656f9cba8fSJoseph Mingrone 			 * of ep.
17666f9cba8fSJoseph Mingrone 			 *
17678cf6c252SPaul Traina 			 * Check that there is no data dependency between
17686f9cba8fSJoseph Mingrone 			 * nodes that will be violated if we move the edge;
17696f9cba8fSJoseph Mingrone 			 * i.e., if any register used on exit from the
17706f9cba8fSJoseph Mingrone 			 * candidate has a value at that point different
17716f9cba8fSJoseph Mingrone 			 * from the value it has when we exit the
17726f9cba8fSJoseph Mingrone 			 * predecessor of that edge, there's a data
17736f9cba8fSJoseph Mingrone 			 * dependency that will be violated.
17748cf6c252SPaul Traina 			 */
17758cf6c252SPaul Traina 			if (target != 0 && !use_conflict(ep->pred, target)) {
17766f9cba8fSJoseph Mingrone 				/*
17776f9cba8fSJoseph Mingrone 				 * It's safe to replace the successor of
17786f9cba8fSJoseph Mingrone 				 * ep; do so, and note that we've made
17796f9cba8fSJoseph Mingrone 				 * at least one change.
17806f9cba8fSJoseph Mingrone 				 *
17816f9cba8fSJoseph Mingrone 				 * XXX - this is one of the operations that
17826f9cba8fSJoseph Mingrone 				 * happens when the optimizer gets into
17836f9cba8fSJoseph Mingrone 				 * one of those infinite loops.
17846f9cba8fSJoseph Mingrone 				 */
1785ada6f083SXin LI 				opt_state->done = 0;
17868cf6c252SPaul Traina 				ep->succ = target;
17878cf6c252SPaul Traina 				if (JT(target) != 0)
17888cf6c252SPaul Traina 					/*
17898cf6c252SPaul Traina 					 * Start over unless we hit a leaf.
17908cf6c252SPaul Traina 					 */
17918cf6c252SPaul Traina 					goto top;
17928cf6c252SPaul Traina 				return;
17938cf6c252SPaul Traina 			}
17948cf6c252SPaul Traina 		}
17958cf6c252SPaul Traina 	}
17968cf6c252SPaul Traina }
17978cf6c252SPaul Traina 
17986f9cba8fSJoseph Mingrone /*
17996f9cba8fSJoseph Mingrone  * XXX - is this, and and_pullup(), what's described in section 6.1.2
18006f9cba8fSJoseph Mingrone  * "Predicate Assertion Propagation" in the BPF+ paper?
18016f9cba8fSJoseph Mingrone  *
18026f9cba8fSJoseph Mingrone  * Probably not: note that this looks at block dominators, not
18036f9cba8fSJoseph Mingrone  * edge dominators.
18046f9cba8fSJoseph Mingrone  *
18056f9cba8fSJoseph Mingrone  * "A or B" compiles into
18066f9cba8fSJoseph Mingrone  *
18076f9cba8fSJoseph Mingrone  *          A
18086f9cba8fSJoseph Mingrone  *       t / \ f
18096f9cba8fSJoseph Mingrone  *        /   B
18106f9cba8fSJoseph Mingrone  *       / t / \ f
18116f9cba8fSJoseph Mingrone  *      \   /
18126f9cba8fSJoseph Mingrone  *       \ /
18136f9cba8fSJoseph Mingrone  *        X
18146f9cba8fSJoseph Mingrone  *
18156f9cba8fSJoseph Mingrone  *
18166f9cba8fSJoseph Mingrone  */
18178cf6c252SPaul Traina static void
1818ada6f083SXin LI or_pullup(opt_state_t *opt_state, struct block *b)
18198cf6c252SPaul Traina {
18206f9cba8fSJoseph Mingrone 	bpf_u_int32 val;
18216f9cba8fSJoseph Mingrone 	int at_top;
18228cf6c252SPaul Traina 	struct block *pull;
18238cf6c252SPaul Traina 	struct block **diffp, **samep;
18248cf6c252SPaul Traina 	struct edge *ep;
18258cf6c252SPaul Traina 
18268cf6c252SPaul Traina 	ep = b->in_edges;
18278cf6c252SPaul Traina 	if (ep == 0)
18288cf6c252SPaul Traina 		return;
18298cf6c252SPaul Traina 
18308cf6c252SPaul Traina 	/*
18318cf6c252SPaul Traina 	 * Make sure each predecessor loads the same value.
18328cf6c252SPaul Traina 	 * XXX why?
18338cf6c252SPaul Traina 	 */
18348cf6c252SPaul Traina 	val = ep->pred->val[A_ATOM];
18358cf6c252SPaul Traina 	for (ep = ep->next; ep != 0; ep = ep->next)
18368cf6c252SPaul Traina 		if (val != ep->pred->val[A_ATOM])
18378cf6c252SPaul Traina 			return;
18388cf6c252SPaul Traina 
18396f9cba8fSJoseph Mingrone 	/*
18406f9cba8fSJoseph Mingrone 	 * For the first edge in the list of edges coming into this block,
18416f9cba8fSJoseph Mingrone 	 * see whether the predecessor of that edge comes here via a true
18426f9cba8fSJoseph Mingrone 	 * branch or a false branch.
18436f9cba8fSJoseph Mingrone 	 */
18448cf6c252SPaul Traina 	if (JT(b->in_edges->pred) == b)
18456f9cba8fSJoseph Mingrone 		diffp = &JT(b->in_edges->pred);	/* jt */
18468cf6c252SPaul Traina 	else
18476f9cba8fSJoseph Mingrone 		diffp = &JF(b->in_edges->pred);	/* jf */
18488cf6c252SPaul Traina 
18496f9cba8fSJoseph Mingrone 	/*
18506f9cba8fSJoseph Mingrone 	 * diffp is a pointer to a pointer to the block.
18516f9cba8fSJoseph Mingrone 	 *
18526f9cba8fSJoseph Mingrone 	 * Go down the false chain looking as far as you can,
18536f9cba8fSJoseph Mingrone 	 * making sure that each jump-compare is doing the
18546f9cba8fSJoseph Mingrone 	 * same as the original block.
18556f9cba8fSJoseph Mingrone 	 *
18566f9cba8fSJoseph Mingrone 	 * If you reach the bottom before you reach a
18576f9cba8fSJoseph Mingrone 	 * different jump-compare, just exit.  There's nothing
18586f9cba8fSJoseph Mingrone 	 * to do here.  XXX - no, this version is checking for
18596f9cba8fSJoseph Mingrone 	 * the value leaving the block; that's from the BPF+
18606f9cba8fSJoseph Mingrone 	 * pullup routine.
18616f9cba8fSJoseph Mingrone 	 */
18628cf6c252SPaul Traina 	at_top = 1;
1863b00ab754SHans Petter Selasky 	for (;;) {
18646f9cba8fSJoseph Mingrone 		/*
18656f9cba8fSJoseph Mingrone 		 * Done if that's not going anywhere XXX
18666f9cba8fSJoseph Mingrone 		 */
18678cf6c252SPaul Traina 		if (*diffp == 0)
18688cf6c252SPaul Traina 			return;
18698cf6c252SPaul Traina 
18706f9cba8fSJoseph Mingrone 		/*
18716f9cba8fSJoseph Mingrone 		 * Done if the block we're looking at isn't going the
18726f9cba8fSJoseph Mingrone 		 * same place we're going, i.e. if its true edge
18736f9cba8fSJoseph Mingrone 		 * doesn't point to the same location as the true
18746f9cba8fSJoseph Mingrone 		 * edge of b.
18766f9cba8fSJoseph Mingrone 		 */
18778cf6c252SPaul Traina 		if (JT(*diffp) != JT(b))
18788cf6c252SPaul Traina 			return;
18798cf6c252SPaul Traina 
18806f9cba8fSJoseph Mingrone 		/*
18816f9cba8fSJoseph Mingrone 		 * Done if b doesn't dominate the block we're looking
18826f9cba8fSJoseph Mingrone 		 * at, i.e. if b isn't in (*diffp)->dom.
18856f9cba8fSJoseph Mingrone 		 */
18868cf6c252SPaul Traina 		if (!SET_MEMBER((*diffp)->dom, b->id))
18878cf6c252SPaul Traina 			return;
18888cf6c252SPaul Traina 
18896f9cba8fSJoseph Mingrone 		/*
18906f9cba8fSJoseph Mingrone 		 * Break out of the loop if that node's value of A
18916f9cba8fSJoseph Mingrone 		 * isn't the value of A above XXX
18926f9cba8fSJoseph Mingrone 		 */
18938cf6c252SPaul Traina 		if ((*diffp)->val[A_ATOM] != val)
18948cf6c252SPaul Traina 			break;
18958cf6c252SPaul Traina 
18966f9cba8fSJoseph Mingrone 		/*
18976f9cba8fSJoseph Mingrone 		 * Get the JF for that node XXX
18986f9cba8fSJoseph Mingrone 		 * Go down the false path.
18996f9cba8fSJoseph Mingrone 		 */
19008cf6c252SPaul Traina 		diffp = &JF(*diffp);
19018cf6c252SPaul Traina 		at_top = 0;
19028cf6c252SPaul Traina 	}
19036f9cba8fSJoseph Mingrone 
19046f9cba8fSJoseph Mingrone 	/*
19056f9cba8fSJoseph Mingrone 	 * Now that we've found a different jump-compare in a chain
19066f9cba8fSJoseph Mingrone 	 * below b, search further down until we find another
19076f9cba8fSJoseph Mingrone 	 * jump-compare that looks at the original value.  This
19086f9cba8fSJoseph Mingrone 	 * jump-compare should get pulled up.  XXX again we're
19096f9cba8fSJoseph Mingrone 	 * comparing values not jump-compares.
19106f9cba8fSJoseph Mingrone 	 */
19118cf6c252SPaul Traina 	samep = &JF(*diffp);
1912b00ab754SHans Petter Selasky 	for (;;) {
19136f9cba8fSJoseph Mingrone 		/*
19146f9cba8fSJoseph Mingrone 		 * Done if that's not going anywhere XXX
19156f9cba8fSJoseph Mingrone 		 */
19168cf6c252SPaul Traina 		if (*samep == 0)
19178cf6c252SPaul Traina 			return;
19188cf6c252SPaul Traina 
19196f9cba8fSJoseph Mingrone 		/*
19206f9cba8fSJoseph Mingrone 		 * Done if the block we're looking at isn't going the same
19216f9cba8fSJoseph Mingrone 		 * place we're going (its true edge doesn't match b's).
19226f9cba8fSJoseph Mingrone 		 */
19238cf6c252SPaul Traina 		if (JT(*samep) != JT(b))
19248cf6c252SPaul Traina 			return;
19258cf6c252SPaul Traina 
19266f9cba8fSJoseph Mingrone 		/*
19276f9cba8fSJoseph Mingrone 		 * Done if b doesn't dominate the block we're looking
19286f9cba8fSJoseph Mingrone 		 * at, i.e. if b isn't in (*samep)->dom.
19316f9cba8fSJoseph Mingrone 		 */
19328cf6c252SPaul Traina 		if (!SET_MEMBER((*samep)->dom, b->id))
19338cf6c252SPaul Traina 			return;
19348cf6c252SPaul Traina 
19356f9cba8fSJoseph Mingrone 		/*
19366f9cba8fSJoseph Mingrone 		 * Break out of the loop if that node's value of A
19376f9cba8fSJoseph Mingrone 		 * is the value of A above XXX
19386f9cba8fSJoseph Mingrone 		 */
19398cf6c252SPaul Traina 		if ((*samep)->val[A_ATOM] == val)
19408cf6c252SPaul Traina 			break;
19418cf6c252SPaul Traina 
19428cf6c252SPaul Traina 		/* XXX Need to check that there are no data dependencies
19438cf6c252SPaul Traina 		   between dp0 and dp1.  Currently, the code generator
19448cf6c252SPaul Traina 		   will not produce such dependencies. */
19458cf6c252SPaul Traina 		samep = &JF(*samep);
19468cf6c252SPaul Traina 	}
19478cf6c252SPaul Traina #ifdef notdef
19488cf6c252SPaul Traina 	/* XXX This doesn't cover everything. */
19498cf6c252SPaul Traina 	for (i = 0; i < N_ATOMS; ++i)
19508cf6c252SPaul Traina 		if ((*samep)->val[i] != pred->val[i])
19518cf6c252SPaul Traina 			return;
19528cf6c252SPaul Traina #endif
19538cf6c252SPaul Traina 	/* Pull up the node. */
19548cf6c252SPaul Traina 	pull = *samep;
19558cf6c252SPaul Traina 	*samep = JF(pull);
19568cf6c252SPaul Traina 	JF(pull) = *diffp;
19578cf6c252SPaul Traina 
19588cf6c252SPaul Traina 	/*
19598cf6c252SPaul Traina 	 * At the top of the chain, each predecessor needs to point at the
19608cf6c252SPaul Traina 	 * pulled up node.  Inside the chain, there is only one predecessor
19618cf6c252SPaul Traina 	 * to worry about.
19628cf6c252SPaul Traina 	 */
19638cf6c252SPaul Traina 	if (at_top) {
19648cf6c252SPaul Traina 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
19658cf6c252SPaul Traina 			if (JT(ep->pred) == b)
19668cf6c252SPaul Traina 				JT(ep->pred) = pull;
19678cf6c252SPaul Traina 			else
19688cf6c252SPaul Traina 				JF(ep->pred) = pull;
19698cf6c252SPaul Traina 		}
19708cf6c252SPaul Traina 	}
19718cf6c252SPaul Traina 	else
19728cf6c252SPaul Traina 		*diffp = pull;
19738cf6c252SPaul Traina 
19746f9cba8fSJoseph Mingrone 	/*
19756f9cba8fSJoseph Mingrone 	 * XXX - this is one of the operations that happens when the
19766f9cba8fSJoseph Mingrone 	 * optimizer gets into one of those infinite loops.
19776f9cba8fSJoseph Mingrone 	 */
1978ada6f083SXin LI 	opt_state->done = 0;
19798cf6c252SPaul Traina }
19808cf6c252SPaul Traina 
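/*
 * The mirror image of or_pullup(): walk the true chain, requiring
 * each block's false edge to match b's false edge, so nodes get
 * pulled up in an "A and B" chain instead of an "A or B" chain.
 */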
19818cf6c252SPaul Traina static void
1982ada6f083SXin LI and_pullup(opt_state_t *opt_state, struct block *b)
19838cf6c252SPaul Traina {
19846f9cba8fSJoseph Mingrone 	bpf_u_int32 val;
19856f9cba8fSJoseph Mingrone 	int at_top;
19868cf6c252SPaul Traina 	struct block *pull;
19878cf6c252SPaul Traina 	struct block **diffp, **samep;
19888cf6c252SPaul Traina 	struct edge *ep;
19898cf6c252SPaul Traina 
19908cf6c252SPaul Traina 	ep = b->in_edges;
19918cf6c252SPaul Traina 	if (ep == 0)
19928cf6c252SPaul Traina 		return;
19938cf6c252SPaul Traina 
19948cf6c252SPaul Traina 	/*
19958cf6c252SPaul Traina 	 * Make sure each predecessor loads the same value.
19968cf6c252SPaul Traina 	 */
19978cf6c252SPaul Traina 	val = ep->pred->val[A_ATOM];
19988cf6c252SPaul Traina 	for (ep = ep->next; ep != 0; ep = ep->next)
19998cf6c252SPaul Traina 		if (val != ep->pred->val[A_ATOM])
20008cf6c252SPaul Traina 			return;
20018cf6c252SPaul Traina 
20028cf6c252SPaul Traina 	if (JT(b->in_edges->pred) == b)
20038cf6c252SPaul Traina 		diffp = &JT(b->in_edges->pred);
20048cf6c252SPaul Traina 	else
20058cf6c252SPaul Traina 		diffp = &JF(b->in_edges->pred);
20068cf6c252SPaul Traina 
20078cf6c252SPaul Traina 	at_top = 1;
2008b00ab754SHans Petter Selasky 	for (;;) {
20098cf6c252SPaul Traina 		if (*diffp == 0)
20108cf6c252SPaul Traina 			return;
20118cf6c252SPaul Traina 
20128cf6c252SPaul Traina 		if (JF(*diffp) != JF(b))
20138cf6c252SPaul Traina 			return;
20148cf6c252SPaul Traina 
20158cf6c252SPaul Traina 		if (!SET_MEMBER((*diffp)->dom, b->id))
20168cf6c252SPaul Traina 			return;
20178cf6c252SPaul Traina 
20188cf6c252SPaul Traina 		if ((*diffp)->val[A_ATOM] != val)
20198cf6c252SPaul Traina 			break;
20208cf6c252SPaul Traina 
20218cf6c252SPaul Traina 		diffp = &JT(*diffp);
20228cf6c252SPaul Traina 		at_top = 0;
20238cf6c252SPaul Traina 	}
20248cf6c252SPaul Traina 	samep = &JT(*diffp);
2025b00ab754SHans Petter Selasky 	for (;;) {
20268cf6c252SPaul Traina 		if (*samep == 0)
20278cf6c252SPaul Traina 			return;
20288cf6c252SPaul Traina 
20298cf6c252SPaul Traina 		if (JF(*samep) != JF(b))
20308cf6c252SPaul Traina 			return;
20318cf6c252SPaul Traina 
20328cf6c252SPaul Traina 		if (!SET_MEMBER((*samep)->dom, b->id))
20338cf6c252SPaul Traina 			return;
20348cf6c252SPaul Traina 
20358cf6c252SPaul Traina 		if ((*samep)->val[A_ATOM] == val)
20368cf6c252SPaul Traina 			break;
20378cf6c252SPaul Traina 
20388cf6c252SPaul Traina 		/* XXX Need to check that there are no data dependencies
20398cf6c252SPaul Traina 		   between diffp and samep.  Currently, the code generator
20408cf6c252SPaul Traina 		   will not produce such dependencies. */
20418cf6c252SPaul Traina 		samep = &JT(*samep);
20428cf6c252SPaul Traina 	}
20438cf6c252SPaul Traina #ifdef notdef
20448cf6c252SPaul Traina 	/* XXX This doesn't cover everything. */
20458cf6c252SPaul Traina 	for (i = 0; i < N_ATOMS; ++i)
20468cf6c252SPaul Traina 		if ((*samep)->val[i] != pred->val[i])
20478cf6c252SPaul Traina 			return;
20488cf6c252SPaul Traina #endif
20498cf6c252SPaul Traina 	/* Pull up the node. */
20508cf6c252SPaul Traina 	pull = *samep;
20518cf6c252SPaul Traina 	*samep = JT(pull);
20528cf6c252SPaul Traina 	JT(pull) = *diffp;
20538cf6c252SPaul Traina 
20548cf6c252SPaul Traina 	/*
20558cf6c252SPaul Traina 	 * At the top of the chain, each predecessor needs to point at the
20568cf6c252SPaul Traina 	 * pulled up node.  Inside the chain, there is only one predecessor
20578cf6c252SPaul Traina 	 * to worry about.
20588cf6c252SPaul Traina 	 */
20598cf6c252SPaul Traina 	if (at_top) {
20608cf6c252SPaul Traina 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
20618cf6c252SPaul Traina 			if (JT(ep->pred) == b)
20628cf6c252SPaul Traina 				JT(ep->pred) = pull;
20638cf6c252SPaul Traina 			else
20648cf6c252SPaul Traina 				JF(ep->pred) = pull;
20658cf6c252SPaul Traina 		}
20668cf6c252SPaul Traina 	}
20678cf6c252SPaul Traina 	else
20688cf6c252SPaul Traina 		*diffp = pull;
20698cf6c252SPaul Traina 
20706f9cba8fSJoseph Mingrone 	/*
20716f9cba8fSJoseph Mingrone 	 * XXX - this is one of the operations that happens when the
20726f9cba8fSJoseph Mingrone 	 * optimizer gets into one of those infinite loops.
20736f9cba8fSJoseph Mingrone 	 */
2074ada6f083SXin LI 	opt_state->done = 0;
20758cf6c252SPaul Traina }
20768cf6c252SPaul Traina 
20778cf6c252SPaul Traina static void
207857e22627SCy Schubert opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
20798cf6c252SPaul Traina {
20808cf6c252SPaul Traina 	int i, maxlevel;
20818cf6c252SPaul Traina 	struct block *p;
20828cf6c252SPaul Traina 
2083ada6f083SXin LI 	init_val(opt_state);
2084ada6f083SXin LI 	maxlevel = ic->root->level;
2085dc2c7305SBill Fenner 
2086ada6f083SXin LI 	find_inedges(opt_state, ic->root);
20878cf6c252SPaul Traina 	for (i = maxlevel; i >= 0; --i)
2088ada6f083SXin LI 		for (p = opt_state->levels[i]; p; p = p->link)
208957e22627SCy Schubert 			opt_blk(opt_state, p, do_stmts);
20908cf6c252SPaul Traina 
20918cf6c252SPaul Traina 	if (do_stmts)
20928cf6c252SPaul Traina 		/*
20938cf6c252SPaul Traina 		 * No point trying to move branches; it can't possibly
20948cf6c252SPaul Traina 		 * make a difference at this point.
20956f9cba8fSJoseph Mingrone 		 *
20966f9cba8fSJoseph Mingrone 		 * XXX - this might be after we detect a loop where
20976f9cba8fSJoseph Mingrone 		 * we were just looping infinitely moving branches
20986f9cba8fSJoseph Mingrone 		 * in such a fashion that we went through two or more
20996f9cba8fSJoseph Mingrone 		 * versions of the machine code, eventually returning
21006f9cba8fSJoseph Mingrone 		 * to the first version.  (We're really not doing a
21016f9cba8fSJoseph Mingrone 		 * full loop detection, we're just testing for two
21026f9cba8fSJoseph Mingrone 		 * passes in a row where we do nothing but
21036f9cba8fSJoseph Mingrone 		 * move branches.)
21048cf6c252SPaul Traina 		 */
21058cf6c252SPaul Traina 		return;
21068cf6c252SPaul Traina 
21076f9cba8fSJoseph Mingrone 	/*
21086f9cba8fSJoseph Mingrone 	 * Is this what the BPF+ paper describes in sections 6.1.1,
21096f9cba8fSJoseph Mingrone 	 * 6.1.2, and 6.1.3?
21106f9cba8fSJoseph Mingrone 	 */
21118cf6c252SPaul Traina 	for (i = 1; i <= maxlevel; ++i) {
2112ada6f083SXin LI 		for (p = opt_state->levels[i]; p; p = p->link) {
2113ada6f083SXin LI 			opt_j(opt_state, &p->et);
2114ada6f083SXin LI 			opt_j(opt_state, &p->ef);
21158cf6c252SPaul Traina 		}
21168cf6c252SPaul Traina 	}
2117dc2c7305SBill Fenner 
2118ada6f083SXin LI 	find_inedges(opt_state, ic->root);
21198cf6c252SPaul Traina 	for (i = 1; i <= maxlevel; ++i) {
2120ada6f083SXin LI 		for (p = opt_state->levels[i]; p; p = p->link) {
2121ada6f083SXin LI 			or_pullup(opt_state, p);
2122ada6f083SXin LI 			and_pullup(opt_state, p);
21238cf6c252SPaul Traina 		}
21248cf6c252SPaul Traina 	}
21258cf6c252SPaul Traina }
21268cf6c252SPaul Traina 
21278cf6c252SPaul Traina static inline void
2128edc89b24SXin LI link_inedge(struct edge *parent, struct block *child)
21298cf6c252SPaul Traina {
21308cf6c252SPaul Traina 	parent->next = child->in_edges;
21318cf6c252SPaul Traina 	child->in_edges = parent;
21328cf6c252SPaul Traina }
21338cf6c252SPaul Traina 
21348cf6c252SPaul Traina static void
2135ada6f083SXin LI find_inedges(opt_state_t *opt_state, struct block *root)
21368cf6c252SPaul Traina {
21376f9cba8fSJoseph Mingrone 	u_int i;
21386f9cba8fSJoseph Mingrone 	int level;
21398cf6c252SPaul Traina 	struct block *b;
21408cf6c252SPaul Traina 
2141ada6f083SXin LI 	for (i = 0; i < opt_state->n_blocks; ++i)
2142ada6f083SXin LI 		opt_state->blocks[i]->in_edges = 0;
21438cf6c252SPaul Traina 
21448cf6c252SPaul Traina 	/*
21458cf6c252SPaul Traina 	 * Traverse the graph, adding each edge to the predecessor
21468cf6c252SPaul Traina 	 * list of its successor.  Skip the leaves (i.e. level 0).
21478cf6c252SPaul Traina 	 */
21486f9cba8fSJoseph Mingrone 	for (level = root->level; level > 0; --level) {
21496f9cba8fSJoseph Mingrone 		for (b = opt_state->levels[level]; b != 0; b = b->link) {
21508cf6c252SPaul Traina 			link_inedge(&b->et, JT(b));
21518cf6c252SPaul Traina 			link_inedge(&b->ef, JF(b));
21528cf6c252SPaul Traina 		}
21538cf6c252SPaul Traina 	}
21548cf6c252SPaul Traina }
21558cf6c252SPaul Traina 
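/*
 * Make the root the first block that actually makes a decision:
 * while the root ends in an unconditional jump (both branches the
 * same), descend to its target; the original root's statements are
 * prepended to the new root's so that they still run first.
 */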
21568cf6c252SPaul Traina static void
2157edc89b24SXin LI opt_root(struct block **b)
21588cf6c252SPaul Traina {
21598cf6c252SPaul Traina 	struct slist *tmp, *s;
21608cf6c252SPaul Traina 
21618cf6c252SPaul Traina 	s = (*b)->stmts;
21628cf6c252SPaul Traina 	(*b)->stmts = 0;
21638cf6c252SPaul Traina 	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
21648cf6c252SPaul Traina 		*b = JT(*b);
21658cf6c252SPaul Traina 
21668cf6c252SPaul Traina 	tmp = (*b)->stmts;
21678cf6c252SPaul Traina 	if (tmp != 0)
21688cf6c252SPaul Traina 		sappend(s, tmp);
21698cf6c252SPaul Traina 	(*b)->stmts = s;
21708cf6c252SPaul Traina 
21718cf6c252SPaul Traina 	/*
21728cf6c252SPaul Traina 	 * If the root node is a return, then there is no
21738cf6c252SPaul Traina 	 * point executing any statements (since the bpf machine
21748cf6c252SPaul Traina 	 * has no side effects).
21758cf6c252SPaul Traina 	 */
21768cf6c252SPaul Traina 	if (BPF_CLASS((*b)->s.code) == BPF_RET)
21778cf6c252SPaul Traina 		(*b)->stmts = 0;
21788cf6c252SPaul Traina }
21798cf6c252SPaul Traina 
21808cf6c252SPaul Traina static void
218157e22627SCy Schubert opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
21828cf6c252SPaul Traina {
21838cf6c252SPaul Traina 
21848cf6c252SPaul Traina #ifdef BDEBUG
2185b00ab754SHans Petter Selasky 	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
21860a94d38fSBill Fenner 		printf("opt_loop(root, %d) begin\n", do_stmts);
218757e22627SCy Schubert 		opt_dump(opt_state, ic);
21880a94d38fSBill Fenner 	}
21898cf6c252SPaul Traina #endif
21906f9cba8fSJoseph Mingrone 
21916f9cba8fSJoseph Mingrone 	/*
21926f9cba8fSJoseph Mingrone 	 * XXX - optimizer loop detection.
21936f9cba8fSJoseph Mingrone 	 */
21946f9cba8fSJoseph Mingrone 	int loop_count = 0;
21956f9cba8fSJoseph Mingrone 	for (;;) {
2196ada6f083SXin LI 		opt_state->done = 1;
21976f9cba8fSJoseph Mingrone 		/*
21986f9cba8fSJoseph Mingrone 		 * XXX - optimizer loop detection.
21996f9cba8fSJoseph Mingrone 		 */
22006f9cba8fSJoseph Mingrone 		opt_state->non_branch_movement_performed = 0;
2201ada6f083SXin LI 		find_levels(opt_state, ic);
2202ada6f083SXin LI 		find_dom(opt_state, ic->root);
2203ada6f083SXin LI 		find_closure(opt_state, ic->root);
2204ada6f083SXin LI 		find_ud(opt_state, ic->root);
2205ada6f083SXin LI 		find_edom(opt_state, ic->root);
220657e22627SCy Schubert 		opt_blks(opt_state, ic, do_stmts);
22078cf6c252SPaul Traina #ifdef BDEBUG
2208b00ab754SHans Petter Selasky 		if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2209ada6f083SXin LI 			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
221057e22627SCy Schubert 			opt_dump(opt_state, ic);
22110a94d38fSBill Fenner 		}
22128cf6c252SPaul Traina #endif
22136f9cba8fSJoseph Mingrone 
22146f9cba8fSJoseph Mingrone 		/*
22156f9cba8fSJoseph Mingrone 		 * Was anything done in this optimizer pass?
22166f9cba8fSJoseph Mingrone 		 */
22176f9cba8fSJoseph Mingrone 		if (opt_state->done) {
22186f9cba8fSJoseph Mingrone 			/*
22196f9cba8fSJoseph Mingrone 			 * No, so we've reached a fixed point.
22206f9cba8fSJoseph Mingrone 			 * We're done.
22216f9cba8fSJoseph Mingrone 			 */
22226f9cba8fSJoseph Mingrone 			break;
22236f9cba8fSJoseph Mingrone 		}
22246f9cba8fSJoseph Mingrone 
22256f9cba8fSJoseph Mingrone 		/*
22266f9cba8fSJoseph Mingrone 		 * XXX - was anything done other than branch movement
22276f9cba8fSJoseph Mingrone 		 * in this pass?
22286f9cba8fSJoseph Mingrone 		 */
22296f9cba8fSJoseph Mingrone 		if (opt_state->non_branch_movement_performed) {
22306f9cba8fSJoseph Mingrone 			/*
22316f9cba8fSJoseph Mingrone 			 * Yes.  Clear any loop-detection counter;
22326f9cba8fSJoseph Mingrone 			 * we're making some form of progress (assuming
22336f9cba8fSJoseph Mingrone 			 * we can't get into a cycle doing *other*
22346f9cba8fSJoseph Mingrone 			 * optimizations...).
22356f9cba8fSJoseph Mingrone 			 */
22366f9cba8fSJoseph Mingrone 			loop_count = 0;
22376f9cba8fSJoseph Mingrone 		} else {
22386f9cba8fSJoseph Mingrone 			/*
22396f9cba8fSJoseph Mingrone 			 * No - increment the counter, and quit if
22406f9cba8fSJoseph Mingrone 			 * it's up to 100.
22416f9cba8fSJoseph Mingrone 			 */
22426f9cba8fSJoseph Mingrone 			loop_count++;
22436f9cba8fSJoseph Mingrone 			if (loop_count >= 100) {
22446f9cba8fSJoseph Mingrone 				/*
22456f9cba8fSJoseph Mingrone 				 * We've done nothing but branch movement
22466f9cba8fSJoseph Mingrone 				 * for 100 passes; we're probably
22476f9cba8fSJoseph Mingrone 				 * in a cycle and will never reach a
22486f9cba8fSJoseph Mingrone 				 * fixed point.
22496f9cba8fSJoseph Mingrone 				 *
22506f9cba8fSJoseph Mingrone 				 * XXX - yes, we really need a non-
22516f9cba8fSJoseph Mingrone 				 * heuristic way of detecting a cycle.
22526f9cba8fSJoseph Mingrone 				 */
22536f9cba8fSJoseph Mingrone 				opt_state->done = 1;
22546f9cba8fSJoseph Mingrone 				break;
22556f9cba8fSJoseph Mingrone 			}
22566f9cba8fSJoseph Mingrone 		}
22576f9cba8fSJoseph Mingrone 	}
22588cf6c252SPaul Traina }
22598cf6c252SPaul Traina 
22608cf6c252SPaul Traina /*
22618cf6c252SPaul Traina  * Optimize the filter code in its dag representation.
226257e22627SCy Schubert  * Return 0 on success, -1 on error.
22638cf6c252SPaul Traina  */
226457e22627SCy Schubert int
226557e22627SCy Schubert bpf_optimize(struct icode *ic, char *errbuf)
22668cf6c252SPaul Traina {
2267ada6f083SXin LI 	opt_state_t opt_state;
22688cf6c252SPaul Traina 
226957e22627SCy Schubert 	memset(&opt_state, 0, sizeof(opt_state));
227057e22627SCy Schubert 	opt_state.errbuf = errbuf;
22716f9cba8fSJoseph Mingrone 	opt_state.non_branch_movement_performed = 0;
227257e22627SCy Schubert 	if (setjmp(opt_state.top_ctx)) {
227357e22627SCy Schubert 		opt_cleanup(&opt_state);
227457e22627SCy Schubert 		return -1;
227557e22627SCy Schubert 	}
227657e22627SCy Schubert 	opt_init(&opt_state, ic);
227757e22627SCy Schubert 	opt_loop(&opt_state, ic, 0);
227857e22627SCy Schubert 	opt_loop(&opt_state, ic, 1);
2279ada6f083SXin LI 	intern_blocks(&opt_state, ic);
22800a94d38fSBill Fenner #ifdef BDEBUG
2281b00ab754SHans Petter Selasky 	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
22820a94d38fSBill Fenner 		printf("after intern_blocks()\n");
228357e22627SCy Schubert 		opt_dump(&opt_state, ic);
22840a94d38fSBill Fenner 	}
22850a94d38fSBill Fenner #endif
2286ada6f083SXin LI 	opt_root(&ic->root);
22870a94d38fSBill Fenner #ifdef BDEBUG
2288b00ab754SHans Petter Selasky 	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
22890a94d38fSBill Fenner 		printf("after opt_root()\n");
229057e22627SCy Schubert 		opt_dump(&opt_state, ic);
22910a94d38fSBill Fenner 	}
22920a94d38fSBill Fenner #endif
2293ada6f083SXin LI 	opt_cleanup(&opt_state);
229457e22627SCy Schubert 	return 0;
22958cf6c252SPaul Traina }
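
/*
 * Editorial example (not part of the original file): a minimal sketch of
 * how a caller might drive bpf_optimize().  The real call site lives in
 * the filter compiler and may differ in detail; this only illustrates the
 * contract stated above - 0 on success, -1 on error with a message left
 * in the PCAP_ERRBUF_SIZE-sized buffer passed as errbuf.
 *
 *	char errbuf[PCAP_ERRBUF_SIZE];
 *
 *	if (bpf_optimize(&ic, errbuf) == -1) {
 *		fprintf(stderr, "optimizer: %s\n", errbuf);
 *		return (-1);	// errbuf explains the failure
 *	}
 *	// on success, ic.root points at the optimized DAG
 */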
22968cf6c252SPaul Traina 
22978cf6c252SPaul Traina static void
2298ada6f083SXin LI make_marks(struct icode *ic, struct block *p)
22998cf6c252SPaul Traina {
2300ada6f083SXin LI 	if (!isMarked(ic, p)) {
2301ada6f083SXin LI 		Mark(ic, p);
23028cf6c252SPaul Traina 		if (BPF_CLASS(p->s.code) != BPF_RET) {
2303ada6f083SXin LI 			make_marks(ic, JT(p));
2304ada6f083SXin LI 			make_marks(ic, JF(p));
23058cf6c252SPaul Traina 		}
23068cf6c252SPaul Traina 	}
23078cf6c252SPaul Traina }
23088cf6c252SPaul Traina 
23098cf6c252SPaul Traina /*
2310ada6f083SXin LI  * Mark code array such that isMarked(ic->cur_mark, i) is true
23118cf6c252SPaul Traina  * only for nodes that are alive.
23128cf6c252SPaul Traina  */
23138cf6c252SPaul Traina static void
2314ada6f083SXin LI mark_code(struct icode *ic)
23158cf6c252SPaul Traina {
2316ada6f083SXin LI 	ic->cur_mark += 1;
2317ada6f083SXin LI 	make_marks(ic, ic->root);
23188cf6c252SPaul Traina }
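
/*
 * Editorial note (an inference from how the macros are used here, not a
 * statement of their exact definitions): Mark()/isMarked() appear to
 * compare a per-block mark value against ic->cur_mark, so bumping
 * ic->cur_mark - as mark_code() does above - invalidates every existing
 * mark in O(1) instead of having to walk all blocks to clear them.
 */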
23198cf6c252SPaul Traina 
23208cf6c252SPaul Traina /*
23218cf6c252SPaul Traina  * True iff the two stmt lists load the same value from the packet into
23228cf6c252SPaul Traina  * the accumulator.
23238cf6c252SPaul Traina  */
23248cf6c252SPaul Traina static int
2325edc89b24SXin LI eq_slist(struct slist *x, struct slist *y)
23268cf6c252SPaul Traina {
2327b00ab754SHans Petter Selasky 	for (;;) {
23288cf6c252SPaul Traina 		while (x && x->s.code == NOP)
23298cf6c252SPaul Traina 			x = x->next;
23308cf6c252SPaul Traina 		while (y && y->s.code == NOP)
23318cf6c252SPaul Traina 			y = y->next;
23328cf6c252SPaul Traina 		if (x == 0)
23338cf6c252SPaul Traina 			return y == 0;
23348cf6c252SPaul Traina 		if (y == 0)
23358cf6c252SPaul Traina 			return x == 0;
23368cf6c252SPaul Traina 		if (x->s.code != y->s.code || x->s.k != y->s.k)
23378cf6c252SPaul Traina 			return 0;
23388cf6c252SPaul Traina 		x = x->next;
23398cf6c252SPaul Traina 		y = y->next;
23408cf6c252SPaul Traina 	}
23418cf6c252SPaul Traina }
23428cf6c252SPaul Traina 
23438cf6c252SPaul Traina static inline int
2344edc89b24SXin LI eq_blk(struct block *b0, struct block *b1)
23458cf6c252SPaul Traina {
23468cf6c252SPaul Traina 	if (b0->s.code == b1->s.code &&
23478cf6c252SPaul Traina 	    b0->s.k == b1->s.k &&
23488cf6c252SPaul Traina 	    b0->et.succ == b1->et.succ &&
23498cf6c252SPaul Traina 	    b0->ef.succ == b1->ef.succ)
23508cf6c252SPaul Traina 		return eq_slist(b0->stmts, b1->stmts);
23518cf6c252SPaul Traina 	return 0;
23528cf6c252SPaul Traina }
23538cf6c252SPaul Traina 
23548cf6c252SPaul Traina static void
2355ada6f083SXin LI intern_blocks(opt_state_t *opt_state, struct icode *ic)
23568cf6c252SPaul Traina {
23578cf6c252SPaul Traina 	struct block *p;
23586f9cba8fSJoseph Mingrone 	u_int i, j;
2359ef96d74fSMax Laier 	int done1; /* don't shadow global */
23608cf6c252SPaul Traina  top:
2361ef96d74fSMax Laier 	done1 = 1;
2362ada6f083SXin LI 	for (i = 0; i < opt_state->n_blocks; ++i)
2363ada6f083SXin LI 		opt_state->blocks[i]->link = 0;
23648cf6c252SPaul Traina 
2365ada6f083SXin LI 	mark_code(ic);
23668cf6c252SPaul Traina 
23676f9cba8fSJoseph Mingrone 	for (i = opt_state->n_blocks - 1; i != 0; ) {
23686f9cba8fSJoseph Mingrone 		--i;
2369ada6f083SXin LI 		if (!isMarked(ic, opt_state->blocks[i]))
23708cf6c252SPaul Traina 			continue;
2371ada6f083SXin LI 		for (j = i + 1; j < opt_state->n_blocks; ++j) {
2372ada6f083SXin LI 			if (!isMarked(ic, opt_state->blocks[j]))
23738cf6c252SPaul Traina 				continue;
2374ada6f083SXin LI 			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
2375ada6f083SXin LI 				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
2376ada6f083SXin LI 					opt_state->blocks[j]->link : opt_state->blocks[j];
23778cf6c252SPaul Traina 				break;
23788cf6c252SPaul Traina 			}
23798cf6c252SPaul Traina 		}
23808cf6c252SPaul Traina 	}
2381ada6f083SXin LI 	for (i = 0; i < opt_state->n_blocks; ++i) {
2382ada6f083SXin LI 		p = opt_state->blocks[i];
23838cf6c252SPaul Traina 		if (JT(p) == 0)
23848cf6c252SPaul Traina 			continue;
23858cf6c252SPaul Traina 		if (JT(p)->link) {
2386ef96d74fSMax Laier 			done1 = 0;
23878cf6c252SPaul Traina 			JT(p) = JT(p)->link;
23888cf6c252SPaul Traina 		}
23898cf6c252SPaul Traina 		if (JF(p)->link) {
2390ef96d74fSMax Laier 			done1 = 0;
23918cf6c252SPaul Traina 			JF(p) = JF(p)->link;
23928cf6c252SPaul Traina 		}
23938cf6c252SPaul Traina 	}
2394ef96d74fSMax Laier 	if (!done1)
23958cf6c252SPaul Traina 		goto top;
23968cf6c252SPaul Traina }
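
/*
 * Editorial example (illustrative only) of what intern_blocks() does:
 * suppose two reachable blocks are both a bare "ret #0" - same s.code,
 * same s.k, no statements and no successors - so eq_blk() reports them
 * equal.  The lower-numbered block gets its 'link' pointed at the
 * equivalent higher-numbered one, every JT()/JF() edge that targeted the
 * duplicate is redirected to the survivor, and the pass repeats until no
 * more edges move; the duplicate simply becomes unreachable.
 */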
23978cf6c252SPaul Traina 
23988cf6c252SPaul Traina static void
2399ada6f083SXin LI opt_cleanup(opt_state_t *opt_state)
24008cf6c252SPaul Traina {
2401ada6f083SXin LI 	free((void *)opt_state->vnode_base);
2402ada6f083SXin LI 	free((void *)opt_state->vmap);
2403ada6f083SXin LI 	free((void *)opt_state->edges);
2404ada6f083SXin LI 	free((void *)opt_state->space);
2405ada6f083SXin LI 	free((void *)opt_state->levels);
2406ada6f083SXin LI 	free((void *)opt_state->blocks);
24078cf6c252SPaul Traina }
24088cf6c252SPaul Traina 
24098cf6c252SPaul Traina /*
241057e22627SCy Schubert  * For optimizer errors.
241157e22627SCy Schubert  */
241257e22627SCy Schubert static void PCAP_NORETURN
241357e22627SCy Schubert opt_error(opt_state_t *opt_state, const char *fmt, ...)
241457e22627SCy Schubert {
241557e22627SCy Schubert 	va_list ap;
241657e22627SCy Schubert 
241757e22627SCy Schubert 	if (opt_state->errbuf != NULL) {
241857e22627SCy Schubert 		va_start(ap, fmt);
24196f9cba8fSJoseph Mingrone 		(void)vsnprintf(opt_state->errbuf,
242057e22627SCy Schubert 		    PCAP_ERRBUF_SIZE, fmt, ap);
242157e22627SCy Schubert 		va_end(ap);
242257e22627SCy Schubert 	}
242357e22627SCy Schubert 	longjmp(opt_state->top_ctx, 1);
242457e22627SCy Schubert 	/* NOTREACHED */
24256f9cba8fSJoseph Mingrone #ifdef _AIX
24266f9cba8fSJoseph Mingrone 	PCAP_UNREACHABLE
24276f9cba8fSJoseph Mingrone #endif /* _AIX */
242857e22627SCy Schubert }
242957e22627SCy Schubert 
243057e22627SCy Schubert /*
24318cf6c252SPaul Traina  * Return the number of stmts in 's'.
24328cf6c252SPaul Traina  */
243315752fa8SXin LI static u_int
2434edc89b24SXin LI slength(struct slist *s)
24358cf6c252SPaul Traina {
243615752fa8SXin LI 	u_int n = 0;
24378cf6c252SPaul Traina 
24388cf6c252SPaul Traina 	for (; s; s = s->next)
24398cf6c252SPaul Traina 		if (s->s.code != NOP)
24408cf6c252SPaul Traina 			++n;
24418cf6c252SPaul Traina 	return n;
24428cf6c252SPaul Traina }
24438cf6c252SPaul Traina 
24448cf6c252SPaul Traina /*
24458cf6c252SPaul Traina  * Return the number of nodes reachable by 'p'.
24468cf6c252SPaul Traina  * All nodes should be initially unmarked.
24478cf6c252SPaul Traina  */
24488cf6c252SPaul Traina static int
2449ada6f083SXin LI count_blocks(struct icode *ic, struct block *p)
24508cf6c252SPaul Traina {
2451ada6f083SXin LI 	if (p == 0 || isMarked(ic, p))
24528cf6c252SPaul Traina 		return 0;
2453ada6f083SXin LI 	Mark(ic, p);
2454ada6f083SXin LI 	return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
24558cf6c252SPaul Traina }
24568cf6c252SPaul Traina 
24578cf6c252SPaul Traina /*
24588cf6c252SPaul Traina  * Do a depth first search on the flow graph, numbering the
24598cf6c252SPaul Traina  * basic blocks, and entering them into the 'blocks' array.
24608cf6c252SPaul Traina  */
24618cf6c252SPaul Traina static void
2462ada6f083SXin LI number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
24638cf6c252SPaul Traina {
24646f9cba8fSJoseph Mingrone 	u_int n;
24658cf6c252SPaul Traina 
2466ada6f083SXin LI 	if (p == 0 || isMarked(ic, p))
24678cf6c252SPaul Traina 		return;
24688cf6c252SPaul Traina 
2469ada6f083SXin LI 	Mark(ic, p);
2470ada6f083SXin LI 	n = opt_state->n_blocks++;
24716f9cba8fSJoseph Mingrone 	if (opt_state->n_blocks == 0) {
24726f9cba8fSJoseph Mingrone 		/*
24736f9cba8fSJoseph Mingrone 		 * Overflow.
24746f9cba8fSJoseph Mingrone 		 */
24756f9cba8fSJoseph Mingrone 		opt_error(opt_state, "filter is too complex to optimize");
24766f9cba8fSJoseph Mingrone 	}
24778cf6c252SPaul Traina 	p->id = n;
2478ada6f083SXin LI 	opt_state->blocks[n] = p;
24798cf6c252SPaul Traina 
2480ada6f083SXin LI 	number_blks_r(opt_state, ic, JT(p));
2481ada6f083SXin LI 	number_blks_r(opt_state, ic, JF(p));
24828cf6c252SPaul Traina }
24838cf6c252SPaul Traina 
24848cf6c252SPaul Traina /*
24858cf6c252SPaul Traina  * Return the number of stmts in the flowgraph reachable by 'p'.
24868cf6c252SPaul Traina  * The nodes should be unmarked before calling.
2487dc2c7305SBill Fenner  *
2488dc2c7305SBill Fenner  * Note that "stmts" means "instructions", and that this includes
2489dc2c7305SBill Fenner  *
2490dc2c7305SBill Fenner  *	side-effect statements in 'p' (slength(p->stmts));
2491dc2c7305SBill Fenner  *
2492dc2c7305SBill Fenner  *	statements in the true branch from 'p' (count_stmts(JT(p)));
2493dc2c7305SBill Fenner  *
2494dc2c7305SBill Fenner  *	statements in the false branch from 'p' (count_stmts(JF(p)));
2495dc2c7305SBill Fenner  *
2496dc2c7305SBill Fenner  *	the conditional jump itself (1);
2497dc2c7305SBill Fenner  *
2498dc2c7305SBill Fenner  *	an extra long jump if the true branch requires it (p->longjt);
2499dc2c7305SBill Fenner  *
2500dc2c7305SBill Fenner  *	an extra long jump if the false branch requires it (p->longjf).
25018cf6c252SPaul Traina  */
250215752fa8SXin LI static u_int
2503ada6f083SXin LI count_stmts(struct icode *ic, struct block *p)
25048cf6c252SPaul Traina {
250515752fa8SXin LI 	u_int n;
25068cf6c252SPaul Traina 
2507ada6f083SXin LI 	if (p == 0 || isMarked(ic, p))
25088cf6c252SPaul Traina 		return 0;
2509ada6f083SXin LI 	Mark(ic, p);
2510ada6f083SXin LI 	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
2511dc2c7305SBill Fenner 	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
25128cf6c252SPaul Traina }
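
/*
 * Editorial worked example (illustrative numbers only): for a block whose
 * statement list holds two non-NOP instructions, whose true branch needs
 * one extra long jump (p->longjt == 1, p->longjf == 0), and whose
 * subtrees contribute 5 and 3 instructions respectively, the recursion
 * above yields
 *
 *	n = count_stmts(JT(p)) + count_stmts(JF(p)) = 5 + 3 = 8
 *	slength(p->stmts) + n + 1 + p->longjt + p->longjf
 *	    = 2 + 8 + 1 + 1 + 0 = 12
 */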
25138cf6c252SPaul Traina 
25148cf6c252SPaul Traina /*
25158cf6c252SPaul Traina  * Allocate memory.  All allocation is done before optimization
25168cf6c252SPaul Traina  * is begun.  A linear bound on the size of all data structures is computed
25178cf6c252SPaul Traina  * from the total number of blocks and/or statements.
25188cf6c252SPaul Traina  */
25198cf6c252SPaul Traina static void
252057e22627SCy Schubert opt_init(opt_state_t *opt_state, struct icode *ic)
25218cf6c252SPaul Traina {
25228cf6c252SPaul Traina 	bpf_u_int32 *p;
25238cf6c252SPaul Traina 	int i, n, max_stmts;
25246f9cba8fSJoseph Mingrone 	u_int product;
25256f9cba8fSJoseph Mingrone 	size_t block_memsize, edge_memsize;
25268cf6c252SPaul Traina 
25278cf6c252SPaul Traina 	/*
25288cf6c252SPaul Traina 	 * First, count the blocks, so we can malloc an array to map
25298cf6c252SPaul Traina 	 * block number to block.  Then, put the blocks into the array.
25308cf6c252SPaul Traina 	 */
2531ada6f083SXin LI 	unMarkAll(ic);
2532ada6f083SXin LI 	n = count_blocks(ic, ic->root);
2533ada6f083SXin LI 	opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
2534ada6f083SXin LI 	if (opt_state->blocks == NULL)
253557e22627SCy Schubert 		opt_error(opt_state, "malloc");
2536ada6f083SXin LI 	unMarkAll(ic);
2537ada6f083SXin LI 	opt_state->n_blocks = 0;
2538ada6f083SXin LI 	number_blks_r(opt_state, ic, ic->root);
25398cf6c252SPaul Traina 
25406f9cba8fSJoseph Mingrone 	/*
25416f9cba8fSJoseph Mingrone 	 * This "should not happen".
25426f9cba8fSJoseph Mingrone 	 */
25436f9cba8fSJoseph Mingrone 	if (opt_state->n_blocks == 0)
25446f9cba8fSJoseph Mingrone 		opt_error(opt_state, "filter has no instructions; please report this as a libpcap issue");
25456f9cba8fSJoseph Mingrone 
2546ada6f083SXin LI 	opt_state->n_edges = 2 * opt_state->n_blocks;
25476f9cba8fSJoseph Mingrone 	if ((opt_state->n_edges / 2) != opt_state->n_blocks) {
25486f9cba8fSJoseph Mingrone 		/*
25496f9cba8fSJoseph Mingrone 		 * Overflow.
25506f9cba8fSJoseph Mingrone 		 */
25516f9cba8fSJoseph Mingrone 		opt_error(opt_state, "filter is too complex to optimize");
25526f9cba8fSJoseph Mingrone 	}
2553ada6f083SXin LI 	opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
255457e22627SCy Schubert 	if (opt_state->edges == NULL) {
255557e22627SCy Schubert 		opt_error(opt_state, "malloc");
255657e22627SCy Schubert 	}
25578cf6c252SPaul Traina 
25588cf6c252SPaul Traina 	/*
25598cf6c252SPaul Traina 	 * The number of levels is bounded by the number of nodes.
25608cf6c252SPaul Traina 	 */
2561ada6f083SXin LI 	opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
256257e22627SCy Schubert 	if (opt_state->levels == NULL) {
256357e22627SCy Schubert 		opt_error(opt_state, "malloc");
256457e22627SCy Schubert 	}
25658cf6c252SPaul Traina 
25666f9cba8fSJoseph Mingrone 	opt_state->edgewords = opt_state->n_edges / BITS_PER_WORD + 1;
25676f9cba8fSJoseph Mingrone 	opt_state->nodewords = opt_state->n_blocks / BITS_PER_WORD + 1;
25686f9cba8fSJoseph Mingrone 
25696f9cba8fSJoseph Mingrone 	/*
25706f9cba8fSJoseph Mingrone 	 * Make sure opt_state->n_blocks * opt_state->nodewords fits
25716f9cba8fSJoseph Mingrone 	 * in a u_int; we use it as a u_int number-of-iterations
25726f9cba8fSJoseph Mingrone 	 * value.
25736f9cba8fSJoseph Mingrone 	 */
25746f9cba8fSJoseph Mingrone 	product = opt_state->n_blocks * opt_state->nodewords;
25756f9cba8fSJoseph Mingrone 	if ((product / opt_state->n_blocks) != opt_state->nodewords) {
25766f9cba8fSJoseph Mingrone 		/*
25776f9cba8fSJoseph Mingrone 		 * XXX - just punt and don't try to optimize?
25786f9cba8fSJoseph Mingrone 		 * In practice, this is unlikely to happen with
25796f9cba8fSJoseph Mingrone 		 * a normal filter.
25806f9cba8fSJoseph Mingrone 		 */
25816f9cba8fSJoseph Mingrone 		opt_error(opt_state, "filter is too complex to optimize");
25826f9cba8fSJoseph Mingrone 	}
25836f9cba8fSJoseph Mingrone 
25846f9cba8fSJoseph Mingrone 	/*
25856f9cba8fSJoseph Mingrone 	 * Make sure the total memory required for that doesn't
25866f9cba8fSJoseph Mingrone 	 * overflow.
25876f9cba8fSJoseph Mingrone 	 */
25886f9cba8fSJoseph Mingrone 	block_memsize = (size_t)2 * product * sizeof(*opt_state->space);
25896f9cba8fSJoseph Mingrone 	if ((block_memsize / product) != 2 * sizeof(*opt_state->space)) {
25906f9cba8fSJoseph Mingrone 		opt_error(opt_state, "filter is too complex to optimize");
25916f9cba8fSJoseph Mingrone 	}
25926f9cba8fSJoseph Mingrone 
25936f9cba8fSJoseph Mingrone 	/*
25946f9cba8fSJoseph Mingrone 	 * Make sure opt_state->n_edges * opt_state->edgewords fits
25956f9cba8fSJoseph Mingrone 	 * in a u_int; we use it as a u_int number-of-iterations
25966f9cba8fSJoseph Mingrone 	 * value.
25976f9cba8fSJoseph Mingrone 	 */
25986f9cba8fSJoseph Mingrone 	product = opt_state->n_edges * opt_state->edgewords;
25996f9cba8fSJoseph Mingrone 	if ((product / opt_state->n_edges) != opt_state->edgewords) {
26006f9cba8fSJoseph Mingrone 		opt_error(opt_state, "filter is too complex to optimize");
26016f9cba8fSJoseph Mingrone 	}
26026f9cba8fSJoseph Mingrone 
26036f9cba8fSJoseph Mingrone 	/*
26046f9cba8fSJoseph Mingrone 	 * Make sure the total memory required for that doesn't
26056f9cba8fSJoseph Mingrone 	 * overflow.
26066f9cba8fSJoseph Mingrone 	 */
26076f9cba8fSJoseph Mingrone 	edge_memsize = (size_t)product * sizeof(*opt_state->space);
26086f9cba8fSJoseph Mingrone 	if (edge_memsize / product != sizeof(*opt_state->space)) {
26096f9cba8fSJoseph Mingrone 		opt_error(opt_state, "filter is too complex to optimize");
26106f9cba8fSJoseph Mingrone 	}
26116f9cba8fSJoseph Mingrone 
26126f9cba8fSJoseph Mingrone 	/*
26136f9cba8fSJoseph Mingrone 	 * Make sure the total memory required for both of them doesn't
26146f9cba8fSJoseph Mingrone 	 * overflow.
26156f9cba8fSJoseph Mingrone 	 */
26166f9cba8fSJoseph Mingrone 	if (block_memsize > SIZE_MAX - edge_memsize) {
26176f9cba8fSJoseph Mingrone 		opt_error(opt_state, "filter is too complex to optimize");
26186f9cba8fSJoseph Mingrone 	}
26198cf6c252SPaul Traina 
26208cf6c252SPaul Traina 	/* XXX */
26216f9cba8fSJoseph Mingrone 	opt_state->space = (bpf_u_int32 *)malloc(block_memsize + edge_memsize);
262257e22627SCy Schubert 	if (opt_state->space == NULL) {
262357e22627SCy Schubert 		opt_error(opt_state, "malloc");
262457e22627SCy Schubert 	}
2625ada6f083SXin LI 	p = opt_state->space;
2626ada6f083SXin LI 	opt_state->all_dom_sets = p;
26278cf6c252SPaul Traina 	for (i = 0; i < n; ++i) {
2628ada6f083SXin LI 		opt_state->blocks[i]->dom = p;
2629ada6f083SXin LI 		p += opt_state->nodewords;
26308cf6c252SPaul Traina 	}
2631ada6f083SXin LI 	opt_state->all_closure_sets = p;
26328cf6c252SPaul Traina 	for (i = 0; i < n; ++i) {
2633ada6f083SXin LI 		opt_state->blocks[i]->closure = p;
2634ada6f083SXin LI 		p += opt_state->nodewords;
26358cf6c252SPaul Traina 	}
2636ada6f083SXin LI 	opt_state->all_edge_sets = p;
26378cf6c252SPaul Traina 	for (i = 0; i < n; ++i) {
2638ada6f083SXin LI 		register struct block *b = opt_state->blocks[i];
26398cf6c252SPaul Traina 
26408cf6c252SPaul Traina 		b->et.edom = p;
2641ada6f083SXin LI 		p += opt_state->edgewords;
26428cf6c252SPaul Traina 		b->ef.edom = p;
2643ada6f083SXin LI 		p += opt_state->edgewords;
26448cf6c252SPaul Traina 		b->et.id = i;
2645ada6f083SXin LI 		opt_state->edges[i] = &b->et;
2646ada6f083SXin LI 		b->ef.id = opt_state->n_blocks + i;
2647ada6f083SXin LI 		opt_state->edges[opt_state->n_blocks + i] = &b->ef;
26488cf6c252SPaul Traina 		b->et.pred = b;
26498cf6c252SPaul Traina 		b->ef.pred = b;
26508cf6c252SPaul Traina 	}
26518cf6c252SPaul Traina 	max_stmts = 0;
26528cf6c252SPaul Traina 	for (i = 0; i < n; ++i)
2653ada6f083SXin LI 		max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
26548cf6c252SPaul Traina 	/*
26558cf6c252SPaul Traina 	 * We allocate at most 3 value numbers per statement,
26568cf6c252SPaul Traina 	 * so this is an upper bound on the number of valnodes
26578cf6c252SPaul Traina 	 * we'll need.
26588cf6c252SPaul Traina 	 */
2659ada6f083SXin LI 	opt_state->maxval = 3 * max_stmts;
2660ada6f083SXin LI 	opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
266157e22627SCy Schubert 	if (opt_state->vmap == NULL) {
266257e22627SCy Schubert 		opt_error(opt_state, "malloc");
266357e22627SCy Schubert 	}
2664ada6f083SXin LI 	opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
266557e22627SCy Schubert 	if (opt_state->vnode_base == NULL) {
266657e22627SCy Schubert 		opt_error(opt_state, "malloc");
266757e22627SCy Schubert 	}
26688cf6c252SPaul Traina }
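
/*
 * Editorial worked example of the sizing above (illustrative numbers;
 * assumes BITS_PER_WORD is 32 and sizeof(*opt_state->space) is 4): with
 * n_blocks = 4 and therefore n_edges = 8, nodewords = 4/32 + 1 = 1 and
 * edgewords = 8/32 + 1 = 1, so
 *
 *	block_memsize = 2 * (4 * 1) * 4 = 32 bytes  (dom + closure sets)
 *	edge_memsize  =     (8 * 1) * 4 = 32 bytes  (et.edom + ef.edom sets)
 *
 * and the single malloc() above carves that 64-byte 'space' buffer into
 * the per-block dom and closure bit vectors followed by the per-edge
 * edom bit vectors, exactly as the pointer arithmetic over 'p' does.
 */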
26698cf6c252SPaul Traina 
26708cf6c252SPaul Traina /*
2671ada6f083SXin LI  * This is only used when supporting optimizer debugging.  It is
2672ada6f083SXin LI  * global state, so do *not* do more than one compile in parallel
2673ada6f083SXin LI  * and expect it to provide meaningful information.
26748cf6c252SPaul Traina  */
26758cf6c252SPaul Traina #ifdef BDEBUG
2676b00ab754SHans Petter Selasky int bids[NBIDS];
26778cf6c252SPaul Traina #endif
26788cf6c252SPaul Traina 
267957e22627SCy Schubert static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
268057e22627SCy Schubert     PCAP_PRINTFLIKE(2, 3);
268157e22627SCy Schubert 
26828cf6c252SPaul Traina /*
26838cf6c252SPaul Traina  * Returns true if successful.  Returns false if a branch has
26848cf6c252SPaul Traina  * an offset that is too large.  If so, we have marked that
26858cf6c252SPaul Traina  * branch so that on a subsequent iteration, it will be treated
26868cf6c252SPaul Traina  * properly.
26878cf6c252SPaul Traina  */
26888cf6c252SPaul Traina static int
268957e22627SCy Schubert convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
26908cf6c252SPaul Traina {
26918cf6c252SPaul Traina 	struct bpf_insn *dst;
26928cf6c252SPaul Traina 	struct slist *src;
2693ada6f083SXin LI 	u_int slen;
26948cf6c252SPaul Traina 	u_int off;
26958751327cSBill Fenner 	struct slist **offset = NULL;
26968cf6c252SPaul Traina 
2697ada6f083SXin LI 	if (p == 0 || isMarked(ic, p))
26988cf6c252SPaul Traina 		return (1);
2699ada6f083SXin LI 	Mark(ic, p);
27008cf6c252SPaul Traina 
270157e22627SCy Schubert 	if (convert_code_r(conv_state, ic, JF(p)) == 0)
27028cf6c252SPaul Traina 		return (0);
270357e22627SCy Schubert 	if (convert_code_r(conv_state, ic, JT(p)) == 0)
27048cf6c252SPaul Traina 		return (0);
27058cf6c252SPaul Traina 
27068cf6c252SPaul Traina 	slen = slength(p->stmts);
2707ada6f083SXin LI 	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
27088cf6c252SPaul Traina 		/* inflate length by any extra jumps */
27098cf6c252SPaul Traina 
2710ada6f083SXin LI 	p->offset = (int)(dst - conv_state->fstart);
27118cf6c252SPaul Traina 
27128751327cSBill Fenner 	/* generate offset[] for convenience  */
27138751327cSBill Fenner 	if (slen) {
2714feb4ecdbSBruce M Simpson 		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
27158751327cSBill Fenner 		if (!offset) {
271657e22627SCy Schubert 			conv_error(conv_state, "not enough core");
27178751327cSBill Fenner 			/*NOTREACHED*/
27188751327cSBill Fenner 		}
27198751327cSBill Fenner 	}
27208751327cSBill Fenner 	src = p->stmts;
27218751327cSBill Fenner 	for (off = 0; off < slen && src; off++) {
27228751327cSBill Fenner #if 0
27238751327cSBill Fenner 		printf("off=%d src=%x\n", off, src);
27248751327cSBill Fenner #endif
27258751327cSBill Fenner 		offset[off] = src;
27268751327cSBill Fenner 		src = src->next;
27278751327cSBill Fenner 	}
27288751327cSBill Fenner 
27298751327cSBill Fenner 	off = 0;
27308cf6c252SPaul Traina 	for (src = p->stmts; src; src = src->next) {
27318cf6c252SPaul Traina 		if (src->s.code == NOP)
27328cf6c252SPaul Traina 			continue;
27338cf6c252SPaul Traina 		dst->code = (u_short)src->s.code;
27348cf6c252SPaul Traina 		dst->k = src->s.k;
27358751327cSBill Fenner 
27368751327cSBill Fenner 		/* fill block-local relative jump */
2737dc2c7305SBill Fenner 		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
27388751327cSBill Fenner #if 0
27398751327cSBill Fenner 			if (src->s.jt || src->s.jf) {
274057e22627SCy Schubert 				free(offset);
274157e22627SCy Schubert 				conv_error(conv_state, "illegal jmp destination");
27428751327cSBill Fenner 				/*NOTREACHED*/
27438cf6c252SPaul Traina 			}
27448751327cSBill Fenner #endif
27458751327cSBill Fenner 			goto filled;
27468751327cSBill Fenner 		}
27478751327cSBill Fenner 		if (off == slen - 2)	/*???*/
27488751327cSBill Fenner 			goto filled;
27498751327cSBill Fenner 
27508751327cSBill Fenner 	    {
2751ada6f083SXin LI 		u_int i;
27528751327cSBill Fenner 		int jt, jf;
2753b00ab754SHans Petter Selasky 		const char ljerr[] = "%s for block-local relative jump: off=%d";
27548751327cSBill Fenner 
27558751327cSBill Fenner #if 0
27568751327cSBill Fenner 		printf("code=%x off=%d %x %x\n", src->s.code,
27578751327cSBill Fenner 			off, src->s.jt, src->s.jf);
27588751327cSBill Fenner #endif
27598751327cSBill Fenner 
27608751327cSBill Fenner 		if (!src->s.jt || !src->s.jf) {
276157e22627SCy Schubert 			free(offset);
276257e22627SCy Schubert 			conv_error(conv_state, ljerr, "no jmp destination", off);
27638751327cSBill Fenner 			/*NOTREACHED*/
27648751327cSBill Fenner 		}
27658751327cSBill Fenner 
27668751327cSBill Fenner 		jt = jf = 0;
27678751327cSBill Fenner 		for (i = 0; i < slen; i++) {
27688751327cSBill Fenner 			if (offset[i] == src->s.jt) {
27698751327cSBill Fenner 				if (jt) {
277057e22627SCy Schubert 					free(offset);
277157e22627SCy Schubert 					conv_error(conv_state, ljerr, "multiple matches", off);
27728751327cSBill Fenner 					/*NOTREACHED*/
27738751327cSBill Fenner 				}
27748751327cSBill Fenner 
2775b00ab754SHans Petter Selasky 				if (i - off - 1 >= 256) {
277657e22627SCy Schubert 					free(offset);
277757e22627SCy Schubert 					conv_error(conv_state, ljerr, "out-of-range jump", off);
2778b00ab754SHans Petter Selasky 					/*NOTREACHED*/
2779b00ab754SHans Petter Selasky 				}
2780b00ab754SHans Petter Selasky 				dst->jt = (u_char)(i - off - 1);
27818751327cSBill Fenner 				jt++;
27828751327cSBill Fenner 			}
27838751327cSBill Fenner 			if (offset[i] == src->s.jf) {
27848751327cSBill Fenner 				if (jf) {
278557e22627SCy Schubert 					free(offset);
278657e22627SCy Schubert 					conv_error(conv_state, ljerr, "multiple matches", off);
27878751327cSBill Fenner 					/*NOTREACHED*/
27888751327cSBill Fenner 				}
2789b00ab754SHans Petter Selasky 				if (i - off - 1 >= 256) {
279057e22627SCy Schubert 					free(offset);
279157e22627SCy Schubert 					conv_error(conv_state, ljerr, "out-of-range jump", off);
2792b00ab754SHans Petter Selasky 					/*NOTREACHED*/
2793b00ab754SHans Petter Selasky 				}
2794b00ab754SHans Petter Selasky 				dst->jf = (u_char)(i - off - 1);
27958751327cSBill Fenner 				jf++;
27968751327cSBill Fenner 			}
27978751327cSBill Fenner 		}
27988751327cSBill Fenner 		if (!jt || !jf) {
279957e22627SCy Schubert 			free(offset);
280057e22627SCy Schubert 			conv_error(conv_state, ljerr, "no destination found", off);
28018751327cSBill Fenner 			/*NOTREACHED*/
28028751327cSBill Fenner 		}
28038751327cSBill Fenner 	    }
28048751327cSBill Fenner filled:
28058751327cSBill Fenner 		++dst;
28068751327cSBill Fenner 		++off;
28078751327cSBill Fenner 	}
28088751327cSBill Fenner 	if (offset)
28098751327cSBill Fenner 		free(offset);
28108751327cSBill Fenner 
28118cf6c252SPaul Traina #ifdef BDEBUG
2812b00ab754SHans Petter Selasky 	if (dst - conv_state->fstart < NBIDS)
2813ada6f083SXin LI 		bids[dst - conv_state->fstart] = p->id + 1;
28148cf6c252SPaul Traina #endif
28158cf6c252SPaul Traina 	dst->code = (u_short)p->s.code;
28168cf6c252SPaul Traina 	dst->k = p->s.k;
28178cf6c252SPaul Traina 	if (JT(p)) {
28186f9cba8fSJoseph Mingrone 		/* number of extra jumps inserted */
28196f9cba8fSJoseph Mingrone 		u_char extrajmps = 0;
28208cf6c252SPaul Traina 		off = JT(p)->offset - (p->offset + slen) - 1;
28218cf6c252SPaul Traina 		if (off >= 256) {
28228cf6c252SPaul Traina 		    /* offset too large for branch, must add a jump */
28238cf6c252SPaul Traina 		    if (p->longjt == 0) {
28248cf6c252SPaul Traina 			/* mark this instruction and retry */
28258cf6c252SPaul Traina 			p->longjt++;
28268cf6c252SPaul Traina 			return(0);
28278cf6c252SPaul Traina 		    }
28286f9cba8fSJoseph Mingrone 		    dst->jt = extrajmps;
28298cf6c252SPaul Traina 		    extrajmps++;
28308cf6c252SPaul Traina 		    dst[extrajmps].code = BPF_JMP|BPF_JA;
28318cf6c252SPaul Traina 		    dst[extrajmps].k = off - extrajmps;
28328cf6c252SPaul Traina 		}
28338cf6c252SPaul Traina 		else
2834b00ab754SHans Petter Selasky 		    dst->jt = (u_char)off;
28358cf6c252SPaul Traina 		off = JF(p)->offset - (p->offset + slen) - 1;
28368cf6c252SPaul Traina 		if (off >= 256) {
28378cf6c252SPaul Traina 		    /* offset too large for branch, must add a jump */
28388cf6c252SPaul Traina 		    if (p->longjf == 0) {
28398cf6c252SPaul Traina 			/* mark this instruction and retry */
28408cf6c252SPaul Traina 			p->longjf++;
28418cf6c252SPaul Traina 			return(0);
28428cf6c252SPaul Traina 		    }
28438cf6c252SPaul Traina 		    /* branch if F to following jump */
28448cf6c252SPaul Traina 		    /* if two jumps are inserted, F goes to second one */
28456f9cba8fSJoseph Mingrone 		    dst->jf = extrajmps;
28468cf6c252SPaul Traina 		    extrajmps++;
28478cf6c252SPaul Traina 		    dst[extrajmps].code = BPF_JMP|BPF_JA;
28488cf6c252SPaul Traina 		    dst[extrajmps].k = off - extrajmps;
28498cf6c252SPaul Traina 		}
28508cf6c252SPaul Traina 		else
2851b00ab754SHans Petter Selasky 		    dst->jf = (u_char)off;
28528cf6c252SPaul Traina 	}
28538cf6c252SPaul Traina 	return (1);
28548cf6c252SPaul Traina }
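
/*
 * Editorial example (illustrative only) of the "offset too large" path in
 * convert_code_r() above: if the true-branch target sits, say, 300
 * instructions past the conditional, the 8-bit jt field cannot express
 * the offset, so p->longjt is bumped and the whole conversion is retried.
 * On the retry, room for one extra instruction has been reserved and the
 * branch is emitted roughly as
 *
 *	dst[0].jt = 0;			// branch to the very next instruction
 *	dst[1].code = BPF_JMP|BPF_JA;	// the inserted long jump
 *	dst[1].k = off - 1;		// = 299, the real distance
 *
 * so when the condition is true the program lands on an unconditional
 * jump that carries the distance the conditional itself could not encode.
 */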
28558cf6c252SPaul Traina 
28568cf6c252SPaul Traina 
28578cf6c252SPaul Traina /*
28588cf6c252SPaul Traina  * Convert flowgraph intermediate representation to the
28598cf6c252SPaul Traina  * BPF array representation.  Set *lenp to the number of instructions.
2860ef96d74fSMax Laier  *
2861ef96d74fSMax Laier  * This routine does *NOT* leak the memory pointed to by fp.  It *must
2862ef96d74fSMax Laier  * not* do free(fp) before returning fp; doing so would make no sense,
2863ef96d74fSMax Laier  * as the BPF array pointed to by the return value of icode_to_fcode()
2864ef96d74fSMax Laier  * must be valid - it's being returned for use in a bpf_program structure.
2865ef96d74fSMax Laier  *
2866ef96d74fSMax Laier  * If it appears that icode_to_fcode() is leaking, the problem is that
2867ef96d74fSMax Laier  * the program using pcap_compile() is failing to free the memory in
2868ef96d74fSMax Laier  * the BPF program when it's done - the leak is in the program, not in
2869ef96d74fSMax Laier  * the routine that happens to be allocating the memory.  (By analogy, if
2870ef96d74fSMax Laier  * a program calls fopen() without ever calling fclose() on the FILE *,
2871ef96d74fSMax Laier  * it will leak the FILE structure; the leak is not in fopen(), it's in
2872ef96d74fSMax Laier  * the program.)  Change the program to use pcap_freecode() when it's
2873ef96d74fSMax Laier  * done with the filter program.  See the pcap man page.
28748cf6c252SPaul Traina  */
28758cf6c252SPaul Traina struct bpf_insn *
287657e22627SCy Schubert icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
287757e22627SCy Schubert     char *errbuf)
28788cf6c252SPaul Traina {
287915752fa8SXin LI 	u_int n;
28808cf6c252SPaul Traina 	struct bpf_insn *fp;
2881ada6f083SXin LI 	conv_state_t conv_state;
28828cf6c252SPaul Traina 
288357e22627SCy Schubert 	conv_state.fstart = NULL;
288457e22627SCy Schubert 	conv_state.errbuf = errbuf;
288557e22627SCy Schubert 	if (setjmp(conv_state.top_ctx) != 0) {
288657e22627SCy Schubert 		free(conv_state.fstart);
288757e22627SCy Schubert 		return NULL;
288857e22627SCy Schubert 	}
288957e22627SCy Schubert 
28908cf6c252SPaul Traina 	/*
28910a94d38fSBill Fenner 	 * Loop doing convert_code_r() until no branches remain
28928cf6c252SPaul Traina 	 * with too-large offsets.
28938cf6c252SPaul Traina 	 */
2894b00ab754SHans Petter Selasky 	for (;;) {
2895ada6f083SXin LI 	    unMarkAll(ic);
2896ada6f083SXin LI 	    n = *lenp = count_stmts(ic, root);
28978cf6c252SPaul Traina 
28988cf6c252SPaul Traina 	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
289957e22627SCy Schubert 	    if (fp == NULL) {
29006f9cba8fSJoseph Mingrone 		(void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
290157e22627SCy Schubert 		    "malloc");
290257e22627SCy Schubert 		return NULL;
290357e22627SCy Schubert 	    }
29048cf6c252SPaul Traina 	    memset((char *)fp, 0, sizeof(*fp) * n);
2905ada6f083SXin LI 	    conv_state.fstart = fp;
2906ada6f083SXin LI 	    conv_state.ftail = fp + n;
29078cf6c252SPaul Traina 
2908ada6f083SXin LI 	    unMarkAll(ic);
290957e22627SCy Schubert 	    if (convert_code_r(&conv_state, ic, root))
29108cf6c252SPaul Traina 		break;
29118cf6c252SPaul Traina 	    free(fp);
29128cf6c252SPaul Traina 	}
29138cf6c252SPaul Traina 
29148cf6c252SPaul Traina 	return fp;
29158cf6c252SPaul Traina }
29168cf6c252SPaul Traina 
2917dc2c7305SBill Fenner /*
291857e22627SCy Schubert  * For icode_to_fcode() errors.
291957e22627SCy Schubert  */
292057e22627SCy Schubert static void PCAP_NORETURN
292157e22627SCy Schubert conv_error(conv_state_t *conv_state, const char *fmt, ...)
292257e22627SCy Schubert {
292357e22627SCy Schubert 	va_list ap;
292457e22627SCy Schubert 
292557e22627SCy Schubert 	va_start(ap, fmt);
29266f9cba8fSJoseph Mingrone 	(void)vsnprintf(conv_state->errbuf,
292757e22627SCy Schubert 	    PCAP_ERRBUF_SIZE, fmt, ap);
292857e22627SCy Schubert 	va_end(ap);
292957e22627SCy Schubert 	longjmp(conv_state->top_ctx, 1);
293057e22627SCy Schubert 	/* NOTREACHED */
29316f9cba8fSJoseph Mingrone #ifdef _AIX
29326f9cba8fSJoseph Mingrone 	PCAP_UNREACHABLE
29336f9cba8fSJoseph Mingrone #endif /* _AIX */
293457e22627SCy Schubert }
293557e22627SCy Schubert 
293657e22627SCy Schubert /*
2937dc2c7305SBill Fenner  * Make a copy of a BPF program and put it in the "fcode" member of
2938dc2c7305SBill Fenner  * a "pcap_t".
2939dc2c7305SBill Fenner  *
2940dc2c7305SBill Fenner  * If we fail to allocate memory for the copy, fill in the "errbuf"
2941dc2c7305SBill Fenner  * member of the "pcap_t" with an error message, and return -1;
2942dc2c7305SBill Fenner  * otherwise, return 0.
2943dc2c7305SBill Fenner  */
2944dc2c7305SBill Fenner int
2945dc2c7305SBill Fenner install_bpf_program(pcap_t *p, struct bpf_program *fp)
2946dc2c7305SBill Fenner {
2947dc2c7305SBill Fenner 	size_t prog_size;
2948dc2c7305SBill Fenner 
2949dc2c7305SBill Fenner 	/*
2950a8e07101SRui Paulo 	 * Validate the program.
2951a8e07101SRui Paulo 	 */
29526f9cba8fSJoseph Mingrone 	if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) {
29536f9cba8fSJoseph Mingrone 		snprintf(p->errbuf, sizeof(p->errbuf),
2954a8e07101SRui Paulo 			"BPF program is not valid");
2955a8e07101SRui Paulo 		return (-1);
2956a8e07101SRui Paulo 	}
2957a8e07101SRui Paulo 
2958a8e07101SRui Paulo 	/*
2959dc2c7305SBill Fenner 	 * Free up any already installed program.
2960dc2c7305SBill Fenner 	 */
2961dc2c7305SBill Fenner 	pcap_freecode(&p->fcode);
2962dc2c7305SBill Fenner 
2963dc2c7305SBill Fenner 	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2964dc2c7305SBill Fenner 	p->fcode.bf_len = fp->bf_len;
2965dc2c7305SBill Fenner 	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2966dc2c7305SBill Fenner 	if (p->fcode.bf_insns == NULL) {
2967b00ab754SHans Petter Selasky 		pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2968b00ab754SHans Petter Selasky 		    errno, "malloc");
2969dc2c7305SBill Fenner 		return (-1);
2970dc2c7305SBill Fenner 	}
2971dc2c7305SBill Fenner 	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2972dc2c7305SBill Fenner 	return (0);
2973dc2c7305SBill Fenner }
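
/*
 * Editorial example (illustrative only; the helper name is made up): a
 * minimal sketch of a capture module's setfilter-style routine built on
 * install_bpf_program(), showing the contract above - 0 on success, -1
 * with p->errbuf already filled in on failure.
 *
 *	static int
 *	example_setfilter(pcap_t *p, struct bpf_program *fp)
 *	{
 *		if (install_bpf_program(p, fp) < 0)
 *			return (-1);	// p->errbuf already set
 *		// ... arrange for p->fcode to be applied to packets ...
 *		return (0);
 *	}
 */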
2974dc2c7305SBill Fenner 
29758cf6c252SPaul Traina #ifdef BDEBUG
29768cf6c252SPaul Traina static void
2977ada6f083SXin LI dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2978ada6f083SXin LI     FILE *out)
2979ada6f083SXin LI {
2980ada6f083SXin LI 	int icount, noffset;
2981ada6f083SXin LI 	int i;
2982ada6f083SXin LI 
2983ada6f083SXin LI 	if (block == NULL || isMarked(ic, block))
2984ada6f083SXin LI 		return;
2985ada6f083SXin LI 	Mark(ic, block);
2986ada6f083SXin LI 
2987ada6f083SXin LI 	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2988ada6f083SXin LI 	noffset = min(block->offset + icount, (int)prog->bf_len);
2989ada6f083SXin LI 
29906f9cba8fSJoseph Mingrone 	fprintf(out, "\tblock%u [shape=ellipse, id=\"block-%u\" label=\"BLOCK%u\\n", block->id, block->id, block->id);
2991ada6f083SXin LI 	for (i = block->offset; i < noffset; i++) {
2992ada6f083SXin LI 		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2993ada6f083SXin LI 	}
2994ada6f083SXin LI 	fprintf(out, "\" tooltip=\"");
2995ada6f083SXin LI 	for (i = 0; i < BPF_MEMWORDS; i++)
2996b00ab754SHans Petter Selasky 		if (block->val[i] != VAL_UNKNOWN)
2997ada6f083SXin LI 			fprintf(out, "val[%d]=%d ", i, block->val[i]);
2998ada6f083SXin LI 	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2999ada6f083SXin LI 	fprintf(out, "val[X]=%d", block->val[X_ATOM]);
3000ada6f083SXin LI 	fprintf(out, "\"");
3001ada6f083SXin LI 	if (JT(block) == NULL)
3002ada6f083SXin LI 		fprintf(out, ", peripheries=2");
3003ada6f083SXin LI 	fprintf(out, "];\n");
3004ada6f083SXin LI 
3005ada6f083SXin LI 	dot_dump_node(ic, JT(block), prog, out);
3006ada6f083SXin LI 	dot_dump_node(ic, JF(block), prog, out);
3007ada6f083SXin LI }
3008ada6f083SXin LI 
3009ada6f083SXin LI static void
3010ada6f083SXin LI dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
3011ada6f083SXin LI {
3012ada6f083SXin LI 	if (block == NULL || isMarked(ic, block))
3013ada6f083SXin LI 		return;
3014ada6f083SXin LI 	Mark(ic, block);
3015ada6f083SXin LI 
3016ada6f083SXin LI 	if (JT(block)) {
30176f9cba8fSJoseph Mingrone 		fprintf(out, "\t\"block%u\":se -> \"block%u\":n [label=\"T\"]; \n",
3018ada6f083SXin LI 				block->id, JT(block)->id);
30196f9cba8fSJoseph Mingrone 		fprintf(out, "\t\"block%u\":sw -> \"block%u\":n [label=\"F\"]; \n",
3020ada6f083SXin LI 			   block->id, JF(block)->id);
3021ada6f083SXin LI 	}
3022ada6f083SXin LI 	dot_dump_edge(ic, JT(block), out);
3023ada6f083SXin LI 	dot_dump_edge(ic, JF(block), out);
3024ada6f083SXin LI }
3025ada6f083SXin LI 
3026ada6f083SXin LI /* Output the block CFG using graphviz/DOT language
3027ada6f083SXin LI  * In the CFG, each block's code, the value index for each register at EXIT,
3028ada6f083SXin LI  * and the jump relationships are shown.
3029ada6f083SXin LI  *
3030ada6f083SXin LI  * example DOT for BPF `ip src host 1.1.1.1' is:
3031ada6f083SXin LI     digraph BPF {
3032ada6f083SXin LI 	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh      [12]\n(001) jeq      #0x800           jt 2	jf 5" tooltip="val[A]=0 val[X]=0"];
3033ada6f083SXin LI 	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld       [26]\n(003) jeq      #0x1010101       jt 4	jf 5" tooltip="val[A]=0 val[X]=0"];
3034ada6f083SXin LI 	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret      #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
3035ada6f083SXin LI 	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret      #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
3036ada6f083SXin LI 	"block0":se -> "block1":n [label="T"];
3037ada6f083SXin LI 	"block0":sw -> "block3":n [label="F"];
3038ada6f083SXin LI 	"block1":se -> "block2":n [label="T"];
3039ada6f083SXin LI 	"block1":sw -> "block3":n [label="F"];
3040ada6f083SXin LI     }
3041ada6f083SXin LI  *
30426f9cba8fSJoseph Mingrone  *  After installing graphviz from https://www.graphviz.org/, save it as bpf.dot
3043ada6f083SXin LI  *  and run `dot -Tpng -O bpf.dot' to draw the graph.
3044ada6f083SXin LI  */
304557e22627SCy Schubert static int
304657e22627SCy Schubert dot_dump(struct icode *ic, char *errbuf)
3047ada6f083SXin LI {
3048ada6f083SXin LI 	struct bpf_program f;
3049ada6f083SXin LI 	FILE *out = stdout;
3050ada6f083SXin LI 
3051ada6f083SXin LI 	memset(bids, 0, sizeof bids);
305257e22627SCy Schubert 	f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
305357e22627SCy Schubert 	if (f.bf_insns == NULL)
305457e22627SCy Schubert 		return -1;
3055ada6f083SXin LI 
3056ada6f083SXin LI 	fprintf(out, "digraph BPF {\n");
3057ada6f083SXin LI 	unMarkAll(ic);
3058ada6f083SXin LI 	dot_dump_node(ic, ic->root, &f, out);
3059ada6f083SXin LI 	unMarkAll(ic);
3060ada6f083SXin LI 	dot_dump_edge(ic, ic->root, out);
3061ada6f083SXin LI 	fprintf(out, "}\n");
3062ada6f083SXin LI 
3063ada6f083SXin LI 	free((char *)f.bf_insns);
306457e22627SCy Schubert 	return 0;
3065ada6f083SXin LI }
3066ada6f083SXin LI 
306757e22627SCy Schubert static int
306857e22627SCy Schubert plain_dump(struct icode *ic, char *errbuf)
30698cf6c252SPaul Traina {
30708cf6c252SPaul Traina 	struct bpf_program f;
30718cf6c252SPaul Traina 
30728cf6c252SPaul Traina 	memset(bids, 0, sizeof bids);
307357e22627SCy Schubert 	f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
307457e22627SCy Schubert 	if (f.bf_insns == NULL)
307557e22627SCy Schubert 		return -1;
30768cf6c252SPaul Traina 	bpf_dump(&f, 1);
30778cf6c252SPaul Traina 	putchar('\n');
30788cf6c252SPaul Traina 	free((char *)f.bf_insns);
307957e22627SCy Schubert 	return 0;
30808cf6c252SPaul Traina }
3081ada6f083SXin LI 
3082ada6f083SXin LI static void
308357e22627SCy Schubert opt_dump(opt_state_t *opt_state, struct icode *ic)
3084ada6f083SXin LI {
308557e22627SCy Schubert 	int status;
308657e22627SCy Schubert 	char errbuf[PCAP_ERRBUF_SIZE];
308757e22627SCy Schubert 
3088b00ab754SHans Petter Selasky 	/*
3089b00ab754SHans Petter Selasky 	 * If the CFG, in DOT format, is requested, output it rather than
3090b00ab754SHans Petter Selasky 	 * the code that would be generated from that graph.
3091ada6f083SXin LI 	 */
3092b00ab754SHans Petter Selasky 	if (pcap_print_dot_graph)
309357e22627SCy Schubert 		status = dot_dump(ic, errbuf);
3094ada6f083SXin LI 	else
309557e22627SCy Schubert 		status = plain_dump(ic, errbuf);
309657e22627SCy Schubert 	if (status == -1)
309757e22627SCy Schubert 		opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
3098ada6f083SXin LI }
30998cf6c252SPaul Traina #endif