xref: /dragonfly/contrib/libpcap/optimize.c (revision ea16f64e)
11077d0bdSPeter Avalos /*
21077d0bdSPeter Avalos  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
31077d0bdSPeter Avalos  *	The Regents of the University of California.  All rights reserved.
41077d0bdSPeter Avalos  *
51077d0bdSPeter Avalos  * Redistribution and use in source and binary forms, with or without
61077d0bdSPeter Avalos  * modification, are permitted provided that: (1) source code distributions
71077d0bdSPeter Avalos  * retain the above copyright notice and this paragraph in its entirety, (2)
81077d0bdSPeter Avalos  * distributions including binary code include the above copyright notice and
91077d0bdSPeter Avalos  * this paragraph in its entirety in the documentation or other materials
101077d0bdSPeter Avalos  * provided with the distribution, and (3) all advertising materials mentioning
111077d0bdSPeter Avalos  * features or use of this software display the following acknowledgement:
121077d0bdSPeter Avalos  * ``This product includes software developed by the University of California,
131077d0bdSPeter Avalos  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
141077d0bdSPeter Avalos  * the University nor the names of its contributors may be used to endorse
151077d0bdSPeter Avalos  * or promote products derived from this software without specific prior
161077d0bdSPeter Avalos  * written permission.
171077d0bdSPeter Avalos  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
181077d0bdSPeter Avalos  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
191077d0bdSPeter Avalos  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
201077d0bdSPeter Avalos  *
213a289941SAaron LI  *  Optimization module for BPF code intermediate representation.
221077d0bdSPeter Avalos  */
231077d0bdSPeter Avalos 
241077d0bdSPeter Avalos #ifdef HAVE_CONFIG_H
253a289941SAaron LI #include <config.h>
261077d0bdSPeter Avalos #endif
271077d0bdSPeter Avalos 
283a289941SAaron LI #include <pcap-types.h>
29a85e14b0SPeter Avalos 
301077d0bdSPeter Avalos #include <stdio.h>
311077d0bdSPeter Avalos #include <stdlib.h>
321077d0bdSPeter Avalos #include <memory.h>
333a289941SAaron LI #include <setjmp.h>
341077d0bdSPeter Avalos #include <string.h>
351077d0bdSPeter Avalos 
361077d0bdSPeter Avalos #include <errno.h>
371077d0bdSPeter Avalos 
381077d0bdSPeter Avalos #include "pcap-int.h"
391077d0bdSPeter Avalos 
401077d0bdSPeter Avalos #include "gencode.h"
413a289941SAaron LI #include "optimize.h"
421077d0bdSPeter Avalos 
431077d0bdSPeter Avalos #ifdef HAVE_OS_PROTO_H
441077d0bdSPeter Avalos #include "os-proto.h"
451077d0bdSPeter Avalos #endif
461077d0bdSPeter Avalos 
471077d0bdSPeter Avalos #ifdef BDEBUG
4897a9217aSAntonio Huete Jimenez /*
493a289941SAaron LI  * The internal "debug printout" flag for the filter expression optimizer.
503a289941SAaron LI  * The code to print that stuff is present only if BDEBUG is defined, so
513a289941SAaron LI  * the flag, and the routine to set it, are defined only if BDEBUG is
523a289941SAaron LI  * defined.
5397a9217aSAntonio Huete Jimenez  */
543a289941SAaron LI static int pcap_optimizer_debug;
553a289941SAaron LI 
5697a9217aSAntonio Huete Jimenez /*
573a289941SAaron LI  * Routine to set that flag.
5897a9217aSAntonio Huete Jimenez  *
593a289941SAaron LI  * This is intended for libpcap developers, not for general use.
603a289941SAaron LI  * If you want to set these in a program, you'll have to declare this
613a289941SAaron LI  * routine yourself, with the appropriate DLL import attribute on Windows;
623a289941SAaron LI  * it's not declared in any header file, and won't be declared in any
633a289941SAaron LI  * header file provided by libpcap.
643a289941SAaron LI  */
653a289941SAaron LI PCAP_API void pcap_set_optimizer_debug(int value);
663a289941SAaron LI 
673a289941SAaron LI PCAP_API_DEF void
pcap_set_optimizer_debug(int value)683a289941SAaron LI pcap_set_optimizer_debug(int value)
693a289941SAaron LI {
703a289941SAaron LI 	pcap_optimizer_debug = value;
713a289941SAaron LI }
723a289941SAaron LI 
733a289941SAaron LI /*
743a289941SAaron LI  * The internal "print dot graph" flag for the filter expression optimizer.
753a289941SAaron LI  * The code to print that stuff is present only if BDEBUG is defined, so
763a289941SAaron LI  * the flag, and the routine to set it, are defined only if BDEBUG is
773a289941SAaron LI  * defined.
783a289941SAaron LI  */
793a289941SAaron LI static int pcap_print_dot_graph;
803a289941SAaron LI 
813a289941SAaron LI /*
823a289941SAaron LI  * Routine to set that flag.
833a289941SAaron LI  *
843a289941SAaron LI  * This is intended for libpcap developers, not for general use.
853a289941SAaron LI  * If you want to set these in a program, you'll have to declare this
863a289941SAaron LI  * routine yourself, with the appropriate DLL import attribute on Windows;
873a289941SAaron LI  * it's not declared in any header file, and won't be declared in any
883a289941SAaron LI  * header file provided by libpcap.
893a289941SAaron LI  */
903a289941SAaron LI PCAP_API void pcap_set_print_dot_graph(int value);
913a289941SAaron LI 
923a289941SAaron LI PCAP_API_DEF void
pcap_set_print_dot_graph(int value)933a289941SAaron LI pcap_set_print_dot_graph(int value)
943a289941SAaron LI {
953a289941SAaron LI 	pcap_print_dot_graph = value;
963a289941SAaron LI }
973a289941SAaron LI 
983a289941SAaron LI #endif
993a289941SAaron LI 
1003a289941SAaron LI /*
1013a289941SAaron LI  * lowest_set_bit().
1023a289941SAaron LI  *
1033a289941SAaron LI  * Takes a 32-bit integer as an argument.
1043a289941SAaron LI  *
1053a289941SAaron LI  * If handed a non-zero value, returns the index of the lowest set bit,
106*ea16f64eSAntonio Huete Jimenez  * counting upwards from zero.
1073a289941SAaron LI  *
1083a289941SAaron LI  * If handed zero, the results are platform- and compiler-dependent.
1093a289941SAaron LI  * Keep it out of the light, don't give it any water, don't feed it
1103a289941SAaron LI  * after midnight, and don't pass zero to it.
1113a289941SAaron LI  *
1123a289941SAaron LI  * This is the same as the count of trailing zeroes in the word.
1133a289941SAaron LI  */
1143a289941SAaron LI #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
1153a289941SAaron LI   /*
1163a289941SAaron LI    * GCC 3.4 and later; we have __builtin_ctz().
1173a289941SAaron LI    */
118*ea16f64eSAntonio Huete Jimenez   #define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask))
1193a289941SAaron LI #elif defined(_MSC_VER)
1203a289941SAaron LI   /*
1213a289941SAaron LI    * Visual Studio; we support only 2005 and later, so use
1223a289941SAaron LI    * _BitScanForward().
1233a289941SAaron LI    */
1243a289941SAaron LI #include <intrin.h>
1253a289941SAaron LI 
1263a289941SAaron LI #ifndef __clang__
1273a289941SAaron LI #pragma intrinsic(_BitScanForward)
1283a289941SAaron LI #endif
1293a289941SAaron LI 
130*ea16f64eSAntonio Huete Jimenez static __forceinline u_int
lowest_set_bit(int mask)1313a289941SAaron LI lowest_set_bit(int mask)
1323a289941SAaron LI {
1333a289941SAaron LI 	unsigned long bit;
1343a289941SAaron LI 
1353a289941SAaron LI 	/*
1363a289941SAaron LI 	 * Don't sign-extend mask if long is longer than int.
1373a289941SAaron LI 	 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
1383a289941SAaron LI 	 */
1393a289941SAaron LI 	if (_BitScanForward(&bit, (unsigned int)mask) == 0)
140*ea16f64eSAntonio Huete Jimenez 		abort();	/* mask is zero */
141*ea16f64eSAntonio Huete Jimenez 	return (u_int)bit;
1423a289941SAaron LI }
1433a289941SAaron LI #elif defined(MSDOS) && defined(__DJGPP__)
1443a289941SAaron LI   /*
1453a289941SAaron LI    * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
1463a289941SAaron LI    * we've already included.
1473a289941SAaron LI    */
148*ea16f64eSAntonio Huete Jimenez   #define lowest_set_bit(mask)	((u_int)(ffs((mask)) - 1))
1493a289941SAaron LI #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
1503a289941SAaron LI   /*
1513a289941SAaron LI    * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
1523a289941SAaron LI    * or some other platform (UN*X conforming to a sufficient recent version
1533a289941SAaron LI    * of the Single UNIX Specification).
1543a289941SAaron LI    */
1553a289941SAaron LI   #include <strings.h>
156*ea16f64eSAntonio Huete Jimenez   #define lowest_set_bit(mask)	(u_int)((ffs((mask)) - 1))
1573a289941SAaron LI #else
1583a289941SAaron LI /*
1593a289941SAaron LI  * None of the above.
1603a289941SAaron LI  * Use a perfect-hash-function-based function.
16197a9217aSAntonio Huete Jimenez  */
162*ea16f64eSAntonio Huete Jimenez static u_int
lowest_set_bit(int mask)1633a289941SAaron LI lowest_set_bit(int mask)
16497a9217aSAntonio Huete Jimenez {
1653a289941SAaron LI 	unsigned int v = (unsigned int)mask;
16697a9217aSAntonio Huete Jimenez 
167*ea16f64eSAntonio Huete Jimenez 	static const u_int MultiplyDeBruijnBitPosition[32] = {
1683a289941SAaron LI 		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
1693a289941SAaron LI 		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
1703a289941SAaron LI 	};
1713a289941SAaron LI 
1723a289941SAaron LI 	/*
1733a289941SAaron LI 	 * We strip off all but the lowermost set bit (v & ~v),
1743a289941SAaron LI 	 * and perform a minimal perfect hash on it to look up the
1753a289941SAaron LI 	 * number of low-order zero bits in a table.
1763a289941SAaron LI 	 *
1773a289941SAaron LI 	 * See:
1783a289941SAaron LI 	 *
1793a289941SAaron LI 	 *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
1803a289941SAaron LI 	 *
1813a289941SAaron LI 	 *	http://supertech.csail.mit.edu/papers/debruijn.pdf
1823a289941SAaron LI 	 */
1833a289941SAaron LI 	return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
18497a9217aSAntonio Huete Jimenez }
185de0d3203SPeter Avalos #endif
186de0d3203SPeter Avalos 
1871077d0bdSPeter Avalos /*
1881077d0bdSPeter Avalos  * Represents a deleted instruction.
1891077d0bdSPeter Avalos  */
1901077d0bdSPeter Avalos #define NOP -1
1911077d0bdSPeter Avalos 
1921077d0bdSPeter Avalos /*
1931077d0bdSPeter Avalos  * Register numbers for use-def values.
1941077d0bdSPeter Avalos  * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
1951077d0bdSPeter Avalos  * location.  A_ATOM is the accumulator and X_ATOM is the index
1961077d0bdSPeter Avalos  * register.
1971077d0bdSPeter Avalos  */
1981077d0bdSPeter Avalos #define A_ATOM BPF_MEMWORDS
1991077d0bdSPeter Avalos #define X_ATOM (BPF_MEMWORDS+1)
2001077d0bdSPeter Avalos 
2011077d0bdSPeter Avalos /*
2021077d0bdSPeter Avalos  * This define is used to represent *both* the accumulator and
2031077d0bdSPeter Avalos  * x register in use-def computations.
2041077d0bdSPeter Avalos  * Currently, the use-def code assumes only one definition per instruction.
2051077d0bdSPeter Avalos  */
2061077d0bdSPeter Avalos #define AX_ATOM N_ATOMS
2071077d0bdSPeter Avalos 
2081077d0bdSPeter Avalos /*
20997a9217aSAntonio Huete Jimenez  * These data structures are used in a Cocke and Shwarz style
21097a9217aSAntonio Huete Jimenez  * value numbering scheme.  Since the flowgraph is acyclic,
21197a9217aSAntonio Huete Jimenez  * exit values can be propagated from a node's predecessors
21297a9217aSAntonio Huete Jimenez  * provided it is uniquely defined.
21397a9217aSAntonio Huete Jimenez  */
21497a9217aSAntonio Huete Jimenez struct valnode {
21597a9217aSAntonio Huete Jimenez 	int code;
216*ea16f64eSAntonio Huete Jimenez 	bpf_u_int32 v0, v1;
217*ea16f64eSAntonio Huete Jimenez 	int val;		/* the value number */
21897a9217aSAntonio Huete Jimenez 	struct valnode *next;
21997a9217aSAntonio Huete Jimenez };
22097a9217aSAntonio Huete Jimenez 
22197a9217aSAntonio Huete Jimenez /* Integer constants mapped with the load immediate opcode. */
222*ea16f64eSAntonio Huete Jimenez #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0U)
22397a9217aSAntonio Huete Jimenez 
22497a9217aSAntonio Huete Jimenez struct vmapinfo {
22597a9217aSAntonio Huete Jimenez 	int is_const;
226*ea16f64eSAntonio Huete Jimenez 	bpf_u_int32 const_val;
22797a9217aSAntonio Huete Jimenez };
22897a9217aSAntonio Huete Jimenez 
2293a289941SAaron LI typedef struct {
2303a289941SAaron LI 	/*
2313a289941SAaron LI 	 * Place to longjmp to on an error.
2323a289941SAaron LI 	 */
2333a289941SAaron LI 	jmp_buf top_ctx;
2343a289941SAaron LI 
2353a289941SAaron LI 	/*
2363a289941SAaron LI 	 * The buffer into which to put error message.
2373a289941SAaron LI 	 */
2383a289941SAaron LI 	char *errbuf;
2393a289941SAaron LI 
24097a9217aSAntonio Huete Jimenez 	/*
2411077d0bdSPeter Avalos 	 * A flag to indicate that further optimization is needed.
2421077d0bdSPeter Avalos 	 * Iterative passes are continued until a given pass yields no
243*ea16f64eSAntonio Huete Jimenez 	 * code simplification or branch movement.
2441077d0bdSPeter Avalos 	 */
24597a9217aSAntonio Huete Jimenez 	int done;
2461077d0bdSPeter Avalos 
247*ea16f64eSAntonio Huete Jimenez 	/*
248*ea16f64eSAntonio Huete Jimenez 	 * XXX - detect loops that do nothing but repeated AND/OR pullups
249*ea16f64eSAntonio Huete Jimenez 	 * and edge moves.
250*ea16f64eSAntonio Huete Jimenez 	 * If 100 passes in a row do nothing but that, treat that as a
251*ea16f64eSAntonio Huete Jimenez 	 * sign that we're in a loop that just shuffles in a cycle in
252*ea16f64eSAntonio Huete Jimenez 	 * which each pass just shuffles the code and we eventually
253*ea16f64eSAntonio Huete Jimenez 	 * get back to the original configuration.
254*ea16f64eSAntonio Huete Jimenez 	 *
255*ea16f64eSAntonio Huete Jimenez 	 * XXX - we need a non-heuristic way of detecting, or preventing,
256*ea16f64eSAntonio Huete Jimenez 	 * such a cycle.
257*ea16f64eSAntonio Huete Jimenez 	 */
258*ea16f64eSAntonio Huete Jimenez 	int non_branch_movement_performed;
259*ea16f64eSAntonio Huete Jimenez 
260*ea16f64eSAntonio Huete Jimenez 	u_int n_blocks;		/* number of blocks in the CFG; guaranteed to be > 0, as it's a RET instruction at a minimum */
2611077d0bdSPeter Avalos 	struct block **blocks;
262*ea16f64eSAntonio Huete Jimenez 	u_int n_edges;		/* twice n_blocks, so guaranteed to be > 0 */
2631077d0bdSPeter Avalos 	struct edge **edges;
2641077d0bdSPeter Avalos 
2651077d0bdSPeter Avalos 	/*
2661077d0bdSPeter Avalos 	 * A bit vector set representation of the dominators.
2671077d0bdSPeter Avalos 	 * We round up the set size to the next power of two.
2681077d0bdSPeter Avalos 	 */
269*ea16f64eSAntonio Huete Jimenez 	u_int nodewords;	/* number of 32-bit words for a bit vector of "number of nodes" bits; guaranteed to be > 0 */
270*ea16f64eSAntonio Huete Jimenez 	u_int edgewords;	/* number of 32-bit words for a bit vector of "number of edges" bits; guaranteed to be > 0 */
2711077d0bdSPeter Avalos 	struct block **levels;
2721077d0bdSPeter Avalos 	bpf_u_int32 *space;
27397a9217aSAntonio Huete Jimenez 
2741077d0bdSPeter Avalos #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
2751077d0bdSPeter Avalos /*
2761077d0bdSPeter Avalos  * True if a is in uset {p}
2771077d0bdSPeter Avalos  */
2781077d0bdSPeter Avalos #define SET_MEMBER(p, a) \
2793a289941SAaron LI ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
2801077d0bdSPeter Avalos 
2811077d0bdSPeter Avalos /*
2821077d0bdSPeter Avalos  * Add 'a' to uset p.
2831077d0bdSPeter Avalos  */
2841077d0bdSPeter Avalos #define SET_INSERT(p, a) \
2853a289941SAaron LI (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
2861077d0bdSPeter Avalos 
2871077d0bdSPeter Avalos /*
2881077d0bdSPeter Avalos  * Delete 'a' from uset p.
2891077d0bdSPeter Avalos  */
2901077d0bdSPeter Avalos #define SET_DELETE(p, a) \
2913a289941SAaron LI (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
2921077d0bdSPeter Avalos 
2931077d0bdSPeter Avalos /*
2941077d0bdSPeter Avalos  * a := a intersect b
295*ea16f64eSAntonio Huete Jimenez  * n must be guaranteed to be > 0
2961077d0bdSPeter Avalos  */
2971077d0bdSPeter Avalos #define SET_INTERSECT(a, b, n)\
2981077d0bdSPeter Avalos {\
2991077d0bdSPeter Avalos 	register bpf_u_int32 *_x = a, *_y = b;\
300*ea16f64eSAntonio Huete Jimenez 	register u_int _n = n;\
301*ea16f64eSAntonio Huete Jimenez 	do *_x++ &= *_y++; while (--_n != 0);\
3021077d0bdSPeter Avalos }
3031077d0bdSPeter Avalos 
3041077d0bdSPeter Avalos /*
3051077d0bdSPeter Avalos  * a := a - b
306*ea16f64eSAntonio Huete Jimenez  * n must be guaranteed to be > 0
3071077d0bdSPeter Avalos  */
3081077d0bdSPeter Avalos #define SET_SUBTRACT(a, b, n)\
3091077d0bdSPeter Avalos {\
3101077d0bdSPeter Avalos 	register bpf_u_int32 *_x = a, *_y = b;\
311*ea16f64eSAntonio Huete Jimenez 	register u_int _n = n;\
312*ea16f64eSAntonio Huete Jimenez 	do *_x++ &=~ *_y++; while (--_n != 0);\
3131077d0bdSPeter Avalos }
3141077d0bdSPeter Avalos 
3151077d0bdSPeter Avalos /*
3161077d0bdSPeter Avalos  * a := a union b
317*ea16f64eSAntonio Huete Jimenez  * n must be guaranteed to be > 0
3181077d0bdSPeter Avalos  */
3191077d0bdSPeter Avalos #define SET_UNION(a, b, n)\
3201077d0bdSPeter Avalos {\
3211077d0bdSPeter Avalos 	register bpf_u_int32 *_x = a, *_y = b;\
322*ea16f64eSAntonio Huete Jimenez 	register u_int _n = n;\
323*ea16f64eSAntonio Huete Jimenez 	do *_x++ |= *_y++; while (--_n != 0);\
3241077d0bdSPeter Avalos }
3251077d0bdSPeter Avalos 
32697a9217aSAntonio Huete Jimenez 	uset all_dom_sets;
32797a9217aSAntonio Huete Jimenez 	uset all_closure_sets;
32897a9217aSAntonio Huete Jimenez 	uset all_edge_sets;
32997a9217aSAntonio Huete Jimenez 
33097a9217aSAntonio Huete Jimenez #define MODULUS 213
33197a9217aSAntonio Huete Jimenez 	struct valnode *hashtbl[MODULUS];
332*ea16f64eSAntonio Huete Jimenez 	bpf_u_int32 curval;
333*ea16f64eSAntonio Huete Jimenez 	bpf_u_int32 maxval;
33497a9217aSAntonio Huete Jimenez 
33597a9217aSAntonio Huete Jimenez 	struct vmapinfo *vmap;
33697a9217aSAntonio Huete Jimenez 	struct valnode *vnode_base;
33797a9217aSAntonio Huete Jimenez 	struct valnode *next_vnode;
3383a289941SAaron LI } opt_state_t;
33997a9217aSAntonio Huete Jimenez 
34097a9217aSAntonio Huete Jimenez typedef struct {
34197a9217aSAntonio Huete Jimenez 	/*
3423a289941SAaron LI 	 * Place to longjmp to on an error.
3433a289941SAaron LI 	 */
3443a289941SAaron LI 	jmp_buf top_ctx;
3453a289941SAaron LI 
3463a289941SAaron LI 	/*
3473a289941SAaron LI 	 * The buffer into which to put error message.
3483a289941SAaron LI 	 */
3493a289941SAaron LI 	char *errbuf;
3503a289941SAaron LI 
3513a289941SAaron LI 	/*
35297a9217aSAntonio Huete Jimenez 	 * Some pointers used to convert the basic block form of the code,
35397a9217aSAntonio Huete Jimenez 	 * into the array form that BPF requires.  'fstart' will point to
35497a9217aSAntonio Huete Jimenez 	 * the malloc'd array while 'ftail' is used during the recursive
35597a9217aSAntonio Huete Jimenez 	 * traversal.
35697a9217aSAntonio Huete Jimenez 	 */
35797a9217aSAntonio Huete Jimenez 	struct bpf_insn *fstart;
35897a9217aSAntonio Huete Jimenez 	struct bpf_insn *ftail;
35997a9217aSAntonio Huete Jimenez } conv_state_t;
36097a9217aSAntonio Huete Jimenez 
3613a289941SAaron LI static void opt_init(opt_state_t *, struct icode *);
36297a9217aSAntonio Huete Jimenez static void opt_cleanup(opt_state_t *);
3633a289941SAaron LI static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
3643a289941SAaron LI     PCAP_PRINTFLIKE(2, 3);
36597a9217aSAntonio Huete Jimenez 
36697a9217aSAntonio Huete Jimenez static void intern_blocks(opt_state_t *, struct icode *);
36797a9217aSAntonio Huete Jimenez 
36897a9217aSAntonio Huete Jimenez static void find_inedges(opt_state_t *, struct block *);
36997a9217aSAntonio Huete Jimenez #ifdef BDEBUG
3703a289941SAaron LI static void opt_dump(opt_state_t *, struct icode *);
37197a9217aSAntonio Huete Jimenez #endif
3721077d0bdSPeter Avalos 
3731077d0bdSPeter Avalos #ifndef MAX
3741077d0bdSPeter Avalos #define MAX(a,b) ((a)>(b)?(a):(b))
3751077d0bdSPeter Avalos #endif
3761077d0bdSPeter Avalos 
3771077d0bdSPeter Avalos static void
find_levels_r(opt_state_t * opt_state,struct icode * ic,struct block * b)37897a9217aSAntonio Huete Jimenez find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
3791077d0bdSPeter Avalos {
3801077d0bdSPeter Avalos 	int level;
3811077d0bdSPeter Avalos 
38297a9217aSAntonio Huete Jimenez 	if (isMarked(ic, b))
3831077d0bdSPeter Avalos 		return;
3841077d0bdSPeter Avalos 
38597a9217aSAntonio Huete Jimenez 	Mark(ic, b);
3861077d0bdSPeter Avalos 	b->link = 0;
3871077d0bdSPeter Avalos 
3881077d0bdSPeter Avalos 	if (JT(b)) {
38997a9217aSAntonio Huete Jimenez 		find_levels_r(opt_state, ic, JT(b));
39097a9217aSAntonio Huete Jimenez 		find_levels_r(opt_state, ic, JF(b));
3911077d0bdSPeter Avalos 		level = MAX(JT(b)->level, JF(b)->level) + 1;
3921077d0bdSPeter Avalos 	} else
3931077d0bdSPeter Avalos 		level = 0;
3941077d0bdSPeter Avalos 	b->level = level;
39597a9217aSAntonio Huete Jimenez 	b->link = opt_state->levels[level];
39697a9217aSAntonio Huete Jimenez 	opt_state->levels[level] = b;
3971077d0bdSPeter Avalos }
3981077d0bdSPeter Avalos 
3991077d0bdSPeter Avalos /*
4001077d0bdSPeter Avalos  * Level graph.  The levels go from 0 at the leaves to
40197a9217aSAntonio Huete Jimenez  * N_LEVELS at the root.  The opt_state->levels[] array points to the
4021077d0bdSPeter Avalos  * first node of the level list, whose elements are linked
4031077d0bdSPeter Avalos  * with the 'link' field of the struct block.
4041077d0bdSPeter Avalos  */
4051077d0bdSPeter Avalos static void
find_levels(opt_state_t * opt_state,struct icode * ic)40697a9217aSAntonio Huete Jimenez find_levels(opt_state_t *opt_state, struct icode *ic)
4071077d0bdSPeter Avalos {
40897a9217aSAntonio Huete Jimenez 	memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
40997a9217aSAntonio Huete Jimenez 	unMarkAll(ic);
41097a9217aSAntonio Huete Jimenez 	find_levels_r(opt_state, ic, ic->root);
4111077d0bdSPeter Avalos }
4121077d0bdSPeter Avalos 
4131077d0bdSPeter Avalos /*
4141077d0bdSPeter Avalos  * Find dominator relationships.
4151077d0bdSPeter Avalos  * Assumes graph has been leveled.
4161077d0bdSPeter Avalos  */
4171077d0bdSPeter Avalos static void
find_dom(opt_state_t * opt_state,struct block * root)41897a9217aSAntonio Huete Jimenez find_dom(opt_state_t *opt_state, struct block *root)
4191077d0bdSPeter Avalos {
420*ea16f64eSAntonio Huete Jimenez 	u_int i;
421*ea16f64eSAntonio Huete Jimenez 	int level;
4221077d0bdSPeter Avalos 	struct block *b;
4231077d0bdSPeter Avalos 	bpf_u_int32 *x;
4241077d0bdSPeter Avalos 
4251077d0bdSPeter Avalos 	/*
4261077d0bdSPeter Avalos 	 * Initialize sets to contain all nodes.
4271077d0bdSPeter Avalos 	 */
42897a9217aSAntonio Huete Jimenez 	x = opt_state->all_dom_sets;
429*ea16f64eSAntonio Huete Jimenez 	/*
430*ea16f64eSAntonio Huete Jimenez 	 * In opt_init(), we've made sure the product doesn't overflow.
431*ea16f64eSAntonio Huete Jimenez 	 */
43297a9217aSAntonio Huete Jimenez 	i = opt_state->n_blocks * opt_state->nodewords;
433*ea16f64eSAntonio Huete Jimenez 	while (i != 0) {
434*ea16f64eSAntonio Huete Jimenez 		--i;
4353a289941SAaron LI 		*x++ = 0xFFFFFFFFU;
436*ea16f64eSAntonio Huete Jimenez 	}
4371077d0bdSPeter Avalos 	/* Root starts off empty. */
438*ea16f64eSAntonio Huete Jimenez 	for (i = opt_state->nodewords; i != 0;) {
439*ea16f64eSAntonio Huete Jimenez 		--i;
4401077d0bdSPeter Avalos 		root->dom[i] = 0;
441*ea16f64eSAntonio Huete Jimenez 	}
4421077d0bdSPeter Avalos 
4431077d0bdSPeter Avalos 	/* root->level is the highest level no found. */
444*ea16f64eSAntonio Huete Jimenez 	for (level = root->level; level >= 0; --level) {
445*ea16f64eSAntonio Huete Jimenez 		for (b = opt_state->levels[level]; b; b = b->link) {
4461077d0bdSPeter Avalos 			SET_INSERT(b->dom, b->id);
4471077d0bdSPeter Avalos 			if (JT(b) == 0)
4481077d0bdSPeter Avalos 				continue;
44997a9217aSAntonio Huete Jimenez 			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
45097a9217aSAntonio Huete Jimenez 			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
4511077d0bdSPeter Avalos 		}
4521077d0bdSPeter Avalos 	}
4531077d0bdSPeter Avalos }
4541077d0bdSPeter Avalos 
4551077d0bdSPeter Avalos static void
propedom(opt_state_t * opt_state,struct edge * ep)45697a9217aSAntonio Huete Jimenez propedom(opt_state_t *opt_state, struct edge *ep)
4571077d0bdSPeter Avalos {
4581077d0bdSPeter Avalos 	SET_INSERT(ep->edom, ep->id);
4591077d0bdSPeter Avalos 	if (ep->succ) {
46097a9217aSAntonio Huete Jimenez 		SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
46197a9217aSAntonio Huete Jimenez 		SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
4621077d0bdSPeter Avalos 	}
4631077d0bdSPeter Avalos }
4641077d0bdSPeter Avalos 
4651077d0bdSPeter Avalos /*
4661077d0bdSPeter Avalos  * Compute edge dominators.
4671077d0bdSPeter Avalos  * Assumes graph has been leveled and predecessors established.
4681077d0bdSPeter Avalos  */
4691077d0bdSPeter Avalos static void
find_edom(opt_state_t * opt_state,struct block * root)47097a9217aSAntonio Huete Jimenez find_edom(opt_state_t *opt_state, struct block *root)
4711077d0bdSPeter Avalos {
472*ea16f64eSAntonio Huete Jimenez 	u_int i;
4731077d0bdSPeter Avalos 	uset x;
474*ea16f64eSAntonio Huete Jimenez 	int level;
4751077d0bdSPeter Avalos 	struct block *b;
4761077d0bdSPeter Avalos 
47797a9217aSAntonio Huete Jimenez 	x = opt_state->all_edge_sets;
478*ea16f64eSAntonio Huete Jimenez 	/*
479*ea16f64eSAntonio Huete Jimenez 	 * In opt_init(), we've made sure the product doesn't overflow.
480*ea16f64eSAntonio Huete Jimenez 	 */
481*ea16f64eSAntonio Huete Jimenez 	for (i = opt_state->n_edges * opt_state->edgewords; i != 0; ) {
482*ea16f64eSAntonio Huete Jimenez 		--i;
4833a289941SAaron LI 		x[i] = 0xFFFFFFFFU;
484*ea16f64eSAntonio Huete Jimenez 	}
4851077d0bdSPeter Avalos 
4861077d0bdSPeter Avalos 	/* root->level is the highest level no found. */
48797a9217aSAntonio Huete Jimenez 	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
48897a9217aSAntonio Huete Jimenez 	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
489*ea16f64eSAntonio Huete Jimenez 	for (level = root->level; level >= 0; --level) {
490*ea16f64eSAntonio Huete Jimenez 		for (b = opt_state->levels[level]; b != 0; b = b->link) {
49197a9217aSAntonio Huete Jimenez 			propedom(opt_state, &b->et);
49297a9217aSAntonio Huete Jimenez 			propedom(opt_state, &b->ef);
4931077d0bdSPeter Avalos 		}
4941077d0bdSPeter Avalos 	}
4951077d0bdSPeter Avalos }
4961077d0bdSPeter Avalos 
4971077d0bdSPeter Avalos /*
4981077d0bdSPeter Avalos  * Find the backwards transitive closure of the flow graph.  These sets
4991077d0bdSPeter Avalos  * are backwards in the sense that we find the set of nodes that reach
5001077d0bdSPeter Avalos  * a given node, not the set of nodes that can be reached by a node.
5011077d0bdSPeter Avalos  *
5021077d0bdSPeter Avalos  * Assumes graph has been leveled.
5031077d0bdSPeter Avalos  */
5041077d0bdSPeter Avalos static void
find_closure(opt_state_t * opt_state,struct block * root)50597a9217aSAntonio Huete Jimenez find_closure(opt_state_t *opt_state, struct block *root)
5061077d0bdSPeter Avalos {
507*ea16f64eSAntonio Huete Jimenez 	int level;
5081077d0bdSPeter Avalos 	struct block *b;
5091077d0bdSPeter Avalos 
5101077d0bdSPeter Avalos 	/*
5111077d0bdSPeter Avalos 	 * Initialize sets to contain no nodes.
5121077d0bdSPeter Avalos 	 */
51397a9217aSAntonio Huete Jimenez 	memset((char *)opt_state->all_closure_sets, 0,
51497a9217aSAntonio Huete Jimenez 	      opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
5151077d0bdSPeter Avalos 
5161077d0bdSPeter Avalos 	/* root->level is the highest level no found. */
517*ea16f64eSAntonio Huete Jimenez 	for (level = root->level; level >= 0; --level) {
518*ea16f64eSAntonio Huete Jimenez 		for (b = opt_state->levels[level]; b; b = b->link) {
5191077d0bdSPeter Avalos 			SET_INSERT(b->closure, b->id);
5201077d0bdSPeter Avalos 			if (JT(b) == 0)
5211077d0bdSPeter Avalos 				continue;
52297a9217aSAntonio Huete Jimenez 			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
52397a9217aSAntonio Huete Jimenez 			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
5241077d0bdSPeter Avalos 		}
5251077d0bdSPeter Avalos 	}
5261077d0bdSPeter Avalos }
5271077d0bdSPeter Avalos 
5281077d0bdSPeter Avalos /*
529*ea16f64eSAntonio Huete Jimenez  * Return the register number that is used by s.
530*ea16f64eSAntonio Huete Jimenez  *
531*ea16f64eSAntonio Huete Jimenez  * Returns ATOM_A if A is used, ATOM_X if X is used, AX_ATOM if both A and X
532*ea16f64eSAntonio Huete Jimenez  * are used, the scratch memory location's number if a scratch memory
533*ea16f64eSAntonio Huete Jimenez  * location is used (e.g., 0 for M[0]), or -1 if none of those are used.
5341077d0bdSPeter Avalos  *
5351077d0bdSPeter Avalos  * The implementation should probably change to an array access.
5361077d0bdSPeter Avalos  */
5371077d0bdSPeter Avalos static int
atomuse(struct stmt * s)5380e381983SMatthew Dillon atomuse(struct stmt *s)
5391077d0bdSPeter Avalos {
5401077d0bdSPeter Avalos 	register int c = s->code;
5411077d0bdSPeter Avalos 
5421077d0bdSPeter Avalos 	if (c == NOP)
5431077d0bdSPeter Avalos 		return -1;
5441077d0bdSPeter Avalos 
5451077d0bdSPeter Avalos 	switch (BPF_CLASS(c)) {
5461077d0bdSPeter Avalos 
5471077d0bdSPeter Avalos 	case BPF_RET:
5481077d0bdSPeter Avalos 		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
5491077d0bdSPeter Avalos 			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
5501077d0bdSPeter Avalos 
5511077d0bdSPeter Avalos 	case BPF_LD:
5521077d0bdSPeter Avalos 	case BPF_LDX:
553*ea16f64eSAntonio Huete Jimenez 		/*
554*ea16f64eSAntonio Huete Jimenez 		 * As there are fewer than 2^31 memory locations,
555*ea16f64eSAntonio Huete Jimenez 		 * s->k should be convertible to int without problems.
556*ea16f64eSAntonio Huete Jimenez 		 */
5571077d0bdSPeter Avalos 		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
558*ea16f64eSAntonio Huete Jimenez 			(BPF_MODE(c) == BPF_MEM) ? (int)s->k : -1;
5591077d0bdSPeter Avalos 
5601077d0bdSPeter Avalos 	case BPF_ST:
5611077d0bdSPeter Avalos 		return A_ATOM;
5621077d0bdSPeter Avalos 
5631077d0bdSPeter Avalos 	case BPF_STX:
5641077d0bdSPeter Avalos 		return X_ATOM;
5651077d0bdSPeter Avalos 
5661077d0bdSPeter Avalos 	case BPF_JMP:
5671077d0bdSPeter Avalos 	case BPF_ALU:
5681077d0bdSPeter Avalos 		if (BPF_SRC(c) == BPF_X)
5691077d0bdSPeter Avalos 			return AX_ATOM;
5701077d0bdSPeter Avalos 		return A_ATOM;
5711077d0bdSPeter Avalos 
5721077d0bdSPeter Avalos 	case BPF_MISC:
5731077d0bdSPeter Avalos 		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
5741077d0bdSPeter Avalos 	}
5751077d0bdSPeter Avalos 	abort();
5761077d0bdSPeter Avalos 	/* NOTREACHED */
5771077d0bdSPeter Avalos }
5781077d0bdSPeter Avalos 
5791077d0bdSPeter Avalos /*
5801077d0bdSPeter Avalos  * Return the register number that is defined by 's'.  We assume that
5811077d0bdSPeter Avalos  * a single stmt cannot define more than one register.  If no register
5821077d0bdSPeter Avalos  * is defined, return -1.
5831077d0bdSPeter Avalos  *
5841077d0bdSPeter Avalos  * The implementation should probably change to an array access.
5851077d0bdSPeter Avalos  */
5861077d0bdSPeter Avalos static int
atomdef(struct stmt * s)5870e381983SMatthew Dillon atomdef(struct stmt *s)
5881077d0bdSPeter Avalos {
5891077d0bdSPeter Avalos 	if (s->code == NOP)
5901077d0bdSPeter Avalos 		return -1;
5911077d0bdSPeter Avalos 
5921077d0bdSPeter Avalos 	switch (BPF_CLASS(s->code)) {
5931077d0bdSPeter Avalos 
5941077d0bdSPeter Avalos 	case BPF_LD:
5951077d0bdSPeter Avalos 	case BPF_ALU:
5961077d0bdSPeter Avalos 		return A_ATOM;
5971077d0bdSPeter Avalos 
5981077d0bdSPeter Avalos 	case BPF_LDX:
5991077d0bdSPeter Avalos 		return X_ATOM;
6001077d0bdSPeter Avalos 
6011077d0bdSPeter Avalos 	case BPF_ST:
6021077d0bdSPeter Avalos 	case BPF_STX:
6031077d0bdSPeter Avalos 		return s->k;
6041077d0bdSPeter Avalos 
6051077d0bdSPeter Avalos 	case BPF_MISC:
6061077d0bdSPeter Avalos 		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
6071077d0bdSPeter Avalos 	}
6081077d0bdSPeter Avalos 	return -1;
6091077d0bdSPeter Avalos }
6101077d0bdSPeter Avalos 
6111077d0bdSPeter Avalos /*
6121077d0bdSPeter Avalos  * Compute the sets of registers used, defined, and killed by 'b'.
6131077d0bdSPeter Avalos  *
6141077d0bdSPeter Avalos  * "Used" means that a statement in 'b' uses the register before any
6151077d0bdSPeter Avalos  * statement in 'b' defines it, i.e. it uses the value left in
6161077d0bdSPeter Avalos  * that register by a predecessor block of this block.
6171077d0bdSPeter Avalos  * "Defined" means that a statement in 'b' defines it.
6181077d0bdSPeter Avalos  * "Killed" means that a statement in 'b' defines it before any
6191077d0bdSPeter Avalos  * statement in 'b' uses it, i.e. it kills the value left in that
6201077d0bdSPeter Avalos  * register by a predecessor block of this block.
6211077d0bdSPeter Avalos  */
6221077d0bdSPeter Avalos static void
compute_local_ud(struct block * b)6230e381983SMatthew Dillon compute_local_ud(struct block *b)
6241077d0bdSPeter Avalos {
6251077d0bdSPeter Avalos 	struct slist *s;
62697a9217aSAntonio Huete Jimenez 	atomset def = 0, use = 0, killed = 0;
6271077d0bdSPeter Avalos 	int atom;
6281077d0bdSPeter Avalos 
6291077d0bdSPeter Avalos 	for (s = b->stmts; s; s = s->next) {
6301077d0bdSPeter Avalos 		if (s->s.code == NOP)
6311077d0bdSPeter Avalos 			continue;
6321077d0bdSPeter Avalos 		atom = atomuse(&s->s);
6331077d0bdSPeter Avalos 		if (atom >= 0) {
6341077d0bdSPeter Avalos 			if (atom == AX_ATOM) {
6351077d0bdSPeter Avalos 				if (!ATOMELEM(def, X_ATOM))
6361077d0bdSPeter Avalos 					use |= ATOMMASK(X_ATOM);
6371077d0bdSPeter Avalos 				if (!ATOMELEM(def, A_ATOM))
6381077d0bdSPeter Avalos 					use |= ATOMMASK(A_ATOM);
6391077d0bdSPeter Avalos 			}
6401077d0bdSPeter Avalos 			else if (atom < N_ATOMS) {
6411077d0bdSPeter Avalos 				if (!ATOMELEM(def, atom))
6421077d0bdSPeter Avalos 					use |= ATOMMASK(atom);
6431077d0bdSPeter Avalos 			}
6441077d0bdSPeter Avalos 			else
6451077d0bdSPeter Avalos 				abort();
6461077d0bdSPeter Avalos 		}
6471077d0bdSPeter Avalos 		atom = atomdef(&s->s);
6481077d0bdSPeter Avalos 		if (atom >= 0) {
6491077d0bdSPeter Avalos 			if (!ATOMELEM(use, atom))
65097a9217aSAntonio Huete Jimenez 				killed |= ATOMMASK(atom);
6511077d0bdSPeter Avalos 			def |= ATOMMASK(atom);
6521077d0bdSPeter Avalos 		}
6531077d0bdSPeter Avalos 	}
6541077d0bdSPeter Avalos 	if (BPF_CLASS(b->s.code) == BPF_JMP) {
6551077d0bdSPeter Avalos 		/*
6561077d0bdSPeter Avalos 		 * XXX - what about RET?
6571077d0bdSPeter Avalos 		 */
6581077d0bdSPeter Avalos 		atom = atomuse(&b->s);
6591077d0bdSPeter Avalos 		if (atom >= 0) {
6601077d0bdSPeter Avalos 			if (atom == AX_ATOM) {
6611077d0bdSPeter Avalos 				if (!ATOMELEM(def, X_ATOM))
6621077d0bdSPeter Avalos 					use |= ATOMMASK(X_ATOM);
6631077d0bdSPeter Avalos 				if (!ATOMELEM(def, A_ATOM))
6641077d0bdSPeter Avalos 					use |= ATOMMASK(A_ATOM);
6651077d0bdSPeter Avalos 			}
6661077d0bdSPeter Avalos 			else if (atom < N_ATOMS) {
6671077d0bdSPeter Avalos 				if (!ATOMELEM(def, atom))
6681077d0bdSPeter Avalos 					use |= ATOMMASK(atom);
6691077d0bdSPeter Avalos 			}
6701077d0bdSPeter Avalos 			else
6711077d0bdSPeter Avalos 				abort();
6721077d0bdSPeter Avalos 		}
6731077d0bdSPeter Avalos 	}
6741077d0bdSPeter Avalos 
6751077d0bdSPeter Avalos 	b->def = def;
67697a9217aSAntonio Huete Jimenez 	b->kill = killed;
6771077d0bdSPeter Avalos 	b->in_use = use;
6781077d0bdSPeter Avalos }
6791077d0bdSPeter Avalos 
6801077d0bdSPeter Avalos /*
6811077d0bdSPeter Avalos  * Assume graph is already leveled.
6821077d0bdSPeter Avalos  */
6831077d0bdSPeter Avalos static void
find_ud(opt_state_t * opt_state,struct block * root)68497a9217aSAntonio Huete Jimenez find_ud(opt_state_t *opt_state, struct block *root)
6851077d0bdSPeter Avalos {
6861077d0bdSPeter Avalos 	int i, maxlevel;
6871077d0bdSPeter Avalos 	struct block *p;
6881077d0bdSPeter Avalos 
6891077d0bdSPeter Avalos 	/*
6901077d0bdSPeter Avalos 	 * root->level is the highest level no found;
6911077d0bdSPeter Avalos 	 * count down from there.
6921077d0bdSPeter Avalos 	 */
6931077d0bdSPeter Avalos 	maxlevel = root->level;
6941077d0bdSPeter Avalos 	for (i = maxlevel; i >= 0; --i)
69597a9217aSAntonio Huete Jimenez 		for (p = opt_state->levels[i]; p; p = p->link) {
6961077d0bdSPeter Avalos 			compute_local_ud(p);
6971077d0bdSPeter Avalos 			p->out_use = 0;
6981077d0bdSPeter Avalos 		}
6991077d0bdSPeter Avalos 
7001077d0bdSPeter Avalos 	for (i = 1; i <= maxlevel; ++i) {
70197a9217aSAntonio Huete Jimenez 		for (p = opt_state->levels[i]; p; p = p->link) {
7021077d0bdSPeter Avalos 			p->out_use |= JT(p)->in_use | JF(p)->in_use;
7031077d0bdSPeter Avalos 			p->in_use |= p->out_use &~ p->kill;
7041077d0bdSPeter Avalos 		}
7051077d0bdSPeter Avalos 	}
7061077d0bdSPeter Avalos }
7071077d0bdSPeter Avalos static void
init_val(opt_state_t * opt_state)70897a9217aSAntonio Huete Jimenez init_val(opt_state_t *opt_state)
7091077d0bdSPeter Avalos {
71097a9217aSAntonio Huete Jimenez 	opt_state->curval = 0;
71197a9217aSAntonio Huete Jimenez 	opt_state->next_vnode = opt_state->vnode_base;
71297a9217aSAntonio Huete Jimenez 	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
71397a9217aSAntonio Huete Jimenez 	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
7141077d0bdSPeter Avalos }
7151077d0bdSPeter Avalos 
716*ea16f64eSAntonio Huete Jimenez /*
717*ea16f64eSAntonio Huete Jimenez  * Because we really don't have an IR, this stuff is a little messy.
718*ea16f64eSAntonio Huete Jimenez  *
719*ea16f64eSAntonio Huete Jimenez  * This routine looks in the table of existing value number for a value
720*ea16f64eSAntonio Huete Jimenez  * with generated from an operation with the specified opcode and
721*ea16f64eSAntonio Huete Jimenez  * the specified values.  If it finds it, it returns its value number,
722*ea16f64eSAntonio Huete Jimenez  * otherwise it makes a new entry in the table and returns the
723*ea16f64eSAntonio Huete Jimenez  * value number of that entry.
724*ea16f64eSAntonio Huete Jimenez  */
725*ea16f64eSAntonio Huete Jimenez static bpf_u_int32
F(opt_state_t * opt_state,int code,bpf_u_int32 v0,bpf_u_int32 v1)726*ea16f64eSAntonio Huete Jimenez F(opt_state_t *opt_state, int code, bpf_u_int32 v0, bpf_u_int32 v1)
7271077d0bdSPeter Avalos {
7281077d0bdSPeter Avalos 	u_int hash;
729*ea16f64eSAntonio Huete Jimenez 	bpf_u_int32 val;
7301077d0bdSPeter Avalos 	struct valnode *p;
7311077d0bdSPeter Avalos 
732*ea16f64eSAntonio Huete Jimenez 	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
7331077d0bdSPeter Avalos 	hash %= MODULUS;
7341077d0bdSPeter Avalos 
73597a9217aSAntonio Huete Jimenez 	for (p = opt_state->hashtbl[hash]; p; p = p->next)
7361077d0bdSPeter Avalos 		if (p->code == code && p->v0 == v0 && p->v1 == v1)
7371077d0bdSPeter Avalos 			return p->val;
7381077d0bdSPeter Avalos 
739*ea16f64eSAntonio Huete Jimenez 	/*
740*ea16f64eSAntonio Huete Jimenez 	 * Not found.  Allocate a new value, and assign it a new
741*ea16f64eSAntonio Huete Jimenez 	 * value number.
742*ea16f64eSAntonio Huete Jimenez 	 *
743*ea16f64eSAntonio Huete Jimenez 	 * opt_state->curval starts out as 0, which means VAL_UNKNOWN; we
744*ea16f64eSAntonio Huete Jimenez 	 * increment it before using it as the new value number, which
745*ea16f64eSAntonio Huete Jimenez 	 * means we never assign VAL_UNKNOWN.
746*ea16f64eSAntonio Huete Jimenez 	 *
747*ea16f64eSAntonio Huete Jimenez 	 * XXX - unless we overflow, but we probably won't have 2^32-1
748*ea16f64eSAntonio Huete Jimenez 	 * values; we treat 32 bits as effectively infinite.
749*ea16f64eSAntonio Huete Jimenez 	 */
75097a9217aSAntonio Huete Jimenez 	val = ++opt_state->curval;
7511077d0bdSPeter Avalos 	if (BPF_MODE(code) == BPF_IMM &&
7521077d0bdSPeter Avalos 	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
75397a9217aSAntonio Huete Jimenez 		opt_state->vmap[val].const_val = v0;
75497a9217aSAntonio Huete Jimenez 		opt_state->vmap[val].is_const = 1;
7551077d0bdSPeter Avalos 	}
75697a9217aSAntonio Huete Jimenez 	p = opt_state->next_vnode++;
7571077d0bdSPeter Avalos 	p->val = val;
7581077d0bdSPeter Avalos 	p->code = code;
7591077d0bdSPeter Avalos 	p->v0 = v0;
7601077d0bdSPeter Avalos 	p->v1 = v1;
76197a9217aSAntonio Huete Jimenez 	p->next = opt_state->hashtbl[hash];
76297a9217aSAntonio Huete Jimenez 	opt_state->hashtbl[hash] = p;
7631077d0bdSPeter Avalos 
7641077d0bdSPeter Avalos 	return val;
7651077d0bdSPeter Avalos }
7661077d0bdSPeter Avalos 
7671077d0bdSPeter Avalos static inline void
vstore(struct stmt * s,bpf_u_int32 * valp,bpf_u_int32 newval,int alter)768*ea16f64eSAntonio Huete Jimenez vstore(struct stmt *s, bpf_u_int32 *valp, bpf_u_int32 newval, int alter)
7691077d0bdSPeter Avalos {
7703a289941SAaron LI 	if (alter && newval != VAL_UNKNOWN && *valp == newval)
7711077d0bdSPeter Avalos 		s->code = NOP;
7721077d0bdSPeter Avalos 	else
7731077d0bdSPeter Avalos 		*valp = newval;
7741077d0bdSPeter Avalos }
7751077d0bdSPeter Avalos 
77697a9217aSAntonio Huete Jimenez /*
77797a9217aSAntonio Huete Jimenez  * Do constant-folding on binary operators.
77897a9217aSAntonio Huete Jimenez  * (Unary operators are handled elsewhere.)
77997a9217aSAntonio Huete Jimenez  */
7801077d0bdSPeter Avalos static void
fold_op(opt_state_t * opt_state,struct stmt * s,bpf_u_int32 v0,bpf_u_int32 v1)781*ea16f64eSAntonio Huete Jimenez fold_op(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 v0, bpf_u_int32 v1)
7821077d0bdSPeter Avalos {
7831077d0bdSPeter Avalos 	bpf_u_int32 a, b;
7841077d0bdSPeter Avalos 
78597a9217aSAntonio Huete Jimenez 	a = opt_state->vmap[v0].const_val;
78697a9217aSAntonio Huete Jimenez 	b = opt_state->vmap[v1].const_val;
7871077d0bdSPeter Avalos 
7881077d0bdSPeter Avalos 	switch (BPF_OP(s->code)) {
7891077d0bdSPeter Avalos 	case BPF_ADD:
7901077d0bdSPeter Avalos 		a += b;
7911077d0bdSPeter Avalos 		break;
7921077d0bdSPeter Avalos 
7931077d0bdSPeter Avalos 	case BPF_SUB:
7941077d0bdSPeter Avalos 		a -= b;
7951077d0bdSPeter Avalos 		break;
7961077d0bdSPeter Avalos 
7971077d0bdSPeter Avalos 	case BPF_MUL:
7981077d0bdSPeter Avalos 		a *= b;
7991077d0bdSPeter Avalos 		break;
8001077d0bdSPeter Avalos 
8011077d0bdSPeter Avalos 	case BPF_DIV:
8021077d0bdSPeter Avalos 		if (b == 0)
8033a289941SAaron LI 			opt_error(opt_state, "division by zero");
8041077d0bdSPeter Avalos 		a /= b;
8051077d0bdSPeter Avalos 		break;
8061077d0bdSPeter Avalos 
80797a9217aSAntonio Huete Jimenez 	case BPF_MOD:
80897a9217aSAntonio Huete Jimenez 		if (b == 0)
8093a289941SAaron LI 			opt_error(opt_state, "modulus by zero");
81097a9217aSAntonio Huete Jimenez 		a %= b;
81197a9217aSAntonio Huete Jimenez 		break;
81297a9217aSAntonio Huete Jimenez 
8131077d0bdSPeter Avalos 	case BPF_AND:
8141077d0bdSPeter Avalos 		a &= b;
8151077d0bdSPeter Avalos 		break;
8161077d0bdSPeter Avalos 
8171077d0bdSPeter Avalos 	case BPF_OR:
8181077d0bdSPeter Avalos 		a |= b;
8191077d0bdSPeter Avalos 		break;
8201077d0bdSPeter Avalos 
82197a9217aSAntonio Huete Jimenez 	case BPF_XOR:
82297a9217aSAntonio Huete Jimenez 		a ^= b;
82397a9217aSAntonio Huete Jimenez 		break;
82497a9217aSAntonio Huete Jimenez 
8251077d0bdSPeter Avalos 	case BPF_LSH:
8263a289941SAaron LI 		/*
8273a289941SAaron LI 		 * A left shift of more than the width of the type
8283a289941SAaron LI 		 * is undefined in C; we'll just treat it as shifting
8293a289941SAaron LI 		 * all the bits out.
8303a289941SAaron LI 		 *
8313a289941SAaron LI 		 * XXX - the BPF interpreter doesn't check for this,
8323a289941SAaron LI 		 * so its behavior is dependent on the behavior of
8333a289941SAaron LI 		 * the processor on which it's running.  There are
8343a289941SAaron LI 		 * processors on which it shifts all the bits out
8353a289941SAaron LI 		 * and processors on which it does no shift.
8363a289941SAaron LI 		 */
8373a289941SAaron LI 		if (b < 32)
8381077d0bdSPeter Avalos 			a <<= b;
8393a289941SAaron LI 		else
8403a289941SAaron LI 			a = 0;
8411077d0bdSPeter Avalos 		break;
8421077d0bdSPeter Avalos 
8431077d0bdSPeter Avalos 	case BPF_RSH:
8443a289941SAaron LI 		/*
8453a289941SAaron LI 		 * A right shift of more than the width of the type
8463a289941SAaron LI 		 * is undefined in C; we'll just treat it as shifting
8473a289941SAaron LI 		 * all the bits out.
8483a289941SAaron LI 		 *
8493a289941SAaron LI 		 * XXX - the BPF interpreter doesn't check for this,
8503a289941SAaron LI 		 * so its behavior is dependent on the behavior of
8513a289941SAaron LI 		 * the processor on which it's running.  There are
8523a289941SAaron LI 		 * processors on which it shifts all the bits out
8533a289941SAaron LI 		 * and processors on which it does no shift.
8543a289941SAaron LI 		 */
8553a289941SAaron LI 		if (b < 32)
8561077d0bdSPeter Avalos 			a >>= b;
8573a289941SAaron LI 		else
8583a289941SAaron LI 			a = 0;
8591077d0bdSPeter Avalos 		break;
8601077d0bdSPeter Avalos 
8611077d0bdSPeter Avalos 	default:
8621077d0bdSPeter Avalos 		abort();
8631077d0bdSPeter Avalos 	}
8641077d0bdSPeter Avalos 	s->k = a;
8651077d0bdSPeter Avalos 	s->code = BPF_LD|BPF_IMM;
866*ea16f64eSAntonio Huete Jimenez 	/*
867*ea16f64eSAntonio Huete Jimenez 	 * XXX - optimizer loop detection.
868*ea16f64eSAntonio Huete Jimenez 	 */
869*ea16f64eSAntonio Huete Jimenez 	opt_state->non_branch_movement_performed = 1;
87097a9217aSAntonio Huete Jimenez 	opt_state->done = 0;
8711077d0bdSPeter Avalos }
8721077d0bdSPeter Avalos 
8731077d0bdSPeter Avalos static inline struct slist *
this_op(struct slist * s)8740e381983SMatthew Dillon this_op(struct slist *s)
8751077d0bdSPeter Avalos {
8761077d0bdSPeter Avalos 	while (s != 0 && s->s.code == NOP)
8771077d0bdSPeter Avalos 		s = s->next;
8781077d0bdSPeter Avalos 	return s;
8791077d0bdSPeter Avalos }
8801077d0bdSPeter Avalos 
8811077d0bdSPeter Avalos static void
opt_not(struct block * b)8820e381983SMatthew Dillon opt_not(struct block *b)
8831077d0bdSPeter Avalos {
8841077d0bdSPeter Avalos 	struct block *tmp = JT(b);
8851077d0bdSPeter Avalos 
8861077d0bdSPeter Avalos 	JT(b) = JF(b);
8871077d0bdSPeter Avalos 	JF(b) = tmp;
8881077d0bdSPeter Avalos }
8891077d0bdSPeter Avalos 
8901077d0bdSPeter Avalos static void
opt_peep(opt_state_t * opt_state,struct block * b)89197a9217aSAntonio Huete Jimenez opt_peep(opt_state_t *opt_state, struct block *b)
8921077d0bdSPeter Avalos {
8931077d0bdSPeter Avalos 	struct slist *s;
8941077d0bdSPeter Avalos 	struct slist *next, *last;
895*ea16f64eSAntonio Huete Jimenez 	bpf_u_int32 val;
8961077d0bdSPeter Avalos 
8971077d0bdSPeter Avalos 	s = b->stmts;
8981077d0bdSPeter Avalos 	if (s == 0)
8991077d0bdSPeter Avalos 		return;
9001077d0bdSPeter Avalos 
9011077d0bdSPeter Avalos 	last = s;
9021077d0bdSPeter Avalos 	for (/*empty*/; /*empty*/; s = next) {
9031077d0bdSPeter Avalos 		/*
9041077d0bdSPeter Avalos 		 * Skip over nops.
9051077d0bdSPeter Avalos 		 */
9061077d0bdSPeter Avalos 		s = this_op(s);
9071077d0bdSPeter Avalos 		if (s == 0)
9081077d0bdSPeter Avalos 			break;	/* nothing left in the block */
9091077d0bdSPeter Avalos 
9101077d0bdSPeter Avalos 		/*
9111077d0bdSPeter Avalos 		 * Find the next real instruction after that one
9121077d0bdSPeter Avalos 		 * (skipping nops).
9131077d0bdSPeter Avalos 		 */
9141077d0bdSPeter Avalos 		next = this_op(s->next);
9151077d0bdSPeter Avalos 		if (next == 0)
9161077d0bdSPeter Avalos 			break;	/* no next instruction */
9171077d0bdSPeter Avalos 		last = next;
9181077d0bdSPeter Avalos 
9191077d0bdSPeter Avalos 		/*
9201077d0bdSPeter Avalos 		 * st  M[k]	-->	st  M[k]
9211077d0bdSPeter Avalos 		 * ldx M[k]		tax
9221077d0bdSPeter Avalos 		 */
9231077d0bdSPeter Avalos 		if (s->s.code == BPF_ST &&
9241077d0bdSPeter Avalos 		    next->s.code == (BPF_LDX|BPF_MEM) &&
9251077d0bdSPeter Avalos 		    s->s.k == next->s.k) {
926*ea16f64eSAntonio Huete Jimenez 			/*
927*ea16f64eSAntonio Huete Jimenez 			 * XXX - optimizer loop detection.
928*ea16f64eSAntonio Huete Jimenez 			 */
929*ea16f64eSAntonio Huete Jimenez 			opt_state->non_branch_movement_performed = 1;
93097a9217aSAntonio Huete Jimenez 			opt_state->done = 0;
9311077d0bdSPeter Avalos 			next->s.code = BPF_MISC|BPF_TAX;
9321077d0bdSPeter Avalos 		}
9331077d0bdSPeter Avalos 		/*
9341077d0bdSPeter Avalos 		 * ld  #k	-->	ldx  #k
9351077d0bdSPeter Avalos 		 * tax			txa
9361077d0bdSPeter Avalos 		 */
9371077d0bdSPeter Avalos 		if (s->s.code == (BPF_LD|BPF_IMM) &&
9381077d0bdSPeter Avalos 		    next->s.code == (BPF_MISC|BPF_TAX)) {
9391077d0bdSPeter Avalos 			s->s.code = BPF_LDX|BPF_IMM;
9401077d0bdSPeter Avalos 			next->s.code = BPF_MISC|BPF_TXA;
941*ea16f64eSAntonio Huete Jimenez 			/*
942*ea16f64eSAntonio Huete Jimenez 			 * XXX - optimizer loop detection.
943*ea16f64eSAntonio Huete Jimenez 			 */
944*ea16f64eSAntonio Huete Jimenez 			opt_state->non_branch_movement_performed = 1;
94597a9217aSAntonio Huete Jimenez 			opt_state->done = 0;
9461077d0bdSPeter Avalos 		}
9471077d0bdSPeter Avalos 		/*
9481077d0bdSPeter Avalos 		 * This is an ugly special case, but it happens
9491077d0bdSPeter Avalos 		 * when you say tcp[k] or udp[k] where k is a constant.
9501077d0bdSPeter Avalos 		 */
9511077d0bdSPeter Avalos 		if (s->s.code == (BPF_LD|BPF_IMM)) {
9521077d0bdSPeter Avalos 			struct slist *add, *tax, *ild;
9531077d0bdSPeter Avalos 
9541077d0bdSPeter Avalos 			/*
9551077d0bdSPeter Avalos 			 * Check that X isn't used on exit from this
9561077d0bdSPeter Avalos 			 * block (which the optimizer might cause).
9571077d0bdSPeter Avalos 			 * We know the code generator won't generate
9581077d0bdSPeter Avalos 			 * any local dependencies.
9591077d0bdSPeter Avalos 			 */
9601077d0bdSPeter Avalos 			if (ATOMELEM(b->out_use, X_ATOM))
9611077d0bdSPeter Avalos 				continue;
9621077d0bdSPeter Avalos 
9631077d0bdSPeter Avalos 			/*
9641077d0bdSPeter Avalos 			 * Check that the instruction following the ldi
9651077d0bdSPeter Avalos 			 * is an addx, or it's an ldxms with an addx
9661077d0bdSPeter Avalos 			 * following it (with 0 or more nops between the
9671077d0bdSPeter Avalos 			 * ldxms and addx).
9681077d0bdSPeter Avalos 			 */
9691077d0bdSPeter Avalos 			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
9701077d0bdSPeter Avalos 				add = next;
9711077d0bdSPeter Avalos 			else
9721077d0bdSPeter Avalos 				add = this_op(next->next);
9731077d0bdSPeter Avalos 			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
9741077d0bdSPeter Avalos 				continue;
9751077d0bdSPeter Avalos 
9761077d0bdSPeter Avalos 			/*
9771077d0bdSPeter Avalos 			 * Check that a tax follows that (with 0 or more
9781077d0bdSPeter Avalos 			 * nops between them).
9791077d0bdSPeter Avalos 			 */
9801077d0bdSPeter Avalos 			tax = this_op(add->next);
9811077d0bdSPeter Avalos 			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
9821077d0bdSPeter Avalos 				continue;
9831077d0bdSPeter Avalos 
9841077d0bdSPeter Avalos 			/*
9851077d0bdSPeter Avalos 			 * Check that an ild follows that (with 0 or more
9861077d0bdSPeter Avalos 			 * nops between them).
9871077d0bdSPeter Avalos 			 */
9881077d0bdSPeter Avalos 			ild = this_op(tax->next);
9891077d0bdSPeter Avalos 			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
9901077d0bdSPeter Avalos 			    BPF_MODE(ild->s.code) != BPF_IND)
9911077d0bdSPeter Avalos 				continue;
9921077d0bdSPeter Avalos 			/*
9931077d0bdSPeter Avalos 			 * We want to turn this sequence:
9941077d0bdSPeter Avalos 			 *
9951077d0bdSPeter Avalos 			 * (004) ldi     #0x2		{s}
9961077d0bdSPeter Avalos 			 * (005) ldxms   [14]		{next}  -- optional
9971077d0bdSPeter Avalos 			 * (006) addx			{add}
9981077d0bdSPeter Avalos 			 * (007) tax			{tax}
9991077d0bdSPeter Avalos 			 * (008) ild     [x+0]		{ild}
10001077d0bdSPeter Avalos 			 *
10011077d0bdSPeter Avalos 			 * into this sequence:
10021077d0bdSPeter Avalos 			 *
10031077d0bdSPeter Avalos 			 * (004) nop
10041077d0bdSPeter Avalos 			 * (005) ldxms   [14]
10051077d0bdSPeter Avalos 			 * (006) nop
10061077d0bdSPeter Avalos 			 * (007) nop
10071077d0bdSPeter Avalos 			 * (008) ild     [x+2]
10081077d0bdSPeter Avalos 			 *
10091077d0bdSPeter Avalos 			 * XXX We need to check that X is not
10101077d0bdSPeter Avalos 			 * subsequently used, because we want to change
10111077d0bdSPeter Avalos 			 * what'll be in it after this sequence.
10121077d0bdSPeter Avalos 			 *
10131077d0bdSPeter Avalos 			 * We know we can eliminate the accumulator
10141077d0bdSPeter Avalos 			 * modifications earlier in the sequence since
10151077d0bdSPeter Avalos 			 * it is defined by the last stmt of this sequence
10161077d0bdSPeter Avalos 			 * (i.e., the last statement of the sequence loads
10171077d0bdSPeter Avalos 			 * a value into the accumulator, so we can eliminate
10181077d0bdSPeter Avalos 			 * earlier operations on the accumulator).
10191077d0bdSPeter Avalos 			 */
10201077d0bdSPeter Avalos 			ild->s.k += s->s.k;
10211077d0bdSPeter Avalos 			s->s.code = NOP;
10221077d0bdSPeter Avalos 			add->s.code = NOP;
10231077d0bdSPeter Avalos 			tax->s.code = NOP;
1024*ea16f64eSAntonio Huete Jimenez 			/*
1025*ea16f64eSAntonio Huete Jimenez 			 * XXX - optimizer loop detection.
1026*ea16f64eSAntonio Huete Jimenez 			 */
1027*ea16f64eSAntonio Huete Jimenez 			opt_state->non_branch_movement_performed = 1;
102897a9217aSAntonio Huete Jimenez 			opt_state->done = 0;
10291077d0bdSPeter Avalos 		}
10301077d0bdSPeter Avalos 	}
10311077d0bdSPeter Avalos 	/*
10321077d0bdSPeter Avalos 	 * If the comparison at the end of a block is an equality
10331077d0bdSPeter Avalos 	 * comparison against a constant, and nobody uses the value
10341077d0bdSPeter Avalos 	 * we leave in the A register at the end of a block, and
10351077d0bdSPeter Avalos 	 * the operation preceding the comparison is an arithmetic
10361077d0bdSPeter Avalos 	 * operation, we can sometime optimize it away.
10371077d0bdSPeter Avalos 	 */
10381077d0bdSPeter Avalos 	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
10391077d0bdSPeter Avalos 	    !ATOMELEM(b->out_use, A_ATOM)) {
10401077d0bdSPeter Avalos 		/*
10411077d0bdSPeter Avalos 		 * We can optimize away certain subtractions of the
10421077d0bdSPeter Avalos 		 * X register.
10431077d0bdSPeter Avalos 		 */
10441077d0bdSPeter Avalos 		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
10451077d0bdSPeter Avalos 			val = b->val[X_ATOM];
104697a9217aSAntonio Huete Jimenez 			if (opt_state->vmap[val].is_const) {
10471077d0bdSPeter Avalos 				/*
10481077d0bdSPeter Avalos 				 * If we have a subtract to do a comparison,
10491077d0bdSPeter Avalos 				 * and the X register is a known constant,
10501077d0bdSPeter Avalos 				 * we can merge this value into the
10511077d0bdSPeter Avalos 				 * comparison:
10521077d0bdSPeter Avalos 				 *
10531077d0bdSPeter Avalos 				 * sub x  ->	nop
10541077d0bdSPeter Avalos 				 * jeq #y	jeq #(x+y)
10551077d0bdSPeter Avalos 				 */
105697a9217aSAntonio Huete Jimenez 				b->s.k += opt_state->vmap[val].const_val;
10571077d0bdSPeter Avalos 				last->s.code = NOP;
1058*ea16f64eSAntonio Huete Jimenez 				/*
1059*ea16f64eSAntonio Huete Jimenez 				 * XXX - optimizer loop detection.
1060*ea16f64eSAntonio Huete Jimenez 				 */
1061*ea16f64eSAntonio Huete Jimenez 				opt_state->non_branch_movement_performed = 1;
106297a9217aSAntonio Huete Jimenez 				opt_state->done = 0;
10631077d0bdSPeter Avalos 			} else if (b->s.k == 0) {
10641077d0bdSPeter Avalos 				/*
10651077d0bdSPeter Avalos 				 * If the X register isn't a constant,
10661077d0bdSPeter Avalos 				 * and the comparison in the test is
10671077d0bdSPeter Avalos 				 * against 0, we can compare with the
10681077d0bdSPeter Avalos 				 * X register, instead:
10691077d0bdSPeter Avalos 				 *
10701077d0bdSPeter Avalos 				 * sub x  ->	nop
10711077d0bdSPeter Avalos 				 * jeq #0	jeq x
10721077d0bdSPeter Avalos 				 */
10731077d0bdSPeter Avalos 				last->s.code = NOP;
10741077d0bdSPeter Avalos 				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
1075*ea16f64eSAntonio Huete Jimenez 				/*
1076*ea16f64eSAntonio Huete Jimenez 				 * XXX - optimizer loop detection.
1077*ea16f64eSAntonio Huete Jimenez 				 */
1078*ea16f64eSAntonio Huete Jimenez 				opt_state->non_branch_movement_performed = 1;
107997a9217aSAntonio Huete Jimenez 				opt_state->done = 0;
10801077d0bdSPeter Avalos 			}
10811077d0bdSPeter Avalos 		}
10821077d0bdSPeter Avalos 		/*
10831077d0bdSPeter Avalos 		 * Likewise, a constant subtract can be simplified:
10841077d0bdSPeter Avalos 		 *
10851077d0bdSPeter Avalos 		 * sub #x ->	nop
10861077d0bdSPeter Avalos 		 * jeq #y ->	jeq #(x+y)
10871077d0bdSPeter Avalos 		 */
10881077d0bdSPeter Avalos 		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
10891077d0bdSPeter Avalos 			last->s.code = NOP;
10901077d0bdSPeter Avalos 			b->s.k += last->s.k;
1091*ea16f64eSAntonio Huete Jimenez 			/*
1092*ea16f64eSAntonio Huete Jimenez 			 * XXX - optimizer loop detection.
1093*ea16f64eSAntonio Huete Jimenez 			 */
1094*ea16f64eSAntonio Huete Jimenez 			opt_state->non_branch_movement_performed = 1;
109597a9217aSAntonio Huete Jimenez 			opt_state->done = 0;
10961077d0bdSPeter Avalos 		}
10971077d0bdSPeter Avalos 		/*
10981077d0bdSPeter Avalos 		 * And, similarly, a constant AND can be simplified
10991077d0bdSPeter Avalos 		 * if we're testing against 0, i.e.:
11001077d0bdSPeter Avalos 		 *
11011077d0bdSPeter Avalos 		 * and #k	nop
11021077d0bdSPeter Avalos 		 * jeq #0  ->	jset #k
11031077d0bdSPeter Avalos 		 */
11041077d0bdSPeter Avalos 		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
11051077d0bdSPeter Avalos 		    b->s.k == 0) {
11061077d0bdSPeter Avalos 			b->s.k = last->s.k;
11071077d0bdSPeter Avalos 			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
11081077d0bdSPeter Avalos 			last->s.code = NOP;
1109*ea16f64eSAntonio Huete Jimenez 			/*
1110*ea16f64eSAntonio Huete Jimenez 			 * XXX - optimizer loop detection.
1111*ea16f64eSAntonio Huete Jimenez 			 */
1112*ea16f64eSAntonio Huete Jimenez 			opt_state->non_branch_movement_performed = 1;
111397a9217aSAntonio Huete Jimenez 			opt_state->done = 0;
11141077d0bdSPeter Avalos 			opt_not(b);
11151077d0bdSPeter Avalos 		}
11161077d0bdSPeter Avalos 	}
11171077d0bdSPeter Avalos 	/*
11181077d0bdSPeter Avalos 	 * jset #0        ->   never
11191077d0bdSPeter Avalos 	 * jset #ffffffff ->   always
11201077d0bdSPeter Avalos 	 */
11211077d0bdSPeter Avalos 	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
11221077d0bdSPeter Avalos 		if (b->s.k == 0)
11231077d0bdSPeter Avalos 			JT(b) = JF(b);
1124*ea16f64eSAntonio Huete Jimenez 		if (b->s.k == 0xffffffffU)
11251077d0bdSPeter Avalos 			JF(b) = JT(b);
11261077d0bdSPeter Avalos 	}
11271077d0bdSPeter Avalos 	/*
1128de0d3203SPeter Avalos 	 * If we're comparing against the index register, and the index
1129de0d3203SPeter Avalos 	 * register is a known constant, we can just compare against that
1130de0d3203SPeter Avalos 	 * constant.
1131de0d3203SPeter Avalos 	 */
1132de0d3203SPeter Avalos 	val = b->val[X_ATOM];
113397a9217aSAntonio Huete Jimenez 	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1134*ea16f64eSAntonio Huete Jimenez 		bpf_u_int32 v = opt_state->vmap[val].const_val;
1135de0d3203SPeter Avalos 		b->s.code &= ~BPF_X;
1136de0d3203SPeter Avalos 		b->s.k = v;
1137de0d3203SPeter Avalos 	}
1138de0d3203SPeter Avalos 	/*
11391077d0bdSPeter Avalos 	 * If the accumulator is a known constant, we can compute the
11401077d0bdSPeter Avalos 	 * comparison result.
11411077d0bdSPeter Avalos 	 */
11421077d0bdSPeter Avalos 	val = b->val[A_ATOM];
114397a9217aSAntonio Huete Jimenez 	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1144*ea16f64eSAntonio Huete Jimenez 		bpf_u_int32 v = opt_state->vmap[val].const_val;
11451077d0bdSPeter Avalos 		switch (BPF_OP(b->s.code)) {
11461077d0bdSPeter Avalos 
11471077d0bdSPeter Avalos 		case BPF_JEQ:
11481077d0bdSPeter Avalos 			v = v == b->s.k;
11491077d0bdSPeter Avalos 			break;
11501077d0bdSPeter Avalos 
11511077d0bdSPeter Avalos 		case BPF_JGT:
1152*ea16f64eSAntonio Huete Jimenez 			v = v > b->s.k;
11531077d0bdSPeter Avalos 			break;
11541077d0bdSPeter Avalos 
11551077d0bdSPeter Avalos 		case BPF_JGE:
1156*ea16f64eSAntonio Huete Jimenez 			v = v >= b->s.k;
11571077d0bdSPeter Avalos 			break;
11581077d0bdSPeter Avalos 
11591077d0bdSPeter Avalos 		case BPF_JSET:
11601077d0bdSPeter Avalos 			v &= b->s.k;
11611077d0bdSPeter Avalos 			break;
11621077d0bdSPeter Avalos 
11631077d0bdSPeter Avalos 		default:
11641077d0bdSPeter Avalos 			abort();
11651077d0bdSPeter Avalos 		}
1166*ea16f64eSAntonio Huete Jimenez 		if (JF(b) != JT(b)) {
1167*ea16f64eSAntonio Huete Jimenez 			/*
1168*ea16f64eSAntonio Huete Jimenez 			 * XXX - optimizer loop detection.
1169*ea16f64eSAntonio Huete Jimenez 			 */
1170*ea16f64eSAntonio Huete Jimenez 			opt_state->non_branch_movement_performed = 1;
117197a9217aSAntonio Huete Jimenez 			opt_state->done = 0;
1172*ea16f64eSAntonio Huete Jimenez 		}
11731077d0bdSPeter Avalos 		if (v)
11741077d0bdSPeter Avalos 			JF(b) = JT(b);
11751077d0bdSPeter Avalos 		else
11761077d0bdSPeter Avalos 			JT(b) = JF(b);
11771077d0bdSPeter Avalos 	}
11781077d0bdSPeter Avalos }
11791077d0bdSPeter Avalos 
11801077d0bdSPeter Avalos /*
11811077d0bdSPeter Avalos  * Compute the symbolic value of expression of 's', and update
11821077d0bdSPeter Avalos  * anything it defines in the value table 'val'.  If 'alter' is true,
11831077d0bdSPeter Avalos  * do various optimizations.  This code would be cleaner if symbolic
11841077d0bdSPeter Avalos  * evaluation and code transformations weren't folded together.
11851077d0bdSPeter Avalos  */
11861077d0bdSPeter Avalos static void
opt_stmt(opt_state_t * opt_state,struct stmt * s,bpf_u_int32 val[],int alter)1187*ea16f64eSAntonio Huete Jimenez opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter)
11881077d0bdSPeter Avalos {
11891077d0bdSPeter Avalos 	int op;
1190*ea16f64eSAntonio Huete Jimenez 	bpf_u_int32 v;
11911077d0bdSPeter Avalos 
11921077d0bdSPeter Avalos 	switch (s->code) {
11931077d0bdSPeter Avalos 
11941077d0bdSPeter Avalos 	case BPF_LD|BPF_ABS|BPF_W:
11951077d0bdSPeter Avalos 	case BPF_LD|BPF_ABS|BPF_H:
11961077d0bdSPeter Avalos 	case BPF_LD|BPF_ABS|BPF_B:
119797a9217aSAntonio Huete Jimenez 		v = F(opt_state, s->code, s->k, 0L);
11981077d0bdSPeter Avalos 		vstore(s, &val[A_ATOM], v, alter);
11991077d0bdSPeter Avalos 		break;
12001077d0bdSPeter Avalos 
12011077d0bdSPeter Avalos 	case BPF_LD|BPF_IND|BPF_W:
12021077d0bdSPeter Avalos 	case BPF_LD|BPF_IND|BPF_H:
12031077d0bdSPeter Avalos 	case BPF_LD|BPF_IND|BPF_B:
12041077d0bdSPeter Avalos 		v = val[X_ATOM];
120597a9217aSAntonio Huete Jimenez 		if (alter && opt_state->vmap[v].is_const) {
12061077d0bdSPeter Avalos 			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
120797a9217aSAntonio Huete Jimenez 			s->k += opt_state->vmap[v].const_val;
120897a9217aSAntonio Huete Jimenez 			v = F(opt_state, s->code, s->k, 0L);
1209*ea16f64eSAntonio Huete Jimenez 			/*
1210*ea16f64eSAntonio Huete Jimenez 			 * XXX - optimizer loop detection.
1211*ea16f64eSAntonio Huete Jimenez 			 */
1212*ea16f64eSAntonio Huete Jimenez 			opt_state->non_branch_movement_performed = 1;
121397a9217aSAntonio Huete Jimenez 			opt_state->done = 0;
12141077d0bdSPeter Avalos 		}
12151077d0bdSPeter Avalos 		else
121697a9217aSAntonio Huete Jimenez 			v = F(opt_state, s->code, s->k, v);
12171077d0bdSPeter Avalos 		vstore(s, &val[A_ATOM], v, alter);
12181077d0bdSPeter Avalos 		break;
12191077d0bdSPeter Avalos 
12201077d0bdSPeter Avalos 	case BPF_LD|BPF_LEN:
122197a9217aSAntonio Huete Jimenez 		v = F(opt_state, s->code, 0L, 0L);
12221077d0bdSPeter Avalos 		vstore(s, &val[A_ATOM], v, alter);
12231077d0bdSPeter Avalos 		break;
12241077d0bdSPeter Avalos 
12251077d0bdSPeter Avalos 	case BPF_LD|BPF_IMM:
12261077d0bdSPeter Avalos 		v = K(s->k);
12271077d0bdSPeter Avalos 		vstore(s, &val[A_ATOM], v, alter);
12281077d0bdSPeter Avalos 		break;
12291077d0bdSPeter Avalos 
12301077d0bdSPeter Avalos 	case BPF_LDX|BPF_IMM:
12311077d0bdSPeter Avalos 		v = K(s->k);
12321077d0bdSPeter Avalos 		vstore(s, &val[X_ATOM], v, alter);
12331077d0bdSPeter Avalos 		break;
12341077d0bdSPeter Avalos 
12351077d0bdSPeter Avalos 	case BPF_LDX|BPF_MSH|BPF_B:
123697a9217aSAntonio Huete Jimenez 		v = F(opt_state, s->code, s->k, 0L);
12371077d0bdSPeter Avalos 		vstore(s, &val[X_ATOM], v, alter);
12381077d0bdSPeter Avalos 		break;
12391077d0bdSPeter Avalos 
12401077d0bdSPeter Avalos 	case BPF_ALU|BPF_NEG:
124197a9217aSAntonio Huete Jimenez 		if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
12421077d0bdSPeter Avalos 			s->code = BPF_LD|BPF_IMM;
12433a289941SAaron LI 			/*
12443a289941SAaron LI 			 * Do this negation as unsigned arithmetic; that's
12453a289941SAaron LI 			 * what modern BPF engines do, and it guarantees
12463a289941SAaron LI 			 * that all possible values can be negated.  (Yeah,
12473a289941SAaron LI 			 * negating 0x80000000, the minimum signed 32-bit
12483a289941SAaron LI 			 * two's-complement value, results in 0x80000000,
12493a289941SAaron LI 			 * so it's still negative, but we *should* be doing
12503a289941SAaron LI 			 * all unsigned arithmetic here, to match what
12513a289941SAaron LI 			 * modern BPF engines do.)
12523a289941SAaron LI 			 *
12533a289941SAaron LI 			 * Express it as 0U - (unsigned value) so that we
12543a289941SAaron LI 			 * don't get compiler warnings about negating an
12553a289941SAaron LI 			 * unsigned value and don't get UBSan warnings
12563a289941SAaron LI 			 * about the result of negating 0x80000000 being
12573a289941SAaron LI 			 * undefined.
12583a289941SAaron LI 			 */
1259*ea16f64eSAntonio Huete Jimenez 			s->k = 0U - opt_state->vmap[val[A_ATOM]].const_val;
12601077d0bdSPeter Avalos 			val[A_ATOM] = K(s->k);
12611077d0bdSPeter Avalos 		}
12621077d0bdSPeter Avalos 		else
126397a9217aSAntonio Huete Jimenez 			val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
12641077d0bdSPeter Avalos 		break;
12651077d0bdSPeter Avalos 
12661077d0bdSPeter Avalos 	case BPF_ALU|BPF_ADD|BPF_K:
12671077d0bdSPeter Avalos 	case BPF_ALU|BPF_SUB|BPF_K:
12681077d0bdSPeter Avalos 	case BPF_ALU|BPF_MUL|BPF_K:
12691077d0bdSPeter Avalos 	case BPF_ALU|BPF_DIV|BPF_K:
127097a9217aSAntonio Huete Jimenez 	case BPF_ALU|BPF_MOD|BPF_K:
12711077d0bdSPeter Avalos 	case BPF_ALU|BPF_AND|BPF_K:
12721077d0bdSPeter Avalos 	case BPF_ALU|BPF_OR|BPF_K:
127397a9217aSAntonio Huete Jimenez 	case BPF_ALU|BPF_XOR|BPF_K:
12741077d0bdSPeter Avalos 	case BPF_ALU|BPF_LSH|BPF_K:
12751077d0bdSPeter Avalos 	case BPF_ALU|BPF_RSH|BPF_K:
12761077d0bdSPeter Avalos 		op = BPF_OP(s->code);
12771077d0bdSPeter Avalos 		if (alter) {
12781077d0bdSPeter Avalos 			if (s->k == 0) {
12793a289941SAaron LI 				/*
12803a289941SAaron LI 				 * Optimize operations where the constant
12813a289941SAaron LI 				 * is zero.
12823a289941SAaron LI 				 *
12833a289941SAaron LI 				 * Don't optimize away "sub #0"
12841077d0bdSPeter Avalos 				 * as it may be needed later to
12853a289941SAaron LI 				 * fixup the generated math code.
12863a289941SAaron LI 				 *
12873a289941SAaron LI 				 * Fail if we're dividing by zero or taking
12883a289941SAaron LI 				 * a modulus by zero.
12893a289941SAaron LI 				 */
12901077d0bdSPeter Avalos 				if (op == BPF_ADD ||
12911077d0bdSPeter Avalos 				    op == BPF_LSH || op == BPF_RSH ||
129297a9217aSAntonio Huete Jimenez 				    op == BPF_OR || op == BPF_XOR) {
12931077d0bdSPeter Avalos 					s->code = NOP;
12941077d0bdSPeter Avalos 					break;
12951077d0bdSPeter Avalos 				}
12961077d0bdSPeter Avalos 				if (op == BPF_MUL || op == BPF_AND) {
12971077d0bdSPeter Avalos 					s->code = BPF_LD|BPF_IMM;
12981077d0bdSPeter Avalos 					val[A_ATOM] = K(s->k);
12991077d0bdSPeter Avalos 					break;
13001077d0bdSPeter Avalos 				}
13013a289941SAaron LI 				if (op == BPF_DIV)
13023a289941SAaron LI 					opt_error(opt_state,
13033a289941SAaron LI 					    "division by zero");
13043a289941SAaron LI 				if (op == BPF_MOD)
13053a289941SAaron LI 					opt_error(opt_state,
13063a289941SAaron LI 					    "modulus by zero");
13071077d0bdSPeter Avalos 			}
130897a9217aSAntonio Huete Jimenez 			if (opt_state->vmap[val[A_ATOM]].is_const) {
13093a289941SAaron LI 				fold_op(opt_state, s, val[A_ATOM], K(s->k));
13101077d0bdSPeter Avalos 				val[A_ATOM] = K(s->k);
13111077d0bdSPeter Avalos 				break;
13121077d0bdSPeter Avalos 			}
13131077d0bdSPeter Avalos 		}
131497a9217aSAntonio Huete Jimenez 		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
13151077d0bdSPeter Avalos 		break;
13161077d0bdSPeter Avalos 
13171077d0bdSPeter Avalos 	case BPF_ALU|BPF_ADD|BPF_X:
13181077d0bdSPeter Avalos 	case BPF_ALU|BPF_SUB|BPF_X:
13191077d0bdSPeter Avalos 	case BPF_ALU|BPF_MUL|BPF_X:
13201077d0bdSPeter Avalos 	case BPF_ALU|BPF_DIV|BPF_X:
132197a9217aSAntonio Huete Jimenez 	case BPF_ALU|BPF_MOD|BPF_X:
13221077d0bdSPeter Avalos 	case BPF_ALU|BPF_AND|BPF_X:
13231077d0bdSPeter Avalos 	case BPF_ALU|BPF_OR|BPF_X:
132497a9217aSAntonio Huete Jimenez 	case BPF_ALU|BPF_XOR|BPF_X:
13251077d0bdSPeter Avalos 	case BPF_ALU|BPF_LSH|BPF_X:
13261077d0bdSPeter Avalos 	case BPF_ALU|BPF_RSH|BPF_X:
13271077d0bdSPeter Avalos 		op = BPF_OP(s->code);
132897a9217aSAntonio Huete Jimenez 		if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
132997a9217aSAntonio Huete Jimenez 			if (opt_state->vmap[val[A_ATOM]].is_const) {
13303a289941SAaron LI 				fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
13311077d0bdSPeter Avalos 				val[A_ATOM] = K(s->k);
13321077d0bdSPeter Avalos 			}
13331077d0bdSPeter Avalos 			else {
13341077d0bdSPeter Avalos 				s->code = BPF_ALU|BPF_K|op;
133597a9217aSAntonio Huete Jimenez 				s->k = opt_state->vmap[val[X_ATOM]].const_val;
13363a289941SAaron LI 				if ((op == BPF_LSH || op == BPF_RSH) &&
1337*ea16f64eSAntonio Huete Jimenez 				    s->k > 31)
13383a289941SAaron LI 					opt_error(opt_state,
13393a289941SAaron LI 					    "shift by more than 31 bits");
1340*ea16f64eSAntonio Huete Jimenez 				/*
1341*ea16f64eSAntonio Huete Jimenez 				 * XXX - optimizer loop detection.
1342*ea16f64eSAntonio Huete Jimenez 				 */
1343*ea16f64eSAntonio Huete Jimenez 				opt_state->non_branch_movement_performed = 1;
134497a9217aSAntonio Huete Jimenez 				opt_state->done = 0;
13451077d0bdSPeter Avalos 				val[A_ATOM] =
134697a9217aSAntonio Huete Jimenez 					F(opt_state, s->code, val[A_ATOM], K(s->k));
13471077d0bdSPeter Avalos 			}
13481077d0bdSPeter Avalos 			break;
13491077d0bdSPeter Avalos 		}
13501077d0bdSPeter Avalos 		/*
13511077d0bdSPeter Avalos 		 * Check if we're doing something to an accumulator
13521077d0bdSPeter Avalos 		 * that is 0, and simplify.  This may not seem like
13531077d0bdSPeter Avalos 		 * much of a simplification but it could open up further
13541077d0bdSPeter Avalos 		 * optimizations.
13551077d0bdSPeter Avalos 		 * XXX We could also check for mul by 1, etc.
13561077d0bdSPeter Avalos 		 */
135797a9217aSAntonio Huete Jimenez 		if (alter && opt_state->vmap[val[A_ATOM]].is_const
135897a9217aSAntonio Huete Jimenez 		    && opt_state->vmap[val[A_ATOM]].const_val == 0) {
135997a9217aSAntonio Huete Jimenez 			if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
13601077d0bdSPeter Avalos 				s->code = BPF_MISC|BPF_TXA;
13611077d0bdSPeter Avalos 				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
13621077d0bdSPeter Avalos 				break;
13631077d0bdSPeter Avalos 			}
136497a9217aSAntonio Huete Jimenez 			else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
13651077d0bdSPeter Avalos 				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
13661077d0bdSPeter Avalos 				s->code = BPF_LD|BPF_IMM;
13671077d0bdSPeter Avalos 				s->k = 0;
13681077d0bdSPeter Avalos 				vstore(s, &val[A_ATOM], K(s->k), alter);
13691077d0bdSPeter Avalos 				break;
13701077d0bdSPeter Avalos 			}
13711077d0bdSPeter Avalos 			else if (op == BPF_NEG) {
13721077d0bdSPeter Avalos 				s->code = NOP;
13731077d0bdSPeter Avalos 				break;
13741077d0bdSPeter Avalos 			}
13751077d0bdSPeter Avalos 		}
137697a9217aSAntonio Huete Jimenez 		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
13771077d0bdSPeter Avalos 		break;
13781077d0bdSPeter Avalos 
13791077d0bdSPeter Avalos 	case BPF_MISC|BPF_TXA:
13801077d0bdSPeter Avalos 		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
13811077d0bdSPeter Avalos 		break;
13821077d0bdSPeter Avalos 
13831077d0bdSPeter Avalos 	case BPF_LD|BPF_MEM:
13841077d0bdSPeter Avalos 		v = val[s->k];
138597a9217aSAntonio Huete Jimenez 		if (alter && opt_state->vmap[v].is_const) {
13861077d0bdSPeter Avalos 			s->code = BPF_LD|BPF_IMM;
138797a9217aSAntonio Huete Jimenez 			s->k = opt_state->vmap[v].const_val;
1388*ea16f64eSAntonio Huete Jimenez 			/*
1389*ea16f64eSAntonio Huete Jimenez 			 * XXX - optimizer loop detection.
1390*ea16f64eSAntonio Huete Jimenez 			 */
1391*ea16f64eSAntonio Huete Jimenez 			opt_state->non_branch_movement_performed = 1;
139297a9217aSAntonio Huete Jimenez 			opt_state->done = 0;
13931077d0bdSPeter Avalos 		}
13941077d0bdSPeter Avalos 		vstore(s, &val[A_ATOM], v, alter);
13951077d0bdSPeter Avalos 		break;
13961077d0bdSPeter Avalos 
13971077d0bdSPeter Avalos 	case BPF_MISC|BPF_TAX:
13981077d0bdSPeter Avalos 		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
13991077d0bdSPeter Avalos 		break;
14001077d0bdSPeter Avalos 
14011077d0bdSPeter Avalos 	case BPF_LDX|BPF_MEM:
14021077d0bdSPeter Avalos 		v = val[s->k];
140397a9217aSAntonio Huete Jimenez 		if (alter && opt_state->vmap[v].is_const) {
14041077d0bdSPeter Avalos 			s->code = BPF_LDX|BPF_IMM;
140597a9217aSAntonio Huete Jimenez 			s->k = opt_state->vmap[v].const_val;
1406*ea16f64eSAntonio Huete Jimenez 			/*
1407*ea16f64eSAntonio Huete Jimenez 			 * XXX - optimizer loop detection.
1408*ea16f64eSAntonio Huete Jimenez 			 */
1409*ea16f64eSAntonio Huete Jimenez 			opt_state->non_branch_movement_performed = 1;
141097a9217aSAntonio Huete Jimenez 			opt_state->done = 0;
14111077d0bdSPeter Avalos 		}
14121077d0bdSPeter Avalos 		vstore(s, &val[X_ATOM], v, alter);
14131077d0bdSPeter Avalos 		break;
14141077d0bdSPeter Avalos 
14151077d0bdSPeter Avalos 	case BPF_ST:
14161077d0bdSPeter Avalos 		vstore(s, &val[s->k], val[A_ATOM], alter);
14171077d0bdSPeter Avalos 		break;
14181077d0bdSPeter Avalos 
14191077d0bdSPeter Avalos 	case BPF_STX:
14201077d0bdSPeter Avalos 		vstore(s, &val[s->k], val[X_ATOM], alter);
14211077d0bdSPeter Avalos 		break;
14221077d0bdSPeter Avalos 	}
14231077d0bdSPeter Avalos }
14241077d0bdSPeter Avalos 
14251077d0bdSPeter Avalos static void
deadstmt(opt_state_t * opt_state,register struct stmt * s,register struct stmt * last[])142697a9217aSAntonio Huete Jimenez deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
14271077d0bdSPeter Avalos {
14281077d0bdSPeter Avalos 	register int atom;
14291077d0bdSPeter Avalos 
14301077d0bdSPeter Avalos 	atom = atomuse(s);
14311077d0bdSPeter Avalos 	if (atom >= 0) {
14321077d0bdSPeter Avalos 		if (atom == AX_ATOM) {
14331077d0bdSPeter Avalos 			last[X_ATOM] = 0;
14341077d0bdSPeter Avalos 			last[A_ATOM] = 0;
14351077d0bdSPeter Avalos 		}
14361077d0bdSPeter Avalos 		else
14371077d0bdSPeter Avalos 			last[atom] = 0;
14381077d0bdSPeter Avalos 	}
14391077d0bdSPeter Avalos 	atom = atomdef(s);
14401077d0bdSPeter Avalos 	if (atom >= 0) {
14411077d0bdSPeter Avalos 		if (last[atom]) {
1442*ea16f64eSAntonio Huete Jimenez 			/*
1443*ea16f64eSAntonio Huete Jimenez 			 * XXX - optimizer loop detection.
1444*ea16f64eSAntonio Huete Jimenez 			 */
1445*ea16f64eSAntonio Huete Jimenez 			opt_state->non_branch_movement_performed = 1;
144697a9217aSAntonio Huete Jimenez 			opt_state->done = 0;
14471077d0bdSPeter Avalos 			last[atom]->code = NOP;
14481077d0bdSPeter Avalos 		}
14491077d0bdSPeter Avalos 		last[atom] = s;
14501077d0bdSPeter Avalos 	}
14511077d0bdSPeter Avalos }
14521077d0bdSPeter Avalos 
14531077d0bdSPeter Avalos static void
opt_deadstores(opt_state_t * opt_state,register struct block * b)145497a9217aSAntonio Huete Jimenez opt_deadstores(opt_state_t *opt_state, register struct block *b)
14551077d0bdSPeter Avalos {
14561077d0bdSPeter Avalos 	register struct slist *s;
14571077d0bdSPeter Avalos 	register int atom;
14581077d0bdSPeter Avalos 	struct stmt *last[N_ATOMS];
14591077d0bdSPeter Avalos 
14601077d0bdSPeter Avalos 	memset((char *)last, 0, sizeof last);
14611077d0bdSPeter Avalos 
14621077d0bdSPeter Avalos 	for (s = b->stmts; s != 0; s = s->next)
146397a9217aSAntonio Huete Jimenez 		deadstmt(opt_state, &s->s, last);
146497a9217aSAntonio Huete Jimenez 	deadstmt(opt_state, &b->s, last);
14651077d0bdSPeter Avalos 
14661077d0bdSPeter Avalos 	for (atom = 0; atom < N_ATOMS; ++atom)
14671077d0bdSPeter Avalos 		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
14681077d0bdSPeter Avalos 			last[atom]->code = NOP;
1469*ea16f64eSAntonio Huete Jimenez 			/*
1470*ea16f64eSAntonio Huete Jimenez 			 * XXX - optimizer loop detection.
1471*ea16f64eSAntonio Huete Jimenez 			 */
1472*ea16f64eSAntonio Huete Jimenez 			opt_state->non_branch_movement_performed = 1;
147397a9217aSAntonio Huete Jimenez 			opt_state->done = 0;
14741077d0bdSPeter Avalos 		}
14751077d0bdSPeter Avalos }
14761077d0bdSPeter Avalos 
14771077d0bdSPeter Avalos static void
opt_blk(opt_state_t * opt_state,struct block * b,int do_stmts)14783a289941SAaron LI opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
14791077d0bdSPeter Avalos {
14801077d0bdSPeter Avalos 	struct slist *s;
14811077d0bdSPeter Avalos 	struct edge *p;
14821077d0bdSPeter Avalos 	int i;
1483*ea16f64eSAntonio Huete Jimenez 	bpf_u_int32 aval, xval;
14841077d0bdSPeter Avalos 
14851077d0bdSPeter Avalos #if 0
14861077d0bdSPeter Avalos 	for (s = b->stmts; s && s->next; s = s->next)
14871077d0bdSPeter Avalos 		if (BPF_CLASS(s->s.code) == BPF_JMP) {
14881077d0bdSPeter Avalos 			do_stmts = 0;
14891077d0bdSPeter Avalos 			break;
14901077d0bdSPeter Avalos 		}
14911077d0bdSPeter Avalos #endif
14921077d0bdSPeter Avalos 
14931077d0bdSPeter Avalos 	/*
14941077d0bdSPeter Avalos 	 * Initialize the atom values.
14951077d0bdSPeter Avalos 	 */
14961077d0bdSPeter Avalos 	p = b->in_edges;
14971077d0bdSPeter Avalos 	if (p == 0) {
14981077d0bdSPeter Avalos 		/*
14991077d0bdSPeter Avalos 		 * We have no predecessors, so everything is undefined
15001077d0bdSPeter Avalos 		 * upon entry to this block.
15011077d0bdSPeter Avalos 		 */
15021077d0bdSPeter Avalos 		memset((char *)b->val, 0, sizeof(b->val));
15031077d0bdSPeter Avalos 	} else {
15041077d0bdSPeter Avalos 		/*
15051077d0bdSPeter Avalos 		 * Inherit values from our predecessors.
15061077d0bdSPeter Avalos 		 *
15071077d0bdSPeter Avalos 		 * First, get the values from the predecessor along the
15081077d0bdSPeter Avalos 		 * first edge leading to this node.
15091077d0bdSPeter Avalos 		 */
15101077d0bdSPeter Avalos 		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
15111077d0bdSPeter Avalos 		/*
15121077d0bdSPeter Avalos 		 * Now look at all the other nodes leading to this node.
15131077d0bdSPeter Avalos 		 * If, for the predecessor along that edge, a register
15141077d0bdSPeter Avalos 		 * has a different value from the one we have (i.e.,
15151077d0bdSPeter Avalos 		 * control paths are merging, and the merging paths
15161077d0bdSPeter Avalos 		 * assign different values to that register), give the
15171077d0bdSPeter Avalos 		 * register the undefined value of 0.
15181077d0bdSPeter Avalos 		 */
15191077d0bdSPeter Avalos 		while ((p = p->next) != NULL) {
15201077d0bdSPeter Avalos 			for (i = 0; i < N_ATOMS; ++i)
15211077d0bdSPeter Avalos 				if (b->val[i] != p->pred->val[i])
15221077d0bdSPeter Avalos 					b->val[i] = 0;
15231077d0bdSPeter Avalos 		}
15241077d0bdSPeter Avalos 	}
15251077d0bdSPeter Avalos 	aval = b->val[A_ATOM];
15261077d0bdSPeter Avalos 	xval = b->val[X_ATOM];
15271077d0bdSPeter Avalos 	for (s = b->stmts; s; s = s->next)
15283a289941SAaron LI 		opt_stmt(opt_state, &s->s, b->val, do_stmts);
15291077d0bdSPeter Avalos 
15301077d0bdSPeter Avalos 	/*
15311077d0bdSPeter Avalos 	 * This is a special case: if we don't use anything from this
15321077d0bdSPeter Avalos 	 * block, and we load the accumulator or index register with a
15331077d0bdSPeter Avalos 	 * value that is already there, or if this block is a return,
15341077d0bdSPeter Avalos 	 * eliminate all the statements.
15351077d0bdSPeter Avalos 	 *
1536*ea16f64eSAntonio Huete Jimenez 	 * XXX - what if it does a store?  Presumably that falls under
1537*ea16f64eSAntonio Huete Jimenez 	 * the heading of "if we don't use anything from this block",
1538*ea16f64eSAntonio Huete Jimenez 	 * i.e., if we use any memory location set to a different
1539*ea16f64eSAntonio Huete Jimenez 	 * value by this block, then we use something from this block.
15401077d0bdSPeter Avalos 	 *
15411077d0bdSPeter Avalos 	 * XXX - why does it matter whether we use anything from this
15421077d0bdSPeter Avalos 	 * block?  If the accumulator or index register doesn't change
15431077d0bdSPeter Avalos 	 * its value, isn't that OK even if we use that value?
15441077d0bdSPeter Avalos 	 *
15451077d0bdSPeter Avalos 	 * XXX - if we load the accumulator with a different value,
15461077d0bdSPeter Avalos 	 * and the block ends with a conditional branch, we obviously
15471077d0bdSPeter Avalos 	 * can't eliminate it, as the branch depends on that value.
15481077d0bdSPeter Avalos 	 * For the index register, the conditional branch only depends
15491077d0bdSPeter Avalos 	 * on the index register value if the test is against the index
15501077d0bdSPeter Avalos 	 * register value rather than a constant; if nothing uses the
15511077d0bdSPeter Avalos 	 * value we put into the index register, and we're not testing
15521077d0bdSPeter Avalos 	 * against the index register's value, and there aren't any
15531077d0bdSPeter Avalos 	 * other problems that would keep us from eliminating this
15541077d0bdSPeter Avalos 	 * block, can we eliminate it?
15551077d0bdSPeter Avalos 	 */
15561077d0bdSPeter Avalos 	if (do_stmts &&
15573a289941SAaron LI 	    ((b->out_use == 0 &&
15583a289941SAaron LI 	      aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
15593a289941SAaron LI 	      xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
15601077d0bdSPeter Avalos 	     BPF_CLASS(b->s.code) == BPF_RET)) {
15611077d0bdSPeter Avalos 		if (b->stmts != 0) {
15621077d0bdSPeter Avalos 			b->stmts = 0;
1563*ea16f64eSAntonio Huete Jimenez 			/*
1564*ea16f64eSAntonio Huete Jimenez 			 * XXX - optimizer loop detection.
1565*ea16f64eSAntonio Huete Jimenez 			 */
1566*ea16f64eSAntonio Huete Jimenez 			opt_state->non_branch_movement_performed = 1;
156797a9217aSAntonio Huete Jimenez 			opt_state->done = 0;
15681077d0bdSPeter Avalos 		}
15691077d0bdSPeter Avalos 	} else {
157097a9217aSAntonio Huete Jimenez 		opt_peep(opt_state, b);
157197a9217aSAntonio Huete Jimenez 		opt_deadstores(opt_state, b);
15721077d0bdSPeter Avalos 	}
15731077d0bdSPeter Avalos 	/*
15741077d0bdSPeter Avalos 	 * Set up values for branch optimizer.
15751077d0bdSPeter Avalos 	 */
15761077d0bdSPeter Avalos 	if (BPF_SRC(b->s.code) == BPF_K)
15771077d0bdSPeter Avalos 		b->oval = K(b->s.k);
15781077d0bdSPeter Avalos 	else
15791077d0bdSPeter Avalos 		b->oval = b->val[X_ATOM];
15801077d0bdSPeter Avalos 	b->et.code = b->s.code;
15811077d0bdSPeter Avalos 	b->ef.code = -b->s.code;
15821077d0bdSPeter Avalos }
15831077d0bdSPeter Avalos 
15841077d0bdSPeter Avalos /*
15851077d0bdSPeter Avalos  * Return true if any register that is used on exit from 'succ', has
15861077d0bdSPeter Avalos  * an exit value that is different from the corresponding exit value
15871077d0bdSPeter Avalos  * from 'b'.
15881077d0bdSPeter Avalos  */
15891077d0bdSPeter Avalos static int
use_conflict(struct block * b,struct block * succ)15900e381983SMatthew Dillon use_conflict(struct block *b, struct block *succ)
15911077d0bdSPeter Avalos {
15921077d0bdSPeter Avalos 	int atom;
15931077d0bdSPeter Avalos 	atomset use = succ->out_use;
15941077d0bdSPeter Avalos 
15951077d0bdSPeter Avalos 	if (use == 0)
15961077d0bdSPeter Avalos 		return 0;
15971077d0bdSPeter Avalos 
15981077d0bdSPeter Avalos 	for (atom = 0; atom < N_ATOMS; ++atom)
15991077d0bdSPeter Avalos 		if (ATOMELEM(use, atom))
16001077d0bdSPeter Avalos 			if (b->val[atom] != succ->val[atom])
16011077d0bdSPeter Avalos 				return 1;
16021077d0bdSPeter Avalos 	return 0;
16031077d0bdSPeter Avalos }
16041077d0bdSPeter Avalos 
1605*ea16f64eSAntonio Huete Jimenez /*
1606*ea16f64eSAntonio Huete Jimenez  * Given a block that is the successor of an edge, and an edge that
1607*ea16f64eSAntonio Huete Jimenez  * dominates that edge, return either a pointer to a child of that
1608*ea16f64eSAntonio Huete Jimenez  * block (a block to which that block jumps) if that block is a
1609*ea16f64eSAntonio Huete Jimenez  * candidate to replace the successor of the latter edge or NULL
1610*ea16f64eSAntonio Huete Jimenez  * if neither of the children of the first block are candidates.
1611*ea16f64eSAntonio Huete Jimenez  */
16121077d0bdSPeter Avalos static struct block *
fold_edge(struct block * child,struct edge * ep)16130e381983SMatthew Dillon fold_edge(struct block *child, struct edge *ep)
16141077d0bdSPeter Avalos {
16151077d0bdSPeter Avalos 	int sense;
1616*ea16f64eSAntonio Huete Jimenez 	bpf_u_int32 aval0, aval1, oval0, oval1;
16171077d0bdSPeter Avalos 	int code = ep->code;
16181077d0bdSPeter Avalos 
16191077d0bdSPeter Avalos 	if (code < 0) {
1620*ea16f64eSAntonio Huete Jimenez 		/*
1621*ea16f64eSAntonio Huete Jimenez 		 * This edge is a "branch if false" edge.
1622*ea16f64eSAntonio Huete Jimenez 		 */
16231077d0bdSPeter Avalos 		code = -code;
16241077d0bdSPeter Avalos 		sense = 0;
1625*ea16f64eSAntonio Huete Jimenez 	} else {
1626*ea16f64eSAntonio Huete Jimenez 		/*
1627*ea16f64eSAntonio Huete Jimenez 		 * This edge is a "branch if true" edge.
1628*ea16f64eSAntonio Huete Jimenez 		 */
16291077d0bdSPeter Avalos 		sense = 1;
1630*ea16f64eSAntonio Huete Jimenez 	}
16311077d0bdSPeter Avalos 
1632*ea16f64eSAntonio Huete Jimenez 	/*
1633*ea16f64eSAntonio Huete Jimenez 	 * If the opcode for the branch at the end of the block we
1634*ea16f64eSAntonio Huete Jimenez 	 * were handed isn't the same as the opcode for the branch
1635*ea16f64eSAntonio Huete Jimenez 	 * to which the edge we were handed corresponds, the tests
1636*ea16f64eSAntonio Huete Jimenez 	 * for those branches aren't testing the same conditions,
1637*ea16f64eSAntonio Huete Jimenez 	 * so the blocks to which the first block branches aren't
1638*ea16f64eSAntonio Huete Jimenez 	 * candidates to replace the successor of the edge.
1639*ea16f64eSAntonio Huete Jimenez 	 */
16401077d0bdSPeter Avalos 	if (child->s.code != code)
16411077d0bdSPeter Avalos 		return 0;
16421077d0bdSPeter Avalos 
16431077d0bdSPeter Avalos 	aval0 = child->val[A_ATOM];
16441077d0bdSPeter Avalos 	oval0 = child->oval;
16451077d0bdSPeter Avalos 	aval1 = ep->pred->val[A_ATOM];
16461077d0bdSPeter Avalos 	oval1 = ep->pred->oval;
16471077d0bdSPeter Avalos 
1648*ea16f64eSAntonio Huete Jimenez 	/*
1649*ea16f64eSAntonio Huete Jimenez 	 * If the A register value on exit from the successor block
1650*ea16f64eSAntonio Huete Jimenez 	 * isn't the same as the A register value on exit from the
1651*ea16f64eSAntonio Huete Jimenez 	 * predecessor of the edge, the blocks to which the first
1652*ea16f64eSAntonio Huete Jimenez 	 * block branches aren't candidates to replace the successor
1653*ea16f64eSAntonio Huete Jimenez 	 * of the edge.
1654*ea16f64eSAntonio Huete Jimenez 	 */
16551077d0bdSPeter Avalos 	if (aval0 != aval1)
16561077d0bdSPeter Avalos 		return 0;
16571077d0bdSPeter Avalos 
16581077d0bdSPeter Avalos 	if (oval0 == oval1)
16591077d0bdSPeter Avalos 		/*
16601077d0bdSPeter Avalos 		 * The operands of the branch instructions are
1661*ea16f64eSAntonio Huete Jimenez 		 * identical, so the branches are testing the
1662*ea16f64eSAntonio Huete Jimenez 		 * same condition, and the result is true if a true
16631077d0bdSPeter Avalos 		 * branch was taken to get here, otherwise false.
16641077d0bdSPeter Avalos 		 */
16651077d0bdSPeter Avalos 		return sense ? JT(child) : JF(child);
16661077d0bdSPeter Avalos 
16671077d0bdSPeter Avalos 	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
16681077d0bdSPeter Avalos 		/*
16691077d0bdSPeter Avalos 		 * At this point, we only know the comparison if we
16701077d0bdSPeter Avalos 		 * came down the true branch, and it was an equality
16711077d0bdSPeter Avalos 		 * comparison with a constant.
16721077d0bdSPeter Avalos 		 *
16731077d0bdSPeter Avalos 		 * I.e., if we came down the true branch, and the branch
16741077d0bdSPeter Avalos 		 * was an equality comparison with a constant, we know the
16751077d0bdSPeter Avalos 		 * accumulator contains that constant.  If we came down
16761077d0bdSPeter Avalos 		 * the false branch, or the comparison wasn't with a
16771077d0bdSPeter Avalos 		 * constant, we don't know what was in the accumulator.
16781077d0bdSPeter Avalos 		 *
16791077d0bdSPeter Avalos 		 * We rely on the fact that distinct constants have distinct
16801077d0bdSPeter Avalos 		 * value numbers.
16811077d0bdSPeter Avalos 		 */
16821077d0bdSPeter Avalos 		return JF(child);
16831077d0bdSPeter Avalos 
16841077d0bdSPeter Avalos 	return 0;
16851077d0bdSPeter Avalos }
16861077d0bdSPeter Avalos 
1687*ea16f64eSAntonio Huete Jimenez /*
1688*ea16f64eSAntonio Huete Jimenez  * If we can make this edge go directly to a child of the edge's current
1689*ea16f64eSAntonio Huete Jimenez  * successor, do so.
1690*ea16f64eSAntonio Huete Jimenez  */
16911077d0bdSPeter Avalos static void
opt_j(opt_state_t * opt_state,struct edge * ep)169297a9217aSAntonio Huete Jimenez opt_j(opt_state_t *opt_state, struct edge *ep)
16931077d0bdSPeter Avalos {
1694*ea16f64eSAntonio Huete Jimenez 	register u_int i, k;
16951077d0bdSPeter Avalos 	register struct block *target;
16961077d0bdSPeter Avalos 
1697*ea16f64eSAntonio Huete Jimenez 	/*
1698*ea16f64eSAntonio Huete Jimenez 	 * Does this edge go to a block where, if the test
1699*ea16f64eSAntonio Huete Jimenez 	 * at the end of it succeeds, it goes to a block
1700*ea16f64eSAntonio Huete Jimenez 	 * that's a leaf node of the DAG, i.e. a return
1701*ea16f64eSAntonio Huete Jimenez 	 * statement?
1702*ea16f64eSAntonio Huete Jimenez 	 * If so, there's nothing to optimize.
1703*ea16f64eSAntonio Huete Jimenez 	 */
17041077d0bdSPeter Avalos 	if (JT(ep->succ) == 0)
17051077d0bdSPeter Avalos 		return;
17061077d0bdSPeter Avalos 
1707*ea16f64eSAntonio Huete Jimenez 	/*
1708*ea16f64eSAntonio Huete Jimenez 	 * Does this edge go to a block that goes, in turn, to
1709*ea16f64eSAntonio Huete Jimenez 	 * the same block regardless of whether the test at the
1710*ea16f64eSAntonio Huete Jimenez 	 * end succeeds or fails?
1711*ea16f64eSAntonio Huete Jimenez 	 */
17121077d0bdSPeter Avalos 	if (JT(ep->succ) == JF(ep->succ)) {
17131077d0bdSPeter Avalos 		/*
17141077d0bdSPeter Avalos 		 * Common branch targets can be eliminated, provided
17151077d0bdSPeter Avalos 		 * there is no data dependency.
1716*ea16f64eSAntonio Huete Jimenez 		 *
1717*ea16f64eSAntonio Huete Jimenez 		 * Check whether any register used on exit from the
1718*ea16f64eSAntonio Huete Jimenez 		 * block to which the successor of this edge goes
1719*ea16f64eSAntonio Huete Jimenez 		 * has a value at that point that's different from
1720*ea16f64eSAntonio Huete Jimenez 		 * the value it has on exit from the predecessor of
1721*ea16f64eSAntonio Huete Jimenez 		 * this edge.  If not, the predecessor of this edge
1722*ea16f64eSAntonio Huete Jimenez 		 * can just go to the block to which the successor
1723*ea16f64eSAntonio Huete Jimenez 		 * of this edge goes, bypassing the successor of this
1724*ea16f64eSAntonio Huete Jimenez 		 * edge, as the successor of this edge isn't doing
1725*ea16f64eSAntonio Huete Jimenez 		 * any calculations whose results are different
1726*ea16f64eSAntonio Huete Jimenez 		 * from what the blocks before it did and isn't
1727*ea16f64eSAntonio Huete Jimenez 		 * doing any tests the results of which matter.
17281077d0bdSPeter Avalos 		 */
1729*ea16f64eSAntonio Huete Jimenez 		if (!use_conflict(ep->pred, JT(ep->succ))) {
1730*ea16f64eSAntonio Huete Jimenez 			/*
1731*ea16f64eSAntonio Huete Jimenez 			 * No, there isn't.
1732*ea16f64eSAntonio Huete Jimenez 			 * Make this edge go to the block to
1733*ea16f64eSAntonio Huete Jimenez 			 * which the successor of that edge
1734*ea16f64eSAntonio Huete Jimenez 			 * goes.
1735*ea16f64eSAntonio Huete Jimenez 			 *
1736*ea16f64eSAntonio Huete Jimenez 			 * XXX - optimizer loop detection.
1737*ea16f64eSAntonio Huete Jimenez 			 */
1738*ea16f64eSAntonio Huete Jimenez 			opt_state->non_branch_movement_performed = 1;
173997a9217aSAntonio Huete Jimenez 			opt_state->done = 0;
17401077d0bdSPeter Avalos 			ep->succ = JT(ep->succ);
17411077d0bdSPeter Avalos 		}
17421077d0bdSPeter Avalos 	}
17431077d0bdSPeter Avalos 	/*
17441077d0bdSPeter Avalos 	 * For each edge dominator that matches the successor of this
17451077d0bdSPeter Avalos 	 * edge, promote the edge successor to the its grandchild.
17461077d0bdSPeter Avalos 	 *
17471077d0bdSPeter Avalos 	 * XXX We violate the set abstraction here in favor a reasonably
17481077d0bdSPeter Avalos 	 * efficient loop.
17491077d0bdSPeter Avalos 	 */
17501077d0bdSPeter Avalos  top:
175197a9217aSAntonio Huete Jimenez 	for (i = 0; i < opt_state->edgewords; ++i) {
1752*ea16f64eSAntonio Huete Jimenez 		/* i'th word in the bitset of dominators */
17531077d0bdSPeter Avalos 		register bpf_u_int32 x = ep->edom[i];
17541077d0bdSPeter Avalos 
17551077d0bdSPeter Avalos 		while (x != 0) {
1756*ea16f64eSAntonio Huete Jimenez 			/* Find the next dominator in that word and mark it as found */
17573a289941SAaron LI 			k = lowest_set_bit(x);
17583a289941SAaron LI 			x &=~ ((bpf_u_int32)1 << k);
17591077d0bdSPeter Avalos 			k += i * BITS_PER_WORD;
17601077d0bdSPeter Avalos 
176197a9217aSAntonio Huete Jimenez 			target = fold_edge(ep->succ, opt_state->edges[k]);
17621077d0bdSPeter Avalos 			/*
1763*ea16f64eSAntonio Huete Jimenez 			 * We have a candidate to replace the successor
1764*ea16f64eSAntonio Huete Jimenez 			 * of ep.
1765*ea16f64eSAntonio Huete Jimenez 			 *
17661077d0bdSPeter Avalos 			 * Check that there is no data dependency between
1767*ea16f64eSAntonio Huete Jimenez 			 * nodes that will be violated if we move the edge;
1768*ea16f64eSAntonio Huete Jimenez 			 * i.e., if any register used on exit from the
1769*ea16f64eSAntonio Huete Jimenez 			 * candidate has a value at that point different
1770*ea16f64eSAntonio Huete Jimenez 			 * from the value it has when we exit the
1771*ea16f64eSAntonio Huete Jimenez 			 * predecessor of that edge, there's a data
1772*ea16f64eSAntonio Huete Jimenez 			 * dependency that will be violated.
17731077d0bdSPeter Avalos 			 */
17741077d0bdSPeter Avalos 			if (target != 0 && !use_conflict(ep->pred, target)) {
1775*ea16f64eSAntonio Huete Jimenez 				/*
1776*ea16f64eSAntonio Huete Jimenez 				 * It's safe to replace the successor of
1777*ea16f64eSAntonio Huete Jimenez 				 * ep; do so, and note that we've made
1778*ea16f64eSAntonio Huete Jimenez 				 * at least one change.
1779*ea16f64eSAntonio Huete Jimenez 				 *
1780*ea16f64eSAntonio Huete Jimenez 				 * XXX - this is one of the operations that
1781*ea16f64eSAntonio Huete Jimenez 				 * happens when the optimizer gets into
1782*ea16f64eSAntonio Huete Jimenez 				 * one of those infinite loops.
1783*ea16f64eSAntonio Huete Jimenez 				 */
178497a9217aSAntonio Huete Jimenez 				opt_state->done = 0;
17851077d0bdSPeter Avalos 				ep->succ = target;
17861077d0bdSPeter Avalos 				if (JT(target) != 0)
17871077d0bdSPeter Avalos 					/*
17881077d0bdSPeter Avalos 					 * Start over unless we hit a leaf.
17891077d0bdSPeter Avalos 					 */
17901077d0bdSPeter Avalos 					goto top;
17911077d0bdSPeter Avalos 				return;
17921077d0bdSPeter Avalos 			}
17931077d0bdSPeter Avalos 		}
17941077d0bdSPeter Avalos 	}
17951077d0bdSPeter Avalos }
17961077d0bdSPeter Avalos 
1797*ea16f64eSAntonio Huete Jimenez /*
1798*ea16f64eSAntonio Huete Jimenez  * XXX - is this, and and_pullup(), what's described in section 6.1.2
1799*ea16f64eSAntonio Huete Jimenez  * "Predicate Assertion Propagation" in the BPF+ paper?
1800*ea16f64eSAntonio Huete Jimenez  *
1801*ea16f64eSAntonio Huete Jimenez  * Note that this looks at block dominators, not edge dominators.
1802*ea16f64eSAntonio Huete Jimenez  * Don't think so.
1803*ea16f64eSAntonio Huete Jimenez  *
1804*ea16f64eSAntonio Huete Jimenez  * "A or B" compiles into
1805*ea16f64eSAntonio Huete Jimenez  *
1806*ea16f64eSAntonio Huete Jimenez  *          A
1807*ea16f64eSAntonio Huete Jimenez  *       t / \ f
1808*ea16f64eSAntonio Huete Jimenez  *        /   B
1809*ea16f64eSAntonio Huete Jimenez  *       / t / \ f
1810*ea16f64eSAntonio Huete Jimenez  *      \   /
1811*ea16f64eSAntonio Huete Jimenez  *       \ /
1812*ea16f64eSAntonio Huete Jimenez  *        X
1813*ea16f64eSAntonio Huete Jimenez  *
1814*ea16f64eSAntonio Huete Jimenez  *
1815*ea16f64eSAntonio Huete Jimenez  */
18161077d0bdSPeter Avalos static void
or_pullup(opt_state_t * opt_state,struct block * b)181797a9217aSAntonio Huete Jimenez or_pullup(opt_state_t *opt_state, struct block *b)
18181077d0bdSPeter Avalos {
1819*ea16f64eSAntonio Huete Jimenez 	bpf_u_int32 val;
1820*ea16f64eSAntonio Huete Jimenez 	int at_top;
18211077d0bdSPeter Avalos 	struct block *pull;
18221077d0bdSPeter Avalos 	struct block **diffp, **samep;
18231077d0bdSPeter Avalos 	struct edge *ep;
18241077d0bdSPeter Avalos 
18251077d0bdSPeter Avalos 	ep = b->in_edges;
18261077d0bdSPeter Avalos 	if (ep == 0)
18271077d0bdSPeter Avalos 		return;
18281077d0bdSPeter Avalos 
18291077d0bdSPeter Avalos 	/*
18301077d0bdSPeter Avalos 	 * Make sure each predecessor loads the same value.
18311077d0bdSPeter Avalos 	 * XXX why?
18321077d0bdSPeter Avalos 	 */
18331077d0bdSPeter Avalos 	val = ep->pred->val[A_ATOM];
18341077d0bdSPeter Avalos 	for (ep = ep->next; ep != 0; ep = ep->next)
18351077d0bdSPeter Avalos 		if (val != ep->pred->val[A_ATOM])
18361077d0bdSPeter Avalos 			return;
18371077d0bdSPeter Avalos 
1838*ea16f64eSAntonio Huete Jimenez 	/*
1839*ea16f64eSAntonio Huete Jimenez 	 * For the first edge in the list of edges coming into this block,
1840*ea16f64eSAntonio Huete Jimenez 	 * see whether the predecessor of that edge comes here via a true
1841*ea16f64eSAntonio Huete Jimenez 	 * branch or a false branch.
1842*ea16f64eSAntonio Huete Jimenez 	 */
18431077d0bdSPeter Avalos 	if (JT(b->in_edges->pred) == b)
1844*ea16f64eSAntonio Huete Jimenez 		diffp = &JT(b->in_edges->pred);	/* jt */
18451077d0bdSPeter Avalos 	else
1846*ea16f64eSAntonio Huete Jimenez 		diffp = &JF(b->in_edges->pred);	/* jf */
18471077d0bdSPeter Avalos 
1848*ea16f64eSAntonio Huete Jimenez 	/*
1849*ea16f64eSAntonio Huete Jimenez 	 * diffp is a pointer to a pointer to the block.
1850*ea16f64eSAntonio Huete Jimenez 	 *
1851*ea16f64eSAntonio Huete Jimenez 	 * Go down the false chain looking as far as you can,
1852*ea16f64eSAntonio Huete Jimenez 	 * making sure that each jump-compare is doing the
1853*ea16f64eSAntonio Huete Jimenez 	 * same as the original block.
1854*ea16f64eSAntonio Huete Jimenez 	 *
1855*ea16f64eSAntonio Huete Jimenez 	 * If you reach the bottom before you reach a
1856*ea16f64eSAntonio Huete Jimenez 	 * different jump-compare, just exit.  There's nothing
1857*ea16f64eSAntonio Huete Jimenez 	 * to do here.  XXX - no, this version is checking for
1858*ea16f64eSAntonio Huete Jimenez 	 * the value leaving the block; that's from the BPF+
1859*ea16f64eSAntonio Huete Jimenez 	 * pullup routine.
1860*ea16f64eSAntonio Huete Jimenez 	 */
18611077d0bdSPeter Avalos 	at_top = 1;
18623a289941SAaron LI 	for (;;) {
1863*ea16f64eSAntonio Huete Jimenez 		/*
1864*ea16f64eSAntonio Huete Jimenez 		 * Done if that's not going anywhere XXX
1865*ea16f64eSAntonio Huete Jimenez 		 */
18661077d0bdSPeter Avalos 		if (*diffp == 0)
18671077d0bdSPeter Avalos 			return;
18681077d0bdSPeter Avalos 
1869*ea16f64eSAntonio Huete Jimenez 		/*
1870*ea16f64eSAntonio Huete Jimenez 		 * Done if that predecessor blah blah blah isn't
1871*ea16f64eSAntonio Huete Jimenez 		 * going the same place we're going XXX
1872*ea16f64eSAntonio Huete Jimenez 		 *
1873*ea16f64eSAntonio Huete Jimenez 		 * Does the true edge of this block point to the same
1874*ea16f64eSAntonio Huete Jimenez 		 * location as the true edge of b?
1875*ea16f64eSAntonio Huete Jimenez 		 */
18761077d0bdSPeter Avalos 		if (JT(*diffp) != JT(b))
18771077d0bdSPeter Avalos 			return;
18781077d0bdSPeter Avalos 
1879*ea16f64eSAntonio Huete Jimenez 		/*
1880*ea16f64eSAntonio Huete Jimenez 		 * Done if this node isn't a dominator of that
1881*ea16f64eSAntonio Huete Jimenez 		 * node blah blah blah XXX
1882*ea16f64eSAntonio Huete Jimenez 		 *
1883*ea16f64eSAntonio Huete Jimenez 		 * Does b dominate diffp?
1884*ea16f64eSAntonio Huete Jimenez 		 */
18851077d0bdSPeter Avalos 		if (!SET_MEMBER((*diffp)->dom, b->id))
18861077d0bdSPeter Avalos 			return;
18871077d0bdSPeter Avalos 
1888*ea16f64eSAntonio Huete Jimenez 		/*
1889*ea16f64eSAntonio Huete Jimenez 		 * Break out of the loop if that node's value of A
1890*ea16f64eSAntonio Huete Jimenez 		 * isn't the value of A above XXX
1891*ea16f64eSAntonio Huete Jimenez 		 */
18921077d0bdSPeter Avalos 		if ((*diffp)->val[A_ATOM] != val)
18931077d0bdSPeter Avalos 			break;
18941077d0bdSPeter Avalos 
1895*ea16f64eSAntonio Huete Jimenez 		/*
1896*ea16f64eSAntonio Huete Jimenez 		 * Get the JF for that node XXX
1897*ea16f64eSAntonio Huete Jimenez 		 * Go down the false path.
1898*ea16f64eSAntonio Huete Jimenez 		 */
18991077d0bdSPeter Avalos 		diffp = &JF(*diffp);
19001077d0bdSPeter Avalos 		at_top = 0;
19011077d0bdSPeter Avalos 	}
1902*ea16f64eSAntonio Huete Jimenez 
1903*ea16f64eSAntonio Huete Jimenez 	/*
1904*ea16f64eSAntonio Huete Jimenez 	 * Now that we've found a different jump-compare in a chain
1905*ea16f64eSAntonio Huete Jimenez 	 * below b, search further down until we find another
1906*ea16f64eSAntonio Huete Jimenez 	 * jump-compare that looks at the original value.  This
1907*ea16f64eSAntonio Huete Jimenez 	 * jump-compare should get pulled up.  XXX again we're
1908*ea16f64eSAntonio Huete Jimenez 	 * comparing values not jump-compares.
1909*ea16f64eSAntonio Huete Jimenez 	 */
19101077d0bdSPeter Avalos 	samep = &JF(*diffp);
19113a289941SAaron LI 	for (;;) {
1912*ea16f64eSAntonio Huete Jimenez 		/*
1913*ea16f64eSAntonio Huete Jimenez 		 * Done if that's not going anywhere XXX
1914*ea16f64eSAntonio Huete Jimenez 		 */
19151077d0bdSPeter Avalos 		if (*samep == 0)
19161077d0bdSPeter Avalos 			return;
19171077d0bdSPeter Avalos 
1918*ea16f64eSAntonio Huete Jimenez 		/*
1919*ea16f64eSAntonio Huete Jimenez 		 * Done if that predecessor blah blah blah isn't
1920*ea16f64eSAntonio Huete Jimenez 		 * going the same place we're going XXX
1921*ea16f64eSAntonio Huete Jimenez 		 */
19221077d0bdSPeter Avalos 		if (JT(*samep) != JT(b))
19231077d0bdSPeter Avalos 			return;
19241077d0bdSPeter Avalos 
1925*ea16f64eSAntonio Huete Jimenez 		/*
1926*ea16f64eSAntonio Huete Jimenez 		 * Done if this node isn't a dominator of that
1927*ea16f64eSAntonio Huete Jimenez 		 * node blah blah blah XXX
1928*ea16f64eSAntonio Huete Jimenez 		 *
1929*ea16f64eSAntonio Huete Jimenez 		 * Does b dominate samep?
1930*ea16f64eSAntonio Huete Jimenez 		 */
19311077d0bdSPeter Avalos 		if (!SET_MEMBER((*samep)->dom, b->id))
19321077d0bdSPeter Avalos 			return;
19331077d0bdSPeter Avalos 
1934*ea16f64eSAntonio Huete Jimenez 		/*
1935*ea16f64eSAntonio Huete Jimenez 		 * Break out of the loop if that node's value of A
1936*ea16f64eSAntonio Huete Jimenez 		 * is the value of A above XXX
1937*ea16f64eSAntonio Huete Jimenez 		 */
19381077d0bdSPeter Avalos 		if ((*samep)->val[A_ATOM] == val)
19391077d0bdSPeter Avalos 			break;
19401077d0bdSPeter Avalos 
19411077d0bdSPeter Avalos 		/* XXX Need to check that there are no data dependencies
19421077d0bdSPeter Avalos 		   between dp0 and dp1.  Currently, the code generator
19431077d0bdSPeter Avalos 		   will not produce such dependencies. */
19441077d0bdSPeter Avalos 		samep = &JF(*samep);
19451077d0bdSPeter Avalos 	}
19461077d0bdSPeter Avalos #ifdef notdef
19471077d0bdSPeter Avalos 	/* XXX This doesn't cover everything. */
19481077d0bdSPeter Avalos 	for (i = 0; i < N_ATOMS; ++i)
19491077d0bdSPeter Avalos 		if ((*samep)->val[i] != pred->val[i])
19501077d0bdSPeter Avalos 			return;
19511077d0bdSPeter Avalos #endif
19521077d0bdSPeter Avalos 	/* Pull up the node. */
19531077d0bdSPeter Avalos 	pull = *samep;
19541077d0bdSPeter Avalos 	*samep = JF(pull);
19551077d0bdSPeter Avalos 	JF(pull) = *diffp;
19561077d0bdSPeter Avalos 
19571077d0bdSPeter Avalos 	/*
19581077d0bdSPeter Avalos 	 * At the top of the chain, each predecessor needs to point at the
19591077d0bdSPeter Avalos 	 * pulled up node.  Inside the chain, there is only one predecessor
19601077d0bdSPeter Avalos 	 * to worry about.
19611077d0bdSPeter Avalos 	 */
19621077d0bdSPeter Avalos 	if (at_top) {
19631077d0bdSPeter Avalos 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
19641077d0bdSPeter Avalos 			if (JT(ep->pred) == b)
19651077d0bdSPeter Avalos 				JT(ep->pred) = pull;
19661077d0bdSPeter Avalos 			else
19671077d0bdSPeter Avalos 				JF(ep->pred) = pull;
19681077d0bdSPeter Avalos 		}
19691077d0bdSPeter Avalos 	}
19701077d0bdSPeter Avalos 	else
19711077d0bdSPeter Avalos 		*diffp = pull;
19721077d0bdSPeter Avalos 
1973*ea16f64eSAntonio Huete Jimenez 	/*
1974*ea16f64eSAntonio Huete Jimenez 	 * XXX - this is one of the operations that happens when the
1975*ea16f64eSAntonio Huete Jimenez 	 * optimizer gets into one of those infinite loops.
1976*ea16f64eSAntonio Huete Jimenez 	 */
197797a9217aSAntonio Huete Jimenez 	opt_state->done = 0;
19781077d0bdSPeter Avalos }
19791077d0bdSPeter Avalos 
19801077d0bdSPeter Avalos static void
and_pullup(opt_state_t * opt_state,struct block * b)198197a9217aSAntonio Huete Jimenez and_pullup(opt_state_t *opt_state, struct block *b)
19821077d0bdSPeter Avalos {
1983*ea16f64eSAntonio Huete Jimenez 	bpf_u_int32 val;
1984*ea16f64eSAntonio Huete Jimenez 	int at_top;
19851077d0bdSPeter Avalos 	struct block *pull;
19861077d0bdSPeter Avalos 	struct block **diffp, **samep;
19871077d0bdSPeter Avalos 	struct edge *ep;
19881077d0bdSPeter Avalos 
19891077d0bdSPeter Avalos 	ep = b->in_edges;
19901077d0bdSPeter Avalos 	if (ep == 0)
19911077d0bdSPeter Avalos 		return;
19921077d0bdSPeter Avalos 
19931077d0bdSPeter Avalos 	/*
19941077d0bdSPeter Avalos 	 * Make sure each predecessor loads the same value.
19951077d0bdSPeter Avalos 	 */
19961077d0bdSPeter Avalos 	val = ep->pred->val[A_ATOM];
19971077d0bdSPeter Avalos 	for (ep = ep->next; ep != 0; ep = ep->next)
19981077d0bdSPeter Avalos 		if (val != ep->pred->val[A_ATOM])
19991077d0bdSPeter Avalos 			return;
20001077d0bdSPeter Avalos 
20011077d0bdSPeter Avalos 	if (JT(b->in_edges->pred) == b)
20021077d0bdSPeter Avalos 		diffp = &JT(b->in_edges->pred);
20031077d0bdSPeter Avalos 	else
20041077d0bdSPeter Avalos 		diffp = &JF(b->in_edges->pred);
20051077d0bdSPeter Avalos 
20061077d0bdSPeter Avalos 	at_top = 1;
20073a289941SAaron LI 	for (;;) {
20081077d0bdSPeter Avalos 		if (*diffp == 0)
20091077d0bdSPeter Avalos 			return;
20101077d0bdSPeter Avalos 
20111077d0bdSPeter Avalos 		if (JF(*diffp) != JF(b))
20121077d0bdSPeter Avalos 			return;
20131077d0bdSPeter Avalos 
20141077d0bdSPeter Avalos 		if (!SET_MEMBER((*diffp)->dom, b->id))
20151077d0bdSPeter Avalos 			return;
20161077d0bdSPeter Avalos 
20171077d0bdSPeter Avalos 		if ((*diffp)->val[A_ATOM] != val)
20181077d0bdSPeter Avalos 			break;
20191077d0bdSPeter Avalos 
20201077d0bdSPeter Avalos 		diffp = &JT(*diffp);
20211077d0bdSPeter Avalos 		at_top = 0;
20221077d0bdSPeter Avalos 	}
20231077d0bdSPeter Avalos 	samep = &JT(*diffp);
20243a289941SAaron LI 	for (;;) {
20251077d0bdSPeter Avalos 		if (*samep == 0)
20261077d0bdSPeter Avalos 			return;
20271077d0bdSPeter Avalos 
20281077d0bdSPeter Avalos 		if (JF(*samep) != JF(b))
20291077d0bdSPeter Avalos 			return;
20301077d0bdSPeter Avalos 
20311077d0bdSPeter Avalos 		if (!SET_MEMBER((*samep)->dom, b->id))
20321077d0bdSPeter Avalos 			return;
20331077d0bdSPeter Avalos 
20341077d0bdSPeter Avalos 		if ((*samep)->val[A_ATOM] == val)
20351077d0bdSPeter Avalos 			break;
20361077d0bdSPeter Avalos 
20371077d0bdSPeter Avalos 		/* XXX Need to check that there are no data dependencies
20381077d0bdSPeter Avalos 		   between diffp and samep.  Currently, the code generator
20391077d0bdSPeter Avalos 		   will not produce such dependencies. */
20401077d0bdSPeter Avalos 		samep = &JT(*samep);
20411077d0bdSPeter Avalos 	}
20421077d0bdSPeter Avalos #ifdef notdef
20431077d0bdSPeter Avalos 	/* XXX This doesn't cover everything. */
20441077d0bdSPeter Avalos 	for (i = 0; i < N_ATOMS; ++i)
20451077d0bdSPeter Avalos 		if ((*samep)->val[i] != pred->val[i])
20461077d0bdSPeter Avalos 			return;
20471077d0bdSPeter Avalos #endif
20481077d0bdSPeter Avalos 	/* Pull up the node. */
20491077d0bdSPeter Avalos 	pull = *samep;
20501077d0bdSPeter Avalos 	*samep = JT(pull);
20511077d0bdSPeter Avalos 	JT(pull) = *diffp;
20521077d0bdSPeter Avalos 
20531077d0bdSPeter Avalos 	/*
20541077d0bdSPeter Avalos 	 * At the top of the chain, each predecessor needs to point at the
20551077d0bdSPeter Avalos 	 * pulled up node.  Inside the chain, there is only one predecessor
20561077d0bdSPeter Avalos 	 * to worry about.
20571077d0bdSPeter Avalos 	 */
20581077d0bdSPeter Avalos 	if (at_top) {
20591077d0bdSPeter Avalos 		for (ep = b->in_edges; ep != 0; ep = ep->next) {
20601077d0bdSPeter Avalos 			if (JT(ep->pred) == b)
20611077d0bdSPeter Avalos 				JT(ep->pred) = pull;
20621077d0bdSPeter Avalos 			else
20631077d0bdSPeter Avalos 				JF(ep->pred) = pull;
20641077d0bdSPeter Avalos 		}
20651077d0bdSPeter Avalos 	}
20661077d0bdSPeter Avalos 	else
20671077d0bdSPeter Avalos 		*diffp = pull;
20681077d0bdSPeter Avalos 
2069*ea16f64eSAntonio Huete Jimenez 	/*
2070*ea16f64eSAntonio Huete Jimenez 	 * XXX - this is one of the operations that happens when the
2071*ea16f64eSAntonio Huete Jimenez 	 * optimizer gets into one of those infinite loops.
2072*ea16f64eSAntonio Huete Jimenez 	 */
207397a9217aSAntonio Huete Jimenez 	opt_state->done = 0;
20741077d0bdSPeter Avalos }
20751077d0bdSPeter Avalos 
20761077d0bdSPeter Avalos static void
opt_blks(opt_state_t * opt_state,struct icode * ic,int do_stmts)20773a289941SAaron LI opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
20781077d0bdSPeter Avalos {
20791077d0bdSPeter Avalos 	int i, maxlevel;
20801077d0bdSPeter Avalos 	struct block *p;
20811077d0bdSPeter Avalos 
208297a9217aSAntonio Huete Jimenez 	init_val(opt_state);
208397a9217aSAntonio Huete Jimenez 	maxlevel = ic->root->level;
20841077d0bdSPeter Avalos 
208597a9217aSAntonio Huete Jimenez 	find_inedges(opt_state, ic->root);
20861077d0bdSPeter Avalos 	for (i = maxlevel; i >= 0; --i)
208797a9217aSAntonio Huete Jimenez 		for (p = opt_state->levels[i]; p; p = p->link)
20883a289941SAaron LI 			opt_blk(opt_state, p, do_stmts);
20891077d0bdSPeter Avalos 
20901077d0bdSPeter Avalos 	if (do_stmts)
20911077d0bdSPeter Avalos 		/*
20921077d0bdSPeter Avalos 		 * No point trying to move branches; it can't possibly
20931077d0bdSPeter Avalos 		 * make a difference at this point.
2094*ea16f64eSAntonio Huete Jimenez 		 *
2095*ea16f64eSAntonio Huete Jimenez 		 * XXX - this might be after we detect a loop where
2096*ea16f64eSAntonio Huete Jimenez 		 * we were just looping infinitely moving branches
2097*ea16f64eSAntonio Huete Jimenez 		 * in such a fashion that we went through two or more
2098*ea16f64eSAntonio Huete Jimenez 		 * versions of the machine code, eventually returning
2099*ea16f64eSAntonio Huete Jimenez 		 * to the first version.  (We're really not doing a
2100*ea16f64eSAntonio Huete Jimenez 		 * full loop detection, we're just testing for two
2101*ea16f64eSAntonio Huete Jimenez 		 * passes in a row where where we do nothing but
2102*ea16f64eSAntonio Huete Jimenez 		 * move branches.)
21031077d0bdSPeter Avalos 		 */
21041077d0bdSPeter Avalos 		return;
21051077d0bdSPeter Avalos 
2106*ea16f64eSAntonio Huete Jimenez 	/*
2107*ea16f64eSAntonio Huete Jimenez 	 * Is this what the BPF+ paper describes in sections 6.1.1,
2108*ea16f64eSAntonio Huete Jimenez 	 * 6.1.2, and 6.1.3?
2109*ea16f64eSAntonio Huete Jimenez 	 */
21101077d0bdSPeter Avalos 	for (i = 1; i <= maxlevel; ++i) {
211197a9217aSAntonio Huete Jimenez 		for (p = opt_state->levels[i]; p; p = p->link) {
211297a9217aSAntonio Huete Jimenez 			opt_j(opt_state, &p->et);
211397a9217aSAntonio Huete Jimenez 			opt_j(opt_state, &p->ef);
21141077d0bdSPeter Avalos 		}
21151077d0bdSPeter Avalos 	}
21161077d0bdSPeter Avalos 
211797a9217aSAntonio Huete Jimenez 	find_inedges(opt_state, ic->root);
21181077d0bdSPeter Avalos 	for (i = 1; i <= maxlevel; ++i) {
211997a9217aSAntonio Huete Jimenez 		for (p = opt_state->levels[i]; p; p = p->link) {
212097a9217aSAntonio Huete Jimenez 			or_pullup(opt_state, p);
212197a9217aSAntonio Huete Jimenez 			and_pullup(opt_state, p);
21221077d0bdSPeter Avalos 		}
21231077d0bdSPeter Avalos 	}
21241077d0bdSPeter Avalos }
21251077d0bdSPeter Avalos 
21261077d0bdSPeter Avalos static inline void
link_inedge(struct edge * parent,struct block * child)21270e381983SMatthew Dillon link_inedge(struct edge *parent, struct block *child)
21281077d0bdSPeter Avalos {
21291077d0bdSPeter Avalos 	parent->next = child->in_edges;
21301077d0bdSPeter Avalos 	child->in_edges = parent;
21311077d0bdSPeter Avalos }
21321077d0bdSPeter Avalos 
21331077d0bdSPeter Avalos static void
find_inedges(opt_state_t * opt_state,struct block * root)213497a9217aSAntonio Huete Jimenez find_inedges(opt_state_t *opt_state, struct block *root)
21351077d0bdSPeter Avalos {
2136*ea16f64eSAntonio Huete Jimenez 	u_int i;
2137*ea16f64eSAntonio Huete Jimenez 	int level;
21381077d0bdSPeter Avalos 	struct block *b;
21391077d0bdSPeter Avalos 
214097a9217aSAntonio Huete Jimenez 	for (i = 0; i < opt_state->n_blocks; ++i)
214197a9217aSAntonio Huete Jimenez 		opt_state->blocks[i]->in_edges = 0;
21421077d0bdSPeter Avalos 
21431077d0bdSPeter Avalos 	/*
21441077d0bdSPeter Avalos 	 * Traverse the graph, adding each edge to the predecessor
21451077d0bdSPeter Avalos 	 * list of its successors.  Skip the leaves (i.e. level 0).
21461077d0bdSPeter Avalos 	 */
2147*ea16f64eSAntonio Huete Jimenez 	for (level = root->level; level > 0; --level) {
2148*ea16f64eSAntonio Huete Jimenez 		for (b = opt_state->levels[level]; b != 0; b = b->link) {
21491077d0bdSPeter Avalos 			link_inedge(&b->et, JT(b));
21501077d0bdSPeter Avalos 			link_inedge(&b->ef, JF(b));
21511077d0bdSPeter Avalos 		}
21521077d0bdSPeter Avalos 	}
21531077d0bdSPeter Avalos }
21541077d0bdSPeter Avalos 
21551077d0bdSPeter Avalos static void
opt_root(struct block ** b)21560e381983SMatthew Dillon opt_root(struct block **b)
21571077d0bdSPeter Avalos {
21581077d0bdSPeter Avalos 	struct slist *tmp, *s;
21591077d0bdSPeter Avalos 
21601077d0bdSPeter Avalos 	s = (*b)->stmts;
21611077d0bdSPeter Avalos 	(*b)->stmts = 0;
21621077d0bdSPeter Avalos 	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
21631077d0bdSPeter Avalos 		*b = JT(*b);
21641077d0bdSPeter Avalos 
21651077d0bdSPeter Avalos 	tmp = (*b)->stmts;
21661077d0bdSPeter Avalos 	if (tmp != 0)
21671077d0bdSPeter Avalos 		sappend(s, tmp);
21681077d0bdSPeter Avalos 	(*b)->stmts = s;
21691077d0bdSPeter Avalos 
21701077d0bdSPeter Avalos 	/*
21711077d0bdSPeter Avalos 	 * If the root node is a return, then there is no
21721077d0bdSPeter Avalos 	 * point executing any statements (since the bpf machine
21731077d0bdSPeter Avalos 	 * has no side effects).
21741077d0bdSPeter Avalos 	 */
21751077d0bdSPeter Avalos 	if (BPF_CLASS((*b)->s.code) == BPF_RET)
21761077d0bdSPeter Avalos 		(*b)->stmts = 0;
21771077d0bdSPeter Avalos }
21781077d0bdSPeter Avalos 
21791077d0bdSPeter Avalos static void
opt_loop(opt_state_t * opt_state,struct icode * ic,int do_stmts)21803a289941SAaron LI opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
21811077d0bdSPeter Avalos {
21821077d0bdSPeter Avalos 
21831077d0bdSPeter Avalos #ifdef BDEBUG
21843a289941SAaron LI 	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
21851077d0bdSPeter Avalos 		printf("opt_loop(root, %d) begin\n", do_stmts);
21863a289941SAaron LI 		opt_dump(opt_state, ic);
21871077d0bdSPeter Avalos 	}
21881077d0bdSPeter Avalos #endif
2189*ea16f64eSAntonio Huete Jimenez 
2190*ea16f64eSAntonio Huete Jimenez 	/*
2191*ea16f64eSAntonio Huete Jimenez 	 * XXX - optimizer loop detection.
2192*ea16f64eSAntonio Huete Jimenez 	 */
2193*ea16f64eSAntonio Huete Jimenez 	int loop_count = 0;
2194*ea16f64eSAntonio Huete Jimenez 	for (;;) {
219597a9217aSAntonio Huete Jimenez 		opt_state->done = 1;
2196*ea16f64eSAntonio Huete Jimenez 		/*
2197*ea16f64eSAntonio Huete Jimenez 		 * XXX - optimizer loop detection.
2198*ea16f64eSAntonio Huete Jimenez 		 */
2199*ea16f64eSAntonio Huete Jimenez 		opt_state->non_branch_movement_performed = 0;
220097a9217aSAntonio Huete Jimenez 		find_levels(opt_state, ic);
220197a9217aSAntonio Huete Jimenez 		find_dom(opt_state, ic->root);
220297a9217aSAntonio Huete Jimenez 		find_closure(opt_state, ic->root);
220397a9217aSAntonio Huete Jimenez 		find_ud(opt_state, ic->root);
220497a9217aSAntonio Huete Jimenez 		find_edom(opt_state, ic->root);
22053a289941SAaron LI 		opt_blks(opt_state, ic, do_stmts);
22061077d0bdSPeter Avalos #ifdef BDEBUG
22073a289941SAaron LI 		if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
220897a9217aSAntonio Huete Jimenez 			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
22093a289941SAaron LI 			opt_dump(opt_state, ic);
22101077d0bdSPeter Avalos 		}
22111077d0bdSPeter Avalos #endif
2212*ea16f64eSAntonio Huete Jimenez 
2213*ea16f64eSAntonio Huete Jimenez 		/*
2214*ea16f64eSAntonio Huete Jimenez 		 * Was anything done in this optimizer pass?
2215*ea16f64eSAntonio Huete Jimenez 		 */
2216*ea16f64eSAntonio Huete Jimenez 		if (opt_state->done) {
2217*ea16f64eSAntonio Huete Jimenez 			/*
2218*ea16f64eSAntonio Huete Jimenez 			 * No, so we've reached a fixed point.
2219*ea16f64eSAntonio Huete Jimenez 			 * We're done.
2220*ea16f64eSAntonio Huete Jimenez 			 */
2221*ea16f64eSAntonio Huete Jimenez 			break;
2222*ea16f64eSAntonio Huete Jimenez 		}
2223*ea16f64eSAntonio Huete Jimenez 
2224*ea16f64eSAntonio Huete Jimenez 		/*
2225*ea16f64eSAntonio Huete Jimenez 		 * XXX - was anything done other than branch movement
2226*ea16f64eSAntonio Huete Jimenez 		 * in this pass?
2227*ea16f64eSAntonio Huete Jimenez 		 */
2228*ea16f64eSAntonio Huete Jimenez 		if (opt_state->non_branch_movement_performed) {
2229*ea16f64eSAntonio Huete Jimenez 			/*
2230*ea16f64eSAntonio Huete Jimenez 			 * Yes.  Clear any loop-detection counter;
2231*ea16f64eSAntonio Huete Jimenez 			 * we're making some form of progress (assuming
2232*ea16f64eSAntonio Huete Jimenez 			 * we can't get into a cycle doing *other*
2233*ea16f64eSAntonio Huete Jimenez 			 * optimizations...).
2234*ea16f64eSAntonio Huete Jimenez 			 */
2235*ea16f64eSAntonio Huete Jimenez 			loop_count = 0;
2236*ea16f64eSAntonio Huete Jimenez 		} else {
2237*ea16f64eSAntonio Huete Jimenez 			/*
2238*ea16f64eSAntonio Huete Jimenez 			 * No - increment the counter, and quit if
2239*ea16f64eSAntonio Huete Jimenez 			 * it's up to 100.
2240*ea16f64eSAntonio Huete Jimenez 			 */
2241*ea16f64eSAntonio Huete Jimenez 			loop_count++;
2242*ea16f64eSAntonio Huete Jimenez 			if (loop_count >= 100) {
2243*ea16f64eSAntonio Huete Jimenez 				/*
2244*ea16f64eSAntonio Huete Jimenez 				 * We've done nothing but branch movement
2245*ea16f64eSAntonio Huete Jimenez 				 * for 100 passes; we're probably
2246*ea16f64eSAntonio Huete Jimenez 				 * in a cycle and will never reach a
2247*ea16f64eSAntonio Huete Jimenez 				 * fixed point.
2248*ea16f64eSAntonio Huete Jimenez 				 *
2249*ea16f64eSAntonio Huete Jimenez 				 * XXX - yes, we really need a non-
2250*ea16f64eSAntonio Huete Jimenez 				 * heuristic way of detecting a cycle.
2251*ea16f64eSAntonio Huete Jimenez 				 */
2252*ea16f64eSAntonio Huete Jimenez 				opt_state->done = 1;
2253*ea16f64eSAntonio Huete Jimenez 				break;
2254*ea16f64eSAntonio Huete Jimenez 			}
2255*ea16f64eSAntonio Huete Jimenez 		}
2256*ea16f64eSAntonio Huete Jimenez 	}
22571077d0bdSPeter Avalos }
22581077d0bdSPeter Avalos 
22591077d0bdSPeter Avalos /*
22601077d0bdSPeter Avalos  * Optimize the filter code in its dag representation.
22613a289941SAaron LI  * Return 0 on success, -1 on error.
22621077d0bdSPeter Avalos  */
22633a289941SAaron LI int
bpf_optimize(struct icode * ic,char * errbuf)22643a289941SAaron LI bpf_optimize(struct icode *ic, char *errbuf)
22651077d0bdSPeter Avalos {
226697a9217aSAntonio Huete Jimenez 	opt_state_t opt_state;
22671077d0bdSPeter Avalos 
22683a289941SAaron LI 	memset(&opt_state, 0, sizeof(opt_state));
22693a289941SAaron LI 	opt_state.errbuf = errbuf;
2270*ea16f64eSAntonio Huete Jimenez 	opt_state.non_branch_movement_performed = 0;
22713a289941SAaron LI 	if (setjmp(opt_state.top_ctx)) {
22723a289941SAaron LI 		opt_cleanup(&opt_state);
22733a289941SAaron LI 		return -1;
22743a289941SAaron LI 	}
22753a289941SAaron LI 	opt_init(&opt_state, ic);
22763a289941SAaron LI 	opt_loop(&opt_state, ic, 0);
22773a289941SAaron LI 	opt_loop(&opt_state, ic, 1);
227897a9217aSAntonio Huete Jimenez 	intern_blocks(&opt_state, ic);
22791077d0bdSPeter Avalos #ifdef BDEBUG
22803a289941SAaron LI 	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
22811077d0bdSPeter Avalos 		printf("after intern_blocks()\n");
22823a289941SAaron LI 		opt_dump(&opt_state, ic);
22831077d0bdSPeter Avalos 	}
22841077d0bdSPeter Avalos #endif
228597a9217aSAntonio Huete Jimenez 	opt_root(&ic->root);
22861077d0bdSPeter Avalos #ifdef BDEBUG
22873a289941SAaron LI 	if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
22881077d0bdSPeter Avalos 		printf("after opt_root()\n");
22893a289941SAaron LI 		opt_dump(&opt_state, ic);
22901077d0bdSPeter Avalos 	}
22911077d0bdSPeter Avalos #endif
229297a9217aSAntonio Huete Jimenez 	opt_cleanup(&opt_state);
22933a289941SAaron LI 	return 0;
22941077d0bdSPeter Avalos }
22951077d0bdSPeter Avalos 
22961077d0bdSPeter Avalos static void
make_marks(struct icode * ic,struct block * p)229797a9217aSAntonio Huete Jimenez make_marks(struct icode *ic, struct block *p)
22981077d0bdSPeter Avalos {
229997a9217aSAntonio Huete Jimenez 	if (!isMarked(ic, p)) {
230097a9217aSAntonio Huete Jimenez 		Mark(ic, p);
23011077d0bdSPeter Avalos 		if (BPF_CLASS(p->s.code) != BPF_RET) {
230297a9217aSAntonio Huete Jimenez 			make_marks(ic, JT(p));
230397a9217aSAntonio Huete Jimenez 			make_marks(ic, JF(p));
23041077d0bdSPeter Avalos 		}
23051077d0bdSPeter Avalos 	}
23061077d0bdSPeter Avalos }
23071077d0bdSPeter Avalos 
23081077d0bdSPeter Avalos /*
230997a9217aSAntonio Huete Jimenez  * Mark code array such that isMarked(ic->cur_mark, i) is true
23101077d0bdSPeter Avalos  * only for nodes that are alive.
23111077d0bdSPeter Avalos  */
23121077d0bdSPeter Avalos static void
mark_code(struct icode * ic)231397a9217aSAntonio Huete Jimenez mark_code(struct icode *ic)
23141077d0bdSPeter Avalos {
231597a9217aSAntonio Huete Jimenez 	ic->cur_mark += 1;
231697a9217aSAntonio Huete Jimenez 	make_marks(ic, ic->root);
23171077d0bdSPeter Avalos }
23181077d0bdSPeter Avalos 
23191077d0bdSPeter Avalos /*
23201077d0bdSPeter Avalos  * True iff the two stmt lists load the same value from the packet into
23211077d0bdSPeter Avalos  * the accumulator.
23221077d0bdSPeter Avalos  */
23231077d0bdSPeter Avalos static int
eq_slist(struct slist * x,struct slist * y)23240e381983SMatthew Dillon eq_slist(struct slist *x, struct slist *y)
23251077d0bdSPeter Avalos {
23263a289941SAaron LI 	for (;;) {
23271077d0bdSPeter Avalos 		while (x && x->s.code == NOP)
23281077d0bdSPeter Avalos 			x = x->next;
23291077d0bdSPeter Avalos 		while (y && y->s.code == NOP)
23301077d0bdSPeter Avalos 			y = y->next;
23311077d0bdSPeter Avalos 		if (x == 0)
23321077d0bdSPeter Avalos 			return y == 0;
23331077d0bdSPeter Avalos 		if (y == 0)
23341077d0bdSPeter Avalos 			return x == 0;
23351077d0bdSPeter Avalos 		if (x->s.code != y->s.code || x->s.k != y->s.k)
23361077d0bdSPeter Avalos 			return 0;
23371077d0bdSPeter Avalos 		x = x->next;
23381077d0bdSPeter Avalos 		y = y->next;
23391077d0bdSPeter Avalos 	}
23401077d0bdSPeter Avalos }
23411077d0bdSPeter Avalos 
23421077d0bdSPeter Avalos static inline int
eq_blk(struct block * b0,struct block * b1)23430e381983SMatthew Dillon eq_blk(struct block *b0, struct block *b1)
23441077d0bdSPeter Avalos {
23451077d0bdSPeter Avalos 	if (b0->s.code == b1->s.code &&
23461077d0bdSPeter Avalos 	    b0->s.k == b1->s.k &&
23471077d0bdSPeter Avalos 	    b0->et.succ == b1->et.succ &&
23481077d0bdSPeter Avalos 	    b0->ef.succ == b1->ef.succ)
23491077d0bdSPeter Avalos 		return eq_slist(b0->stmts, b1->stmts);
23501077d0bdSPeter Avalos 	return 0;
23511077d0bdSPeter Avalos }
23521077d0bdSPeter Avalos 
23531077d0bdSPeter Avalos static void
intern_blocks(opt_state_t * opt_state,struct icode * ic)235497a9217aSAntonio Huete Jimenez intern_blocks(opt_state_t *opt_state, struct icode *ic)
23551077d0bdSPeter Avalos {
23561077d0bdSPeter Avalos 	struct block *p;
2357*ea16f64eSAntonio Huete Jimenez 	u_int i, j;
23581077d0bdSPeter Avalos 	int done1; /* don't shadow global */
23591077d0bdSPeter Avalos  top:
23601077d0bdSPeter Avalos 	done1 = 1;
236197a9217aSAntonio Huete Jimenez 	for (i = 0; i < opt_state->n_blocks; ++i)
236297a9217aSAntonio Huete Jimenez 		opt_state->blocks[i]->link = 0;
23631077d0bdSPeter Avalos 
236497a9217aSAntonio Huete Jimenez 	mark_code(ic);
23651077d0bdSPeter Avalos 
2366*ea16f64eSAntonio Huete Jimenez 	for (i = opt_state->n_blocks - 1; i != 0; ) {
2367*ea16f64eSAntonio Huete Jimenez 		--i;
236897a9217aSAntonio Huete Jimenez 		if (!isMarked(ic, opt_state->blocks[i]))
23691077d0bdSPeter Avalos 			continue;
237097a9217aSAntonio Huete Jimenez 		for (j = i + 1; j < opt_state->n_blocks; ++j) {
237197a9217aSAntonio Huete Jimenez 			if (!isMarked(ic, opt_state->blocks[j]))
23721077d0bdSPeter Avalos 				continue;
237397a9217aSAntonio Huete Jimenez 			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
237497a9217aSAntonio Huete Jimenez 				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
237597a9217aSAntonio Huete Jimenez 					opt_state->blocks[j]->link : opt_state->blocks[j];
23761077d0bdSPeter Avalos 				break;
23771077d0bdSPeter Avalos 			}
23781077d0bdSPeter Avalos 		}
23791077d0bdSPeter Avalos 	}
238097a9217aSAntonio Huete Jimenez 	for (i = 0; i < opt_state->n_blocks; ++i) {
238197a9217aSAntonio Huete Jimenez 		p = opt_state->blocks[i];
23821077d0bdSPeter Avalos 		if (JT(p) == 0)
23831077d0bdSPeter Avalos 			continue;
23841077d0bdSPeter Avalos 		if (JT(p)->link) {
23851077d0bdSPeter Avalos 			done1 = 0;
23861077d0bdSPeter Avalos 			JT(p) = JT(p)->link;
23871077d0bdSPeter Avalos 		}
23881077d0bdSPeter Avalos 		if (JF(p)->link) {
23891077d0bdSPeter Avalos 			done1 = 0;
23901077d0bdSPeter Avalos 			JF(p) = JF(p)->link;
23911077d0bdSPeter Avalos 		}
23921077d0bdSPeter Avalos 	}
23931077d0bdSPeter Avalos 	if (!done1)
23941077d0bdSPeter Avalos 		goto top;
23951077d0bdSPeter Avalos }
23961077d0bdSPeter Avalos 
23971077d0bdSPeter Avalos static void
opt_cleanup(opt_state_t * opt_state)239897a9217aSAntonio Huete Jimenez opt_cleanup(opt_state_t *opt_state)
23991077d0bdSPeter Avalos {
240097a9217aSAntonio Huete Jimenez 	free((void *)opt_state->vnode_base);
240197a9217aSAntonio Huete Jimenez 	free((void *)opt_state->vmap);
240297a9217aSAntonio Huete Jimenez 	free((void *)opt_state->edges);
240397a9217aSAntonio Huete Jimenez 	free((void *)opt_state->space);
240497a9217aSAntonio Huete Jimenez 	free((void *)opt_state->levels);
240597a9217aSAntonio Huete Jimenez 	free((void *)opt_state->blocks);
24061077d0bdSPeter Avalos }
24071077d0bdSPeter Avalos 
24081077d0bdSPeter Avalos /*
24093a289941SAaron LI  * For optimizer errors.
24103a289941SAaron LI  */
24113a289941SAaron LI static void PCAP_NORETURN
opt_error(opt_state_t * opt_state,const char * fmt,...)24123a289941SAaron LI opt_error(opt_state_t *opt_state, const char *fmt, ...)
24133a289941SAaron LI {
24143a289941SAaron LI 	va_list ap;
24153a289941SAaron LI 
24163a289941SAaron LI 	if (opt_state->errbuf != NULL) {
24173a289941SAaron LI 		va_start(ap, fmt);
2418*ea16f64eSAntonio Huete Jimenez 		(void)vsnprintf(opt_state->errbuf,
24193a289941SAaron LI 		    PCAP_ERRBUF_SIZE, fmt, ap);
24203a289941SAaron LI 		va_end(ap);
24213a289941SAaron LI 	}
24223a289941SAaron LI 	longjmp(opt_state->top_ctx, 1);
24233a289941SAaron LI 	/* NOTREACHED */
24243a289941SAaron LI }
24253a289941SAaron LI 
24263a289941SAaron LI /*
24271077d0bdSPeter Avalos  * Return the number of stmts in 's'.
24281077d0bdSPeter Avalos  */
24290e1eae1fSPeter Avalos static u_int
slength(struct slist * s)24300e381983SMatthew Dillon slength(struct slist *s)
24311077d0bdSPeter Avalos {
24320e1eae1fSPeter Avalos 	u_int n = 0;
24331077d0bdSPeter Avalos 
24341077d0bdSPeter Avalos 	for (; s; s = s->next)
24351077d0bdSPeter Avalos 		if (s->s.code != NOP)
24361077d0bdSPeter Avalos 			++n;
24371077d0bdSPeter Avalos 	return n;
24381077d0bdSPeter Avalos }
24391077d0bdSPeter Avalos 
24401077d0bdSPeter Avalos /*
24411077d0bdSPeter Avalos  * Return the number of nodes reachable by 'p'.
24421077d0bdSPeter Avalos  * All nodes should be initially unmarked.
24431077d0bdSPeter Avalos  */
24441077d0bdSPeter Avalos static int
count_blocks(struct icode * ic,struct block * p)244597a9217aSAntonio Huete Jimenez count_blocks(struct icode *ic, struct block *p)
24461077d0bdSPeter Avalos {
244797a9217aSAntonio Huete Jimenez 	if (p == 0 || isMarked(ic, p))
24481077d0bdSPeter Avalos 		return 0;
244997a9217aSAntonio Huete Jimenez 	Mark(ic, p);
245097a9217aSAntonio Huete Jimenez 	return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
24511077d0bdSPeter Avalos }
24521077d0bdSPeter Avalos 
24531077d0bdSPeter Avalos /*
24541077d0bdSPeter Avalos  * Do a depth first search on the flow graph, numbering the
24551077d0bdSPeter Avalos  * the basic blocks, and entering them into the 'blocks' array.`
24561077d0bdSPeter Avalos  */
24571077d0bdSPeter Avalos static void
number_blks_r(opt_state_t * opt_state,struct icode * ic,struct block * p)245897a9217aSAntonio Huete Jimenez number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
24591077d0bdSPeter Avalos {
2460*ea16f64eSAntonio Huete Jimenez 	u_int n;
24611077d0bdSPeter Avalos 
246297a9217aSAntonio Huete Jimenez 	if (p == 0 || isMarked(ic, p))
24631077d0bdSPeter Avalos 		return;
24641077d0bdSPeter Avalos 
246597a9217aSAntonio Huete Jimenez 	Mark(ic, p);
246697a9217aSAntonio Huete Jimenez 	n = opt_state->n_blocks++;
2467*ea16f64eSAntonio Huete Jimenez 	if (opt_state->n_blocks == 0) {
2468*ea16f64eSAntonio Huete Jimenez 		/*
2469*ea16f64eSAntonio Huete Jimenez 		 * Overflow.
2470*ea16f64eSAntonio Huete Jimenez 		 */
2471*ea16f64eSAntonio Huete Jimenez 		opt_error(opt_state, "filter is too complex to optimize");
2472*ea16f64eSAntonio Huete Jimenez 	}
24731077d0bdSPeter Avalos 	p->id = n;
247497a9217aSAntonio Huete Jimenez 	opt_state->blocks[n] = p;
24751077d0bdSPeter Avalos 
247697a9217aSAntonio Huete Jimenez 	number_blks_r(opt_state, ic, JT(p));
247797a9217aSAntonio Huete Jimenez 	number_blks_r(opt_state, ic, JF(p));
24781077d0bdSPeter Avalos }
24791077d0bdSPeter Avalos 
24801077d0bdSPeter Avalos /*
24811077d0bdSPeter Avalos  * Return the number of stmts in the flowgraph reachable by 'p'.
24821077d0bdSPeter Avalos  * The nodes should be unmarked before calling.
24831077d0bdSPeter Avalos  *
24841077d0bdSPeter Avalos  * Note that "stmts" means "instructions", and that this includes
24851077d0bdSPeter Avalos  *
24861077d0bdSPeter Avalos  *	side-effect statements in 'p' (slength(p->stmts));
24871077d0bdSPeter Avalos  *
24881077d0bdSPeter Avalos  *	statements in the true branch from 'p' (count_stmts(JT(p)));
24891077d0bdSPeter Avalos  *
24901077d0bdSPeter Avalos  *	statements in the false branch from 'p' (count_stmts(JF(p)));
24911077d0bdSPeter Avalos  *
24921077d0bdSPeter Avalos  *	the conditional jump itself (1);
24931077d0bdSPeter Avalos  *
24941077d0bdSPeter Avalos  *	an extra long jump if the true branch requires it (p->longjt);
24951077d0bdSPeter Avalos  *
24961077d0bdSPeter Avalos  *	an extra long jump if the false branch requires it (p->longjf).
24971077d0bdSPeter Avalos  */
24980e1eae1fSPeter Avalos static u_int
count_stmts(struct icode * ic,struct block * p)249997a9217aSAntonio Huete Jimenez count_stmts(struct icode *ic, struct block *p)
25001077d0bdSPeter Avalos {
25010e1eae1fSPeter Avalos 	u_int n;
25021077d0bdSPeter Avalos 
250397a9217aSAntonio Huete Jimenez 	if (p == 0 || isMarked(ic, p))
25041077d0bdSPeter Avalos 		return 0;
250597a9217aSAntonio Huete Jimenez 	Mark(ic, p);
250697a9217aSAntonio Huete Jimenez 	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
25071077d0bdSPeter Avalos 	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
25081077d0bdSPeter Avalos }
25091077d0bdSPeter Avalos 
25101077d0bdSPeter Avalos /*
25111077d0bdSPeter Avalos  * Allocate memory.  All allocation is done before optimization
25121077d0bdSPeter Avalos  * is begun.  A linear bound on the size of all data structures is computed
25131077d0bdSPeter Avalos  * from the total number of blocks and/or statements.
25141077d0bdSPeter Avalos  */
25151077d0bdSPeter Avalos static void
opt_init(opt_state_t * opt_state,struct icode * ic)25163a289941SAaron LI opt_init(opt_state_t *opt_state, struct icode *ic)
25171077d0bdSPeter Avalos {
25181077d0bdSPeter Avalos 	bpf_u_int32 *p;
25191077d0bdSPeter Avalos 	int i, n, max_stmts;
2520*ea16f64eSAntonio Huete Jimenez 	u_int product;
2521*ea16f64eSAntonio Huete Jimenez 	size_t block_memsize, edge_memsize;
25221077d0bdSPeter Avalos 
25231077d0bdSPeter Avalos 	/*
25241077d0bdSPeter Avalos 	 * First, count the blocks, so we can malloc an array to map
25251077d0bdSPeter Avalos 	 * block number to block.  Then, put the blocks into the array.
25261077d0bdSPeter Avalos 	 */
252797a9217aSAntonio Huete Jimenez 	unMarkAll(ic);
252897a9217aSAntonio Huete Jimenez 	n = count_blocks(ic, ic->root);
252997a9217aSAntonio Huete Jimenez 	opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
253097a9217aSAntonio Huete Jimenez 	if (opt_state->blocks == NULL)
25313a289941SAaron LI 		opt_error(opt_state, "malloc");
253297a9217aSAntonio Huete Jimenez 	unMarkAll(ic);
253397a9217aSAntonio Huete Jimenez 	opt_state->n_blocks = 0;
253497a9217aSAntonio Huete Jimenez 	number_blks_r(opt_state, ic, ic->root);
25351077d0bdSPeter Avalos 
2536*ea16f64eSAntonio Huete Jimenez 	/*
2537*ea16f64eSAntonio Huete Jimenez 	 * This "should not happen".
2538*ea16f64eSAntonio Huete Jimenez 	 */
2539*ea16f64eSAntonio Huete Jimenez 	if (opt_state->n_blocks == 0)
2540*ea16f64eSAntonio Huete Jimenez 		opt_error(opt_state, "filter has no instructions; please report this as a libpcap issue");
2541*ea16f64eSAntonio Huete Jimenez 
254297a9217aSAntonio Huete Jimenez 	opt_state->n_edges = 2 * opt_state->n_blocks;
2543*ea16f64eSAntonio Huete Jimenez 	if ((opt_state->n_edges / 2) != opt_state->n_blocks) {
2544*ea16f64eSAntonio Huete Jimenez 		/*
2545*ea16f64eSAntonio Huete Jimenez 		 * Overflow.
2546*ea16f64eSAntonio Huete Jimenez 		 */
2547*ea16f64eSAntonio Huete Jimenez 		opt_error(opt_state, "filter is too complex to optimize");
2548*ea16f64eSAntonio Huete Jimenez 	}
254997a9217aSAntonio Huete Jimenez 	opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
25503a289941SAaron LI 	if (opt_state->edges == NULL) {
25513a289941SAaron LI 		opt_error(opt_state, "malloc");
25523a289941SAaron LI 	}
25531077d0bdSPeter Avalos 
25541077d0bdSPeter Avalos 	/*
25551077d0bdSPeter Avalos 	 * The number of levels is bounded by the number of nodes.
25561077d0bdSPeter Avalos 	 */
255797a9217aSAntonio Huete Jimenez 	opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
25583a289941SAaron LI 	if (opt_state->levels == NULL) {
25593a289941SAaron LI 		opt_error(opt_state, "malloc");
25603a289941SAaron LI 	}
25611077d0bdSPeter Avalos 
2562*ea16f64eSAntonio Huete Jimenez 	opt_state->edgewords = opt_state->n_edges / BITS_PER_WORD + 1;
2563*ea16f64eSAntonio Huete Jimenez 	opt_state->nodewords = opt_state->n_blocks / BITS_PER_WORD + 1;
2564*ea16f64eSAntonio Huete Jimenez 
2565*ea16f64eSAntonio Huete Jimenez 	/*
2566*ea16f64eSAntonio Huete Jimenez 	 * Make sure opt_state->n_blocks * opt_state->nodewords fits
2567*ea16f64eSAntonio Huete Jimenez 	 * in a u_int; we use it as a u_int number-of-iterations
2568*ea16f64eSAntonio Huete Jimenez 	 * value.
2569*ea16f64eSAntonio Huete Jimenez 	 */
2570*ea16f64eSAntonio Huete Jimenez 	product = opt_state->n_blocks * opt_state->nodewords;
2571*ea16f64eSAntonio Huete Jimenez 	if ((product / opt_state->n_blocks) != opt_state->nodewords) {
2572*ea16f64eSAntonio Huete Jimenez 		/*
2573*ea16f64eSAntonio Huete Jimenez 		 * XXX - just punt and don't try to optimize?
2574*ea16f64eSAntonio Huete Jimenez 		 * In practice, this is unlikely to happen with
2575*ea16f64eSAntonio Huete Jimenez 		 * a normal filter.
2576*ea16f64eSAntonio Huete Jimenez 		 */
2577*ea16f64eSAntonio Huete Jimenez 		opt_error(opt_state, "filter is too complex to optimize");
2578*ea16f64eSAntonio Huete Jimenez 	}
2579*ea16f64eSAntonio Huete Jimenez 
2580*ea16f64eSAntonio Huete Jimenez 	/*
2581*ea16f64eSAntonio Huete Jimenez 	 * Make sure the total memory required for that doesn't
2582*ea16f64eSAntonio Huete Jimenez 	 * overflow.
2583*ea16f64eSAntonio Huete Jimenez 	 */
2584*ea16f64eSAntonio Huete Jimenez 	block_memsize = (size_t)2 * product * sizeof(*opt_state->space);
2585*ea16f64eSAntonio Huete Jimenez 	if ((block_memsize / product) != 2 * sizeof(*opt_state->space)) {
2586*ea16f64eSAntonio Huete Jimenez 		opt_error(opt_state, "filter is too complex to optimize");
2587*ea16f64eSAntonio Huete Jimenez 	}
2588*ea16f64eSAntonio Huete Jimenez 
2589*ea16f64eSAntonio Huete Jimenez 	/*
2590*ea16f64eSAntonio Huete Jimenez 	 * Make sure opt_state->n_edges * opt_state->edgewords fits
2591*ea16f64eSAntonio Huete Jimenez 	 * in a u_int; we use it as a u_int number-of-iterations
2592*ea16f64eSAntonio Huete Jimenez 	 * value.
2593*ea16f64eSAntonio Huete Jimenez 	 */
2594*ea16f64eSAntonio Huete Jimenez 	product = opt_state->n_edges * opt_state->edgewords;
2595*ea16f64eSAntonio Huete Jimenez 	if ((product / opt_state->n_edges) != opt_state->edgewords) {
2596*ea16f64eSAntonio Huete Jimenez 		opt_error(opt_state, "filter is too complex to optimize");
2597*ea16f64eSAntonio Huete Jimenez 	}
2598*ea16f64eSAntonio Huete Jimenez 
2599*ea16f64eSAntonio Huete Jimenez 	/*
2600*ea16f64eSAntonio Huete Jimenez 	 * Make sure the total memory required for that doesn't
2601*ea16f64eSAntonio Huete Jimenez 	 * overflow.
2602*ea16f64eSAntonio Huete Jimenez 	 */
2603*ea16f64eSAntonio Huete Jimenez 	edge_memsize = (size_t)product * sizeof(*opt_state->space);
2604*ea16f64eSAntonio Huete Jimenez 	if (edge_memsize / product != sizeof(*opt_state->space)) {
2605*ea16f64eSAntonio Huete Jimenez 		opt_error(opt_state, "filter is too complex to optimize");
2606*ea16f64eSAntonio Huete Jimenez 	}
2607*ea16f64eSAntonio Huete Jimenez 
2608*ea16f64eSAntonio Huete Jimenez 	/*
2609*ea16f64eSAntonio Huete Jimenez 	 * Make sure the total memory required for both of them dosn't
2610*ea16f64eSAntonio Huete Jimenez 	 * overflow.
2611*ea16f64eSAntonio Huete Jimenez 	 */
2612*ea16f64eSAntonio Huete Jimenez 	if (block_memsize > SIZE_MAX - edge_memsize) {
2613*ea16f64eSAntonio Huete Jimenez 		opt_error(opt_state, "filter is too complex to optimize");
2614*ea16f64eSAntonio Huete Jimenez 	}
26151077d0bdSPeter Avalos 
26161077d0bdSPeter Avalos 	/* XXX */
2617*ea16f64eSAntonio Huete Jimenez 	opt_state->space = (bpf_u_int32 *)malloc(block_memsize + edge_memsize);
26183a289941SAaron LI 	if (opt_state->space == NULL) {
26193a289941SAaron LI 		opt_error(opt_state, "malloc");
26203a289941SAaron LI 	}
262197a9217aSAntonio Huete Jimenez 	p = opt_state->space;
262297a9217aSAntonio Huete Jimenez 	opt_state->all_dom_sets = p;
26231077d0bdSPeter Avalos 	for (i = 0; i < n; ++i) {
262497a9217aSAntonio Huete Jimenez 		opt_state->blocks[i]->dom = p;
262597a9217aSAntonio Huete Jimenez 		p += opt_state->nodewords;
26261077d0bdSPeter Avalos 	}
262797a9217aSAntonio Huete Jimenez 	opt_state->all_closure_sets = p;
26281077d0bdSPeter Avalos 	for (i = 0; i < n; ++i) {
262997a9217aSAntonio Huete Jimenez 		opt_state->blocks[i]->closure = p;
263097a9217aSAntonio Huete Jimenez 		p += opt_state->nodewords;
26311077d0bdSPeter Avalos 	}
263297a9217aSAntonio Huete Jimenez 	opt_state->all_edge_sets = p;
26331077d0bdSPeter Avalos 	for (i = 0; i < n; ++i) {
263497a9217aSAntonio Huete Jimenez 		register struct block *b = opt_state->blocks[i];
26351077d0bdSPeter Avalos 
26361077d0bdSPeter Avalos 		b->et.edom = p;
263797a9217aSAntonio Huete Jimenez 		p += opt_state->edgewords;
26381077d0bdSPeter Avalos 		b->ef.edom = p;
263997a9217aSAntonio Huete Jimenez 		p += opt_state->edgewords;
26401077d0bdSPeter Avalos 		b->et.id = i;
264197a9217aSAntonio Huete Jimenez 		opt_state->edges[i] = &b->et;
264297a9217aSAntonio Huete Jimenez 		b->ef.id = opt_state->n_blocks + i;
264397a9217aSAntonio Huete Jimenez 		opt_state->edges[opt_state->n_blocks + i] = &b->ef;
26441077d0bdSPeter Avalos 		b->et.pred = b;
26451077d0bdSPeter Avalos 		b->ef.pred = b;
26461077d0bdSPeter Avalos 	}
26471077d0bdSPeter Avalos 	max_stmts = 0;
26481077d0bdSPeter Avalos 	for (i = 0; i < n; ++i)
264997a9217aSAntonio Huete Jimenez 		max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
26501077d0bdSPeter Avalos 	/*
26511077d0bdSPeter Avalos 	 * We allocate at most 3 value numbers per statement,
26521077d0bdSPeter Avalos 	 * so this is an upper bound on the number of valnodes
26531077d0bdSPeter Avalos 	 * we'll need.
26541077d0bdSPeter Avalos 	 */
265597a9217aSAntonio Huete Jimenez 	opt_state->maxval = 3 * max_stmts;
265697a9217aSAntonio Huete Jimenez 	opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
26573a289941SAaron LI 	if (opt_state->vmap == NULL) {
26583a289941SAaron LI 		opt_error(opt_state, "malloc");
26593a289941SAaron LI 	}
266097a9217aSAntonio Huete Jimenez 	opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
26613a289941SAaron LI 	if (opt_state->vnode_base == NULL) {
26623a289941SAaron LI 		opt_error(opt_state, "malloc");
26633a289941SAaron LI 	}
26641077d0bdSPeter Avalos }
26651077d0bdSPeter Avalos 
26661077d0bdSPeter Avalos /*
266797a9217aSAntonio Huete Jimenez  * This is only used when supporting optimizer debugging.  It is
266897a9217aSAntonio Huete Jimenez  * global state, so do *not* do more than one compile in parallel
266997a9217aSAntonio Huete Jimenez  * and expect it to provide meaningful information.
26701077d0bdSPeter Avalos  */
26711077d0bdSPeter Avalos #ifdef BDEBUG
26723a289941SAaron LI int bids[NBIDS];
26731077d0bdSPeter Avalos #endif
26741077d0bdSPeter Avalos 
26753a289941SAaron LI static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
26763a289941SAaron LI     PCAP_PRINTFLIKE(2, 3);
26773a289941SAaron LI 
26781077d0bdSPeter Avalos /*
26791077d0bdSPeter Avalos  * Returns true if successful.  Returns false if a branch has
26801077d0bdSPeter Avalos  * an offset that is too large.  If so, we have marked that
26811077d0bdSPeter Avalos  * branch so that on a subsequent iteration, it will be treated
26821077d0bdSPeter Avalos  * properly.
26831077d0bdSPeter Avalos  */
26841077d0bdSPeter Avalos static int
convert_code_r(conv_state_t * conv_state,struct icode * ic,struct block * p)26853a289941SAaron LI convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
26861077d0bdSPeter Avalos {
26871077d0bdSPeter Avalos 	struct bpf_insn *dst;
26881077d0bdSPeter Avalos 	struct slist *src;
268997a9217aSAntonio Huete Jimenez 	u_int slen;
26901077d0bdSPeter Avalos 	u_int off;
26911077d0bdSPeter Avalos 	struct slist **offset = NULL;
26921077d0bdSPeter Avalos 
269397a9217aSAntonio Huete Jimenez 	if (p == 0 || isMarked(ic, p))
26941077d0bdSPeter Avalos 		return (1);
269597a9217aSAntonio Huete Jimenez 	Mark(ic, p);
26961077d0bdSPeter Avalos 
26973a289941SAaron LI 	if (convert_code_r(conv_state, ic, JF(p)) == 0)
26981077d0bdSPeter Avalos 		return (0);
26993a289941SAaron LI 	if (convert_code_r(conv_state, ic, JT(p)) == 0)
27001077d0bdSPeter Avalos 		return (0);
27011077d0bdSPeter Avalos 
27021077d0bdSPeter Avalos 	slen = slength(p->stmts);
270397a9217aSAntonio Huete Jimenez 	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
27041077d0bdSPeter Avalos 		/* inflate length by any extra jumps */
27051077d0bdSPeter Avalos 
270697a9217aSAntonio Huete Jimenez 	p->offset = (int)(dst - conv_state->fstart);
27071077d0bdSPeter Avalos 
27081077d0bdSPeter Avalos 	/* generate offset[] for convenience  */
27091077d0bdSPeter Avalos 	if (slen) {
27101077d0bdSPeter Avalos 		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
27111077d0bdSPeter Avalos 		if (!offset) {
27123a289941SAaron LI 			conv_error(conv_state, "not enough core");
27131077d0bdSPeter Avalos 			/*NOTREACHED*/
27141077d0bdSPeter Avalos 		}
27151077d0bdSPeter Avalos 	}
27161077d0bdSPeter Avalos 	src = p->stmts;
27171077d0bdSPeter Avalos 	for (off = 0; off < slen && src; off++) {
27181077d0bdSPeter Avalos #if 0
27191077d0bdSPeter Avalos 		printf("off=%d src=%x\n", off, src);
27201077d0bdSPeter Avalos #endif
27211077d0bdSPeter Avalos 		offset[off] = src;
27221077d0bdSPeter Avalos 		src = src->next;
27231077d0bdSPeter Avalos 	}
27241077d0bdSPeter Avalos 
27251077d0bdSPeter Avalos 	off = 0;
27261077d0bdSPeter Avalos 	for (src = p->stmts; src; src = src->next) {
27271077d0bdSPeter Avalos 		if (src->s.code == NOP)
27281077d0bdSPeter Avalos 			continue;
27291077d0bdSPeter Avalos 		dst->code = (u_short)src->s.code;
27301077d0bdSPeter Avalos 		dst->k = src->s.k;
27311077d0bdSPeter Avalos 
27321077d0bdSPeter Avalos 		/* fill block-local relative jump */
27331077d0bdSPeter Avalos 		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
27341077d0bdSPeter Avalos #if 0
27351077d0bdSPeter Avalos 			if (src->s.jt || src->s.jf) {
27363a289941SAaron LI 				free(offset);
27373a289941SAaron LI 				conv_error(conv_state, "illegal jmp destination");
27381077d0bdSPeter Avalos 				/*NOTREACHED*/
27391077d0bdSPeter Avalos 			}
27401077d0bdSPeter Avalos #endif
27411077d0bdSPeter Avalos 			goto filled;
27421077d0bdSPeter Avalos 		}
27431077d0bdSPeter Avalos 		if (off == slen - 2)	/*???*/
27441077d0bdSPeter Avalos 			goto filled;
27451077d0bdSPeter Avalos 
27461077d0bdSPeter Avalos 	    {
274797a9217aSAntonio Huete Jimenez 		u_int i;
27481077d0bdSPeter Avalos 		int jt, jf;
27493a289941SAaron LI 		const char ljerr[] = "%s for block-local relative jump: off=%d";
27501077d0bdSPeter Avalos 
27511077d0bdSPeter Avalos #if 0
27521077d0bdSPeter Avalos 		printf("code=%x off=%d %x %x\n", src->s.code,
27531077d0bdSPeter Avalos 			off, src->s.jt, src->s.jf);
27541077d0bdSPeter Avalos #endif
27551077d0bdSPeter Avalos 
27561077d0bdSPeter Avalos 		if (!src->s.jt || !src->s.jf) {
27573a289941SAaron LI 			free(offset);
27583a289941SAaron LI 			conv_error(conv_state, ljerr, "no jmp destination", off);
27591077d0bdSPeter Avalos 			/*NOTREACHED*/
27601077d0bdSPeter Avalos 		}
27611077d0bdSPeter Avalos 
27621077d0bdSPeter Avalos 		jt = jf = 0;
27631077d0bdSPeter Avalos 		for (i = 0; i < slen; i++) {
27641077d0bdSPeter Avalos 			if (offset[i] == src->s.jt) {
27651077d0bdSPeter Avalos 				if (jt) {
27663a289941SAaron LI 					free(offset);
27673a289941SAaron LI 					conv_error(conv_state, ljerr, "multiple matches", off);
27681077d0bdSPeter Avalos 					/*NOTREACHED*/
27691077d0bdSPeter Avalos 				}
27701077d0bdSPeter Avalos 
27713a289941SAaron LI 				if (i - off - 1 >= 256) {
27723a289941SAaron LI 					free(offset);
27733a289941SAaron LI 					conv_error(conv_state, ljerr, "out-of-range jump", off);
27743a289941SAaron LI 					/*NOTREACHED*/
27753a289941SAaron LI 				}
27763a289941SAaron LI 				dst->jt = (u_char)(i - off - 1);
27771077d0bdSPeter Avalos 				jt++;
27781077d0bdSPeter Avalos 			}
27791077d0bdSPeter Avalos 			if (offset[i] == src->s.jf) {
27801077d0bdSPeter Avalos 				if (jf) {
27813a289941SAaron LI 					free(offset);
27823a289941SAaron LI 					conv_error(conv_state, ljerr, "multiple matches", off);
27831077d0bdSPeter Avalos 					/*NOTREACHED*/
27841077d0bdSPeter Avalos 				}
27853a289941SAaron LI 				if (i - off - 1 >= 256) {
27863a289941SAaron LI 					free(offset);
27873a289941SAaron LI 					conv_error(conv_state, ljerr, "out-of-range jump", off);
27883a289941SAaron LI 					/*NOTREACHED*/
27893a289941SAaron LI 				}
27903a289941SAaron LI 				dst->jf = (u_char)(i - off - 1);
27911077d0bdSPeter Avalos 				jf++;
27921077d0bdSPeter Avalos 			}
27931077d0bdSPeter Avalos 		}
27941077d0bdSPeter Avalos 		if (!jt || !jf) {
27953a289941SAaron LI 			free(offset);
27963a289941SAaron LI 			conv_error(conv_state, ljerr, "no destination found", off);
27971077d0bdSPeter Avalos 			/*NOTREACHED*/
27981077d0bdSPeter Avalos 		}
27991077d0bdSPeter Avalos 	    }
28001077d0bdSPeter Avalos filled:
28011077d0bdSPeter Avalos 		++dst;
28021077d0bdSPeter Avalos 		++off;
28031077d0bdSPeter Avalos 	}
28041077d0bdSPeter Avalos 	if (offset)
28051077d0bdSPeter Avalos 		free(offset);
28061077d0bdSPeter Avalos 
28071077d0bdSPeter Avalos #ifdef BDEBUG
28083a289941SAaron LI 	if (dst - conv_state->fstart < NBIDS)
280997a9217aSAntonio Huete Jimenez 		bids[dst - conv_state->fstart] = p->id + 1;
28101077d0bdSPeter Avalos #endif
28111077d0bdSPeter Avalos 	dst->code = (u_short)p->s.code;
28121077d0bdSPeter Avalos 	dst->k = p->s.k;
28131077d0bdSPeter Avalos 	if (JT(p)) {
2814*ea16f64eSAntonio Huete Jimenez 		/* number of extra jumps inserted */
2815*ea16f64eSAntonio Huete Jimenez 		u_char extrajmps = 0;
28161077d0bdSPeter Avalos 		off = JT(p)->offset - (p->offset + slen) - 1;
28171077d0bdSPeter Avalos 		if (off >= 256) {
28181077d0bdSPeter Avalos 		    /* offset too large for branch, must add a jump */
28191077d0bdSPeter Avalos 		    if (p->longjt == 0) {
28201077d0bdSPeter Avalos 			/* mark this instruction and retry */
28211077d0bdSPeter Avalos 			p->longjt++;
28221077d0bdSPeter Avalos 			return(0);
28231077d0bdSPeter Avalos 		    }
2824*ea16f64eSAntonio Huete Jimenez 		    dst->jt = extrajmps;
28251077d0bdSPeter Avalos 		    extrajmps++;
28261077d0bdSPeter Avalos 		    dst[extrajmps].code = BPF_JMP|BPF_JA;
28271077d0bdSPeter Avalos 		    dst[extrajmps].k = off - extrajmps;
28281077d0bdSPeter Avalos 		}
28291077d0bdSPeter Avalos 		else
28303a289941SAaron LI 		    dst->jt = (u_char)off;
28311077d0bdSPeter Avalos 		off = JF(p)->offset - (p->offset + slen) - 1;
28321077d0bdSPeter Avalos 		if (off >= 256) {
28331077d0bdSPeter Avalos 		    /* offset too large for branch, must add a jump */
28341077d0bdSPeter Avalos 		    if (p->longjf == 0) {
28351077d0bdSPeter Avalos 			/* mark this instruction and retry */
28361077d0bdSPeter Avalos 			p->longjf++;
28371077d0bdSPeter Avalos 			return(0);
28381077d0bdSPeter Avalos 		    }
28391077d0bdSPeter Avalos 		    /* branch if F to following jump */
28401077d0bdSPeter Avalos 		    /* if two jumps are inserted, F goes to second one */
2841*ea16f64eSAntonio Huete Jimenez 		    dst->jf = extrajmps;
28421077d0bdSPeter Avalos 		    extrajmps++;
28431077d0bdSPeter Avalos 		    dst[extrajmps].code = BPF_JMP|BPF_JA;
28441077d0bdSPeter Avalos 		    dst[extrajmps].k = off - extrajmps;
28451077d0bdSPeter Avalos 		}
28461077d0bdSPeter Avalos 		else
28473a289941SAaron LI 		    dst->jf = (u_char)off;
28481077d0bdSPeter Avalos 	}
28491077d0bdSPeter Avalos 	return (1);
28501077d0bdSPeter Avalos }
28511077d0bdSPeter Avalos 
28521077d0bdSPeter Avalos 
28531077d0bdSPeter Avalos /*
28541077d0bdSPeter Avalos  * Convert flowgraph intermediate representation to the
28551077d0bdSPeter Avalos  * BPF array representation.  Set *lenp to the number of instructions.
28561077d0bdSPeter Avalos  *
28571077d0bdSPeter Avalos  * This routine does *NOT* leak the memory pointed to by fp.  It *must
28581077d0bdSPeter Avalos  * not* do free(fp) before returning fp; doing so would make no sense,
28591077d0bdSPeter Avalos  * as the BPF array pointed to by the return value of icode_to_fcode()
28601077d0bdSPeter Avalos  * must be valid - it's being returned for use in a bpf_program structure.
28611077d0bdSPeter Avalos  *
28621077d0bdSPeter Avalos  * If it appears that icode_to_fcode() is leaking, the problem is that
28631077d0bdSPeter Avalos  * the program using pcap_compile() is failing to free the memory in
28641077d0bdSPeter Avalos  * the BPF program when it's done - the leak is in the program, not in
28651077d0bdSPeter Avalos  * the routine that happens to be allocating the memory.  (By analogy, if
28661077d0bdSPeter Avalos  * a program calls fopen() without ever calling fclose() on the FILE *,
28671077d0bdSPeter Avalos  * it will leak the FILE structure; the leak is not in fopen(), it's in
28681077d0bdSPeter Avalos  * the program.)  Change the program to use pcap_freecode() when it's
28691077d0bdSPeter Avalos  * done with the filter program.  See the pcap man page.
28701077d0bdSPeter Avalos  */
28711077d0bdSPeter Avalos struct bpf_insn *
icode_to_fcode(struct icode * ic,struct block * root,u_int * lenp,char * errbuf)28723a289941SAaron LI icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
28733a289941SAaron LI     char *errbuf)
28741077d0bdSPeter Avalos {
28750e1eae1fSPeter Avalos 	u_int n;
28761077d0bdSPeter Avalos 	struct bpf_insn *fp;
287797a9217aSAntonio Huete Jimenez 	conv_state_t conv_state;
28781077d0bdSPeter Avalos 
28793a289941SAaron LI 	conv_state.fstart = NULL;
28803a289941SAaron LI 	conv_state.errbuf = errbuf;
28813a289941SAaron LI 	if (setjmp(conv_state.top_ctx) != 0) {
28823a289941SAaron LI 		free(conv_state.fstart);
28833a289941SAaron LI 		return NULL;
28843a289941SAaron LI 	}
28853a289941SAaron LI 
28861077d0bdSPeter Avalos 	/*
28871077d0bdSPeter Avalos 	 * Loop doing convert_code_r() until no branches remain
28881077d0bdSPeter Avalos 	 * with too-large offsets.
28891077d0bdSPeter Avalos 	 */
28903a289941SAaron LI 	for (;;) {
289197a9217aSAntonio Huete Jimenez 	    unMarkAll(ic);
289297a9217aSAntonio Huete Jimenez 	    n = *lenp = count_stmts(ic, root);
28931077d0bdSPeter Avalos 
28941077d0bdSPeter Avalos 	    fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
28953a289941SAaron LI 	    if (fp == NULL) {
2896*ea16f64eSAntonio Huete Jimenez 		(void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
28973a289941SAaron LI 		    "malloc");
28983a289941SAaron LI 		free(fp);
28993a289941SAaron LI 		return NULL;
29003a289941SAaron LI 	    }
29011077d0bdSPeter Avalos 	    memset((char *)fp, 0, sizeof(*fp) * n);
290297a9217aSAntonio Huete Jimenez 	    conv_state.fstart = fp;
290397a9217aSAntonio Huete Jimenez 	    conv_state.ftail = fp + n;
29041077d0bdSPeter Avalos 
290597a9217aSAntonio Huete Jimenez 	    unMarkAll(ic);
29063a289941SAaron LI 	    if (convert_code_r(&conv_state, ic, root))
29071077d0bdSPeter Avalos 		break;
29081077d0bdSPeter Avalos 	    free(fp);
29091077d0bdSPeter Avalos 	}
29101077d0bdSPeter Avalos 
29111077d0bdSPeter Avalos 	return fp;
29121077d0bdSPeter Avalos }
29131077d0bdSPeter Avalos 
29141077d0bdSPeter Avalos /*
29153a289941SAaron LI  * For iconv_to_fconv() errors.
29163a289941SAaron LI  */
29173a289941SAaron LI static void PCAP_NORETURN
conv_error(conv_state_t * conv_state,const char * fmt,...)29183a289941SAaron LI conv_error(conv_state_t *conv_state, const char *fmt, ...)
29193a289941SAaron LI {
29203a289941SAaron LI 	va_list ap;
29213a289941SAaron LI 
29223a289941SAaron LI 	va_start(ap, fmt);
2923*ea16f64eSAntonio Huete Jimenez 	(void)vsnprintf(conv_state->errbuf,
29243a289941SAaron LI 	    PCAP_ERRBUF_SIZE, fmt, ap);
29253a289941SAaron LI 	va_end(ap);
29263a289941SAaron LI 	longjmp(conv_state->top_ctx, 1);
29273a289941SAaron LI 	/* NOTREACHED */
29283a289941SAaron LI }
29293a289941SAaron LI 
29303a289941SAaron LI /*
29311077d0bdSPeter Avalos  * Make a copy of a BPF program and put it in the "fcode" member of
29321077d0bdSPeter Avalos  * a "pcap_t".
29331077d0bdSPeter Avalos  *
29341077d0bdSPeter Avalos  * If we fail to allocate memory for the copy, fill in the "errbuf"
29351077d0bdSPeter Avalos  * member of the "pcap_t" with an error message, and return -1;
29361077d0bdSPeter Avalos  * otherwise, return 0.
29371077d0bdSPeter Avalos  */
29381077d0bdSPeter Avalos int
install_bpf_program(pcap_t * p,struct bpf_program * fp)29391077d0bdSPeter Avalos install_bpf_program(pcap_t *p, struct bpf_program *fp)
29401077d0bdSPeter Avalos {
29411077d0bdSPeter Avalos 	size_t prog_size;
29421077d0bdSPeter Avalos 
29431077d0bdSPeter Avalos 	/*
2944de0d3203SPeter Avalos 	 * Validate the program.
2945de0d3203SPeter Avalos 	 */
2946*ea16f64eSAntonio Huete Jimenez 	if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) {
2947*ea16f64eSAntonio Huete Jimenez 		snprintf(p->errbuf, sizeof(p->errbuf),
2948de0d3203SPeter Avalos 			"BPF program is not valid");
2949de0d3203SPeter Avalos 		return (-1);
2950de0d3203SPeter Avalos 	}
2951de0d3203SPeter Avalos 
2952de0d3203SPeter Avalos 	/*
29531077d0bdSPeter Avalos 	 * Free up any already installed program.
29541077d0bdSPeter Avalos 	 */
29551077d0bdSPeter Avalos 	pcap_freecode(&p->fcode);
29561077d0bdSPeter Avalos 
29571077d0bdSPeter Avalos 	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
29581077d0bdSPeter Avalos 	p->fcode.bf_len = fp->bf_len;
29591077d0bdSPeter Avalos 	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
29601077d0bdSPeter Avalos 	if (p->fcode.bf_insns == NULL) {
29613a289941SAaron LI 		pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
29623a289941SAaron LI 		    errno, "malloc");
29631077d0bdSPeter Avalos 		return (-1);
29641077d0bdSPeter Avalos 	}
29651077d0bdSPeter Avalos 	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
29661077d0bdSPeter Avalos 	return (0);
29671077d0bdSPeter Avalos }
29681077d0bdSPeter Avalos 
29691077d0bdSPeter Avalos #ifdef BDEBUG
29701077d0bdSPeter Avalos static void
dot_dump_node(struct icode * ic,struct block * block,struct bpf_program * prog,FILE * out)297197a9217aSAntonio Huete Jimenez dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
297297a9217aSAntonio Huete Jimenez     FILE *out)
297397a9217aSAntonio Huete Jimenez {
297497a9217aSAntonio Huete Jimenez 	int icount, noffset;
297597a9217aSAntonio Huete Jimenez 	int i;
297697a9217aSAntonio Huete Jimenez 
297797a9217aSAntonio Huete Jimenez 	if (block == NULL || isMarked(ic, block))
297897a9217aSAntonio Huete Jimenez 		return;
297997a9217aSAntonio Huete Jimenez 	Mark(ic, block);
298097a9217aSAntonio Huete Jimenez 
298197a9217aSAntonio Huete Jimenez 	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
298297a9217aSAntonio Huete Jimenez 	noffset = min(block->offset + icount, (int)prog->bf_len);
298397a9217aSAntonio Huete Jimenez 
2984*ea16f64eSAntonio Huete Jimenez 	fprintf(out, "\tblock%u [shape=ellipse, id=\"block-%u\" label=\"BLOCK%u\\n", block->id, block->id, block->id);
298597a9217aSAntonio Huete Jimenez 	for (i = block->offset; i < noffset; i++) {
298697a9217aSAntonio Huete Jimenez 		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
298797a9217aSAntonio Huete Jimenez 	}
298897a9217aSAntonio Huete Jimenez 	fprintf(out, "\" tooltip=\"");
298997a9217aSAntonio Huete Jimenez 	for (i = 0; i < BPF_MEMWORDS; i++)
29903a289941SAaron LI 		if (block->val[i] != VAL_UNKNOWN)
299197a9217aSAntonio Huete Jimenez 			fprintf(out, "val[%d]=%d ", i, block->val[i]);
299297a9217aSAntonio Huete Jimenez 	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
299397a9217aSAntonio Huete Jimenez 	fprintf(out, "val[X]=%d", block->val[X_ATOM]);
299497a9217aSAntonio Huete Jimenez 	fprintf(out, "\"");
299597a9217aSAntonio Huete Jimenez 	if (JT(block) == NULL)
299697a9217aSAntonio Huete Jimenez 		fprintf(out, ", peripheries=2");
299797a9217aSAntonio Huete Jimenez 	fprintf(out, "];\n");
299897a9217aSAntonio Huete Jimenez 
299997a9217aSAntonio Huete Jimenez 	dot_dump_node(ic, JT(block), prog, out);
300097a9217aSAntonio Huete Jimenez 	dot_dump_node(ic, JF(block), prog, out);
300197a9217aSAntonio Huete Jimenez }
300297a9217aSAntonio Huete Jimenez 
300397a9217aSAntonio Huete Jimenez static void
dot_dump_edge(struct icode * ic,struct block * block,FILE * out)300497a9217aSAntonio Huete Jimenez dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
300597a9217aSAntonio Huete Jimenez {
300697a9217aSAntonio Huete Jimenez 	if (block == NULL || isMarked(ic, block))
300797a9217aSAntonio Huete Jimenez 		return;
300897a9217aSAntonio Huete Jimenez 	Mark(ic, block);
300997a9217aSAntonio Huete Jimenez 
301097a9217aSAntonio Huete Jimenez 	if (JT(block)) {
3011*ea16f64eSAntonio Huete Jimenez 		fprintf(out, "\t\"block%u\":se -> \"block%u\":n [label=\"T\"]; \n",
301297a9217aSAntonio Huete Jimenez 				block->id, JT(block)->id);
3013*ea16f64eSAntonio Huete Jimenez 		fprintf(out, "\t\"block%u\":sw -> \"block%u\":n [label=\"F\"]; \n",
301497a9217aSAntonio Huete Jimenez 			   block->id, JF(block)->id);
301597a9217aSAntonio Huete Jimenez 	}
301697a9217aSAntonio Huete Jimenez 	dot_dump_edge(ic, JT(block), out);
301797a9217aSAntonio Huete Jimenez 	dot_dump_edge(ic, JF(block), out);
301897a9217aSAntonio Huete Jimenez }
301997a9217aSAntonio Huete Jimenez 
302097a9217aSAntonio Huete Jimenez /* Output the block CFG using graphviz/DOT language
302197a9217aSAntonio Huete Jimenez  * In the CFG, block's code, value index for each registers at EXIT,
302297a9217aSAntonio Huete Jimenez  * and the jump relationship is show.
302397a9217aSAntonio Huete Jimenez  *
302497a9217aSAntonio Huete Jimenez  * example DOT for BPF `ip src host 1.1.1.1' is:
302597a9217aSAntonio Huete Jimenez     digraph BPF {
302697a9217aSAntonio Huete Jimenez     	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh      [12]\n(001) jeq      #0x800           jt 2	jf 5" tooltip="val[A]=0 val[X]=0"];
302797a9217aSAntonio Huete Jimenez     	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld       [26]\n(003) jeq      #0x1010101       jt 4	jf 5" tooltip="val[A]=0 val[X]=0"];
302897a9217aSAntonio Huete Jimenez     	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret      #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
302997a9217aSAntonio Huete Jimenez     	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret      #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
303097a9217aSAntonio Huete Jimenez     	"block0":se -> "block1":n [label="T"];
303197a9217aSAntonio Huete Jimenez     	"block0":sw -> "block3":n [label="F"];
303297a9217aSAntonio Huete Jimenez     	"block1":se -> "block2":n [label="T"];
303397a9217aSAntonio Huete Jimenez     	"block1":sw -> "block3":n [label="F"];
303497a9217aSAntonio Huete Jimenez     }
303597a9217aSAntonio Huete Jimenez  *
3036*ea16f64eSAntonio Huete Jimenez  *  After install graphviz on https://www.graphviz.org/, save it as bpf.dot
303797a9217aSAntonio Huete Jimenez  *  and run `dot -Tpng -O bpf.dot' to draw the graph.
303897a9217aSAntonio Huete Jimenez  */
30393a289941SAaron LI static int
dot_dump(struct icode * ic,char * errbuf)30403a289941SAaron LI dot_dump(struct icode *ic, char *errbuf)
304197a9217aSAntonio Huete Jimenez {
304297a9217aSAntonio Huete Jimenez 	struct bpf_program f;
304397a9217aSAntonio Huete Jimenez 	FILE *out = stdout;
304497a9217aSAntonio Huete Jimenez 
304597a9217aSAntonio Huete Jimenez 	memset(bids, 0, sizeof bids);
30463a289941SAaron LI 	f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
30473a289941SAaron LI 	if (f.bf_insns == NULL)
30483a289941SAaron LI 		return -1;
304997a9217aSAntonio Huete Jimenez 
305097a9217aSAntonio Huete Jimenez 	fprintf(out, "digraph BPF {\n");
305197a9217aSAntonio Huete Jimenez 	unMarkAll(ic);
305297a9217aSAntonio Huete Jimenez 	dot_dump_node(ic, ic->root, &f, out);
305397a9217aSAntonio Huete Jimenez 	unMarkAll(ic);
305497a9217aSAntonio Huete Jimenez 	dot_dump_edge(ic, ic->root, out);
305597a9217aSAntonio Huete Jimenez 	fprintf(out, "}\n");
305697a9217aSAntonio Huete Jimenez 
305797a9217aSAntonio Huete Jimenez 	free((char *)f.bf_insns);
30583a289941SAaron LI 	return 0;
305997a9217aSAntonio Huete Jimenez }
306097a9217aSAntonio Huete Jimenez 
30613a289941SAaron LI static int
plain_dump(struct icode * ic,char * errbuf)30623a289941SAaron LI plain_dump(struct icode *ic, char *errbuf)
30631077d0bdSPeter Avalos {
30641077d0bdSPeter Avalos 	struct bpf_program f;
30651077d0bdSPeter Avalos 
30661077d0bdSPeter Avalos 	memset(bids, 0, sizeof bids);
30673a289941SAaron LI 	f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
30683a289941SAaron LI 	if (f.bf_insns == NULL)
30693a289941SAaron LI 		return -1;
30701077d0bdSPeter Avalos 	bpf_dump(&f, 1);
30711077d0bdSPeter Avalos 	putchar('\n');
30721077d0bdSPeter Avalos 	free((char *)f.bf_insns);
30733a289941SAaron LI 	return 0;
30741077d0bdSPeter Avalos }
307597a9217aSAntonio Huete Jimenez 
307697a9217aSAntonio Huete Jimenez static void
opt_dump(opt_state_t * opt_state,struct icode * ic)30773a289941SAaron LI opt_dump(opt_state_t *opt_state, struct icode *ic)
307897a9217aSAntonio Huete Jimenez {
30793a289941SAaron LI 	int status;
30803a289941SAaron LI 	char errbuf[PCAP_ERRBUF_SIZE];
30813a289941SAaron LI 
30823a289941SAaron LI 	/*
30833a289941SAaron LI 	 * If the CFG, in DOT format, is requested, output it rather than
30843a289941SAaron LI 	 * the code that would be generated from that graph.
308597a9217aSAntonio Huete Jimenez 	 */
30863a289941SAaron LI 	if (pcap_print_dot_graph)
30873a289941SAaron LI 		status = dot_dump(ic, errbuf);
308897a9217aSAntonio Huete Jimenez 	else
30893a289941SAaron LI 		status = plain_dump(ic, errbuf);
30903a289941SAaron LI 	if (status == -1)
30913a289941SAaron LI 		opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
309297a9217aSAntonio Huete Jimenez }
30931077d0bdSPeter Avalos #endif
3094