11077d0bdSPeter Avalos /*
21077d0bdSPeter Avalos * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
31077d0bdSPeter Avalos * The Regents of the University of California. All rights reserved.
41077d0bdSPeter Avalos *
51077d0bdSPeter Avalos * Redistribution and use in source and binary forms, with or without
61077d0bdSPeter Avalos * modification, are permitted provided that: (1) source code distributions
71077d0bdSPeter Avalos * retain the above copyright notice and this paragraph in its entirety, (2)
81077d0bdSPeter Avalos * distributions including binary code include the above copyright notice and
91077d0bdSPeter Avalos * this paragraph in its entirety in the documentation or other materials
101077d0bdSPeter Avalos * provided with the distribution, and (3) all advertising materials mentioning
111077d0bdSPeter Avalos * features or use of this software display the following acknowledgement:
121077d0bdSPeter Avalos * ``This product includes software developed by the University of California,
131077d0bdSPeter Avalos * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
141077d0bdSPeter Avalos * the University nor the names of its contributors may be used to endorse
151077d0bdSPeter Avalos * or promote products derived from this software without specific prior
161077d0bdSPeter Avalos * written permission.
171077d0bdSPeter Avalos * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
181077d0bdSPeter Avalos * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
191077d0bdSPeter Avalos * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
201077d0bdSPeter Avalos *
213a289941SAaron LI * Optimization module for BPF code intermediate representation.
221077d0bdSPeter Avalos */
231077d0bdSPeter Avalos
241077d0bdSPeter Avalos #ifdef HAVE_CONFIG_H
253a289941SAaron LI #include <config.h>
261077d0bdSPeter Avalos #endif
271077d0bdSPeter Avalos
283a289941SAaron LI #include <pcap-types.h>
29a85e14b0SPeter Avalos
301077d0bdSPeter Avalos #include <stdio.h>
311077d0bdSPeter Avalos #include <stdlib.h>
321077d0bdSPeter Avalos #include <memory.h>
333a289941SAaron LI #include <setjmp.h>
341077d0bdSPeter Avalos #include <string.h>
351077d0bdSPeter Avalos
361077d0bdSPeter Avalos #include <errno.h>
371077d0bdSPeter Avalos
381077d0bdSPeter Avalos #include "pcap-int.h"
391077d0bdSPeter Avalos
401077d0bdSPeter Avalos #include "gencode.h"
413a289941SAaron LI #include "optimize.h"
421077d0bdSPeter Avalos
431077d0bdSPeter Avalos #ifdef HAVE_OS_PROTO_H
441077d0bdSPeter Avalos #include "os-proto.h"
451077d0bdSPeter Avalos #endif
461077d0bdSPeter Avalos
471077d0bdSPeter Avalos #ifdef BDEBUG
4897a9217aSAntonio Huete Jimenez /*
493a289941SAaron LI * The internal "debug printout" flag for the filter expression optimizer.
503a289941SAaron LI * The code to print that stuff is present only if BDEBUG is defined, so
513a289941SAaron LI * the flag, and the routine to set it, are defined only if BDEBUG is
523a289941SAaron LI * defined.
5397a9217aSAntonio Huete Jimenez */
543a289941SAaron LI static int pcap_optimizer_debug;
553a289941SAaron LI
5697a9217aSAntonio Huete Jimenez /*
573a289941SAaron LI * Routine to set that flag.
5897a9217aSAntonio Huete Jimenez *
593a289941SAaron LI * This is intended for libpcap developers, not for general use.
603a289941SAaron LI * If you want to set these in a program, you'll have to declare this
613a289941SAaron LI * routine yourself, with the appropriate DLL import attribute on Windows;
623a289941SAaron LI * it's not declared in any header file, and won't be declared in any
633a289941SAaron LI * header file provided by libpcap.
643a289941SAaron LI */
653a289941SAaron LI PCAP_API void pcap_set_optimizer_debug(int value);
663a289941SAaron LI
673a289941SAaron LI PCAP_API_DEF void
pcap_set_optimizer_debug(int value)683a289941SAaron LI pcap_set_optimizer_debug(int value)
693a289941SAaron LI {
703a289941SAaron LI pcap_optimizer_debug = value;
713a289941SAaron LI }
723a289941SAaron LI
733a289941SAaron LI /*
743a289941SAaron LI * The internal "print dot graph" flag for the filter expression optimizer.
753a289941SAaron LI * The code to print that stuff is present only if BDEBUG is defined, so
763a289941SAaron LI * the flag, and the routine to set it, are defined only if BDEBUG is
773a289941SAaron LI * defined.
783a289941SAaron LI */
793a289941SAaron LI static int pcap_print_dot_graph;
803a289941SAaron LI
813a289941SAaron LI /*
823a289941SAaron LI * Routine to set that flag.
833a289941SAaron LI *
843a289941SAaron LI * This is intended for libpcap developers, not for general use.
853a289941SAaron LI * If you want to set these in a program, you'll have to declare this
863a289941SAaron LI * routine yourself, with the appropriate DLL import attribute on Windows;
873a289941SAaron LI * it's not declared in any header file, and won't be declared in any
883a289941SAaron LI * header file provided by libpcap.
893a289941SAaron LI */
903a289941SAaron LI PCAP_API void pcap_set_print_dot_graph(int value);
913a289941SAaron LI
923a289941SAaron LI PCAP_API_DEF void
pcap_set_print_dot_graph(int value)933a289941SAaron LI pcap_set_print_dot_graph(int value)
943a289941SAaron LI {
953a289941SAaron LI pcap_print_dot_graph = value;
963a289941SAaron LI }
973a289941SAaron LI
983a289941SAaron LI #endif
993a289941SAaron LI
1003a289941SAaron LI /*
1013a289941SAaron LI * lowest_set_bit().
1023a289941SAaron LI *
1033a289941SAaron LI * Takes a 32-bit integer as an argument.
1043a289941SAaron LI *
1053a289941SAaron LI * If handed a non-zero value, returns the index of the lowest set bit,
106*ea16f64eSAntonio Huete Jimenez * counting upwards from zero.
1073a289941SAaron LI *
1083a289941SAaron LI * If handed zero, the results are platform- and compiler-dependent.
1093a289941SAaron LI * Keep it out of the light, don't give it any water, don't feed it
1103a289941SAaron LI * after midnight, and don't pass zero to it.
1113a289941SAaron LI *
1123a289941SAaron LI * This is the same as the count of trailing zeroes in the word.
1133a289941SAaron LI */
1143a289941SAaron LI #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
1153a289941SAaron LI /*
1163a289941SAaron LI * GCC 3.4 and later; we have __builtin_ctz().
1173a289941SAaron LI */
118*ea16f64eSAntonio Huete Jimenez #define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask))
1193a289941SAaron LI #elif defined(_MSC_VER)
1203a289941SAaron LI /*
1213a289941SAaron LI * Visual Studio; we support only 2005 and later, so use
1223a289941SAaron LI * _BitScanForward().
1233a289941SAaron LI */
1243a289941SAaron LI #include <intrin.h>
1253a289941SAaron LI
1263a289941SAaron LI #ifndef __clang__
1273a289941SAaron LI #pragma intrinsic(_BitScanForward)
1283a289941SAaron LI #endif
1293a289941SAaron LI
130*ea16f64eSAntonio Huete Jimenez static __forceinline u_int
lowest_set_bit(int mask)1313a289941SAaron LI lowest_set_bit(int mask)
1323a289941SAaron LI {
1333a289941SAaron LI unsigned long bit;
1343a289941SAaron LI
1353a289941SAaron LI /*
1363a289941SAaron LI * Don't sign-extend mask if long is longer than int.
1373a289941SAaron LI * (It's currently not, in MSVC, even on 64-bit platforms, but....)
1383a289941SAaron LI */
1393a289941SAaron LI if (_BitScanForward(&bit, (unsigned int)mask) == 0)
140*ea16f64eSAntonio Huete Jimenez abort(); /* mask is zero */
141*ea16f64eSAntonio Huete Jimenez return (u_int)bit;
1423a289941SAaron LI }
1433a289941SAaron LI #elif defined(MSDOS) && defined(__DJGPP__)
1443a289941SAaron LI /*
1453a289941SAaron LI * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
1463a289941SAaron LI * we've already included.
1473a289941SAaron LI */
148*ea16f64eSAntonio Huete Jimenez #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1))
1493a289941SAaron LI #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
1503a289941SAaron LI /*
1513a289941SAaron LI * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
1523a289941SAaron LI * or some other platform (UN*X conforming to a sufficient recent version
1533a289941SAaron LI * of the Single UNIX Specification).
1543a289941SAaron LI */
1553a289941SAaron LI #include <strings.h>
156*ea16f64eSAntonio Huete Jimenez #define lowest_set_bit(mask) (u_int)((ffs((mask)) - 1))
1573a289941SAaron LI #else
1583a289941SAaron LI /*
1593a289941SAaron LI * None of the above.
1603a289941SAaron LI * Use a perfect-hash-function-based function.
16197a9217aSAntonio Huete Jimenez */
162*ea16f64eSAntonio Huete Jimenez static u_int
lowest_set_bit(int mask)1633a289941SAaron LI lowest_set_bit(int mask)
16497a9217aSAntonio Huete Jimenez {
1653a289941SAaron LI unsigned int v = (unsigned int)mask;
16697a9217aSAntonio Huete Jimenez
167*ea16f64eSAntonio Huete Jimenez static const u_int MultiplyDeBruijnBitPosition[32] = {
1683a289941SAaron LI 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
1693a289941SAaron LI 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
1703a289941SAaron LI };
1713a289941SAaron LI
1723a289941SAaron LI /*
1733a289941SAaron LI * We strip off all but the lowermost set bit (v & ~v),
1743a289941SAaron LI * and perform a minimal perfect hash on it to look up the
1753a289941SAaron LI * number of low-order zero bits in a table.
1763a289941SAaron LI *
1773a289941SAaron LI * See:
1783a289941SAaron LI *
1793a289941SAaron LI * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
1803a289941SAaron LI *
1813a289941SAaron LI * http://supertech.csail.mit.edu/papers/debruijn.pdf
1823a289941SAaron LI */
1833a289941SAaron LI return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
18497a9217aSAntonio Huete Jimenez }
185de0d3203SPeter Avalos #endif
186de0d3203SPeter Avalos
1871077d0bdSPeter Avalos /*
1881077d0bdSPeter Avalos * Represents a deleted instruction.
1891077d0bdSPeter Avalos */
1901077d0bdSPeter Avalos #define NOP -1
1911077d0bdSPeter Avalos
1921077d0bdSPeter Avalos /*
1931077d0bdSPeter Avalos * Register numbers for use-def values.
1941077d0bdSPeter Avalos * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
1951077d0bdSPeter Avalos * location. A_ATOM is the accumulator and X_ATOM is the index
1961077d0bdSPeter Avalos * register.
1971077d0bdSPeter Avalos */
1981077d0bdSPeter Avalos #define A_ATOM BPF_MEMWORDS
1991077d0bdSPeter Avalos #define X_ATOM (BPF_MEMWORDS+1)
2001077d0bdSPeter Avalos
2011077d0bdSPeter Avalos /*
2021077d0bdSPeter Avalos * This define is used to represent *both* the accumulator and
2031077d0bdSPeter Avalos * x register in use-def computations.
2041077d0bdSPeter Avalos * Currently, the use-def code assumes only one definition per instruction.
2051077d0bdSPeter Avalos */
2061077d0bdSPeter Avalos #define AX_ATOM N_ATOMS
2071077d0bdSPeter Avalos
2081077d0bdSPeter Avalos /*
20997a9217aSAntonio Huete Jimenez * These data structures are used in a Cocke and Shwarz style
21097a9217aSAntonio Huete Jimenez * value numbering scheme. Since the flowgraph is acyclic,
21197a9217aSAntonio Huete Jimenez * exit values can be propagated from a node's predecessors
21297a9217aSAntonio Huete Jimenez * provided it is uniquely defined.
21397a9217aSAntonio Huete Jimenez */
21497a9217aSAntonio Huete Jimenez struct valnode {
21597a9217aSAntonio Huete Jimenez int code;
216*ea16f64eSAntonio Huete Jimenez bpf_u_int32 v0, v1;
217*ea16f64eSAntonio Huete Jimenez int val; /* the value number */
21897a9217aSAntonio Huete Jimenez struct valnode *next;
21997a9217aSAntonio Huete Jimenez };
22097a9217aSAntonio Huete Jimenez
22197a9217aSAntonio Huete Jimenez /* Integer constants mapped with the load immediate opcode. */
222*ea16f64eSAntonio Huete Jimenez #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0U)
22397a9217aSAntonio Huete Jimenez
22497a9217aSAntonio Huete Jimenez struct vmapinfo {
22597a9217aSAntonio Huete Jimenez int is_const;
226*ea16f64eSAntonio Huete Jimenez bpf_u_int32 const_val;
22797a9217aSAntonio Huete Jimenez };
22897a9217aSAntonio Huete Jimenez
2293a289941SAaron LI typedef struct {
2303a289941SAaron LI /*
2313a289941SAaron LI * Place to longjmp to on an error.
2323a289941SAaron LI */
2333a289941SAaron LI jmp_buf top_ctx;
2343a289941SAaron LI
2353a289941SAaron LI /*
2363a289941SAaron LI * The buffer into which to put error message.
2373a289941SAaron LI */
2383a289941SAaron LI char *errbuf;
2393a289941SAaron LI
24097a9217aSAntonio Huete Jimenez /*
2411077d0bdSPeter Avalos * A flag to indicate that further optimization is needed.
2421077d0bdSPeter Avalos * Iterative passes are continued until a given pass yields no
243*ea16f64eSAntonio Huete Jimenez * code simplification or branch movement.
2441077d0bdSPeter Avalos */
24597a9217aSAntonio Huete Jimenez int done;
2461077d0bdSPeter Avalos
247*ea16f64eSAntonio Huete Jimenez /*
248*ea16f64eSAntonio Huete Jimenez * XXX - detect loops that do nothing but repeated AND/OR pullups
249*ea16f64eSAntonio Huete Jimenez * and edge moves.
250*ea16f64eSAntonio Huete Jimenez * If 100 passes in a row do nothing but that, treat that as a
251*ea16f64eSAntonio Huete Jimenez * sign that we're in a loop that just shuffles in a cycle in
252*ea16f64eSAntonio Huete Jimenez * which each pass just shuffles the code and we eventually
253*ea16f64eSAntonio Huete Jimenez * get back to the original configuration.
254*ea16f64eSAntonio Huete Jimenez *
255*ea16f64eSAntonio Huete Jimenez * XXX - we need a non-heuristic way of detecting, or preventing,
256*ea16f64eSAntonio Huete Jimenez * such a cycle.
257*ea16f64eSAntonio Huete Jimenez */
258*ea16f64eSAntonio Huete Jimenez int non_branch_movement_performed;
259*ea16f64eSAntonio Huete Jimenez
260*ea16f64eSAntonio Huete Jimenez u_int n_blocks; /* number of blocks in the CFG; guaranteed to be > 0, as it's a RET instruction at a minimum */
2611077d0bdSPeter Avalos struct block **blocks;
262*ea16f64eSAntonio Huete Jimenez u_int n_edges; /* twice n_blocks, so guaranteed to be > 0 */
2631077d0bdSPeter Avalos struct edge **edges;
2641077d0bdSPeter Avalos
2651077d0bdSPeter Avalos /*
2661077d0bdSPeter Avalos * A bit vector set representation of the dominators.
2671077d0bdSPeter Avalos * We round up the set size to the next power of two.
2681077d0bdSPeter Avalos */
269*ea16f64eSAntonio Huete Jimenez u_int nodewords; /* number of 32-bit words for a bit vector of "number of nodes" bits; guaranteed to be > 0 */
270*ea16f64eSAntonio Huete Jimenez u_int edgewords; /* number of 32-bit words for a bit vector of "number of edges" bits; guaranteed to be > 0 */
2711077d0bdSPeter Avalos struct block **levels;
2721077d0bdSPeter Avalos bpf_u_int32 *space;
27397a9217aSAntonio Huete Jimenez
2741077d0bdSPeter Avalos #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
2751077d0bdSPeter Avalos /*
2761077d0bdSPeter Avalos * True if a is in uset {p}
2771077d0bdSPeter Avalos */
2781077d0bdSPeter Avalos #define SET_MEMBER(p, a) \
2793a289941SAaron LI ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
2801077d0bdSPeter Avalos
2811077d0bdSPeter Avalos /*
2821077d0bdSPeter Avalos * Add 'a' to uset p.
2831077d0bdSPeter Avalos */
2841077d0bdSPeter Avalos #define SET_INSERT(p, a) \
2853a289941SAaron LI (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
2861077d0bdSPeter Avalos
2871077d0bdSPeter Avalos /*
2881077d0bdSPeter Avalos * Delete 'a' from uset p.
2891077d0bdSPeter Avalos */
2901077d0bdSPeter Avalos #define SET_DELETE(p, a) \
2913a289941SAaron LI (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
2921077d0bdSPeter Avalos
2931077d0bdSPeter Avalos /*
2941077d0bdSPeter Avalos * a := a intersect b
295*ea16f64eSAntonio Huete Jimenez * n must be guaranteed to be > 0
2961077d0bdSPeter Avalos */
2971077d0bdSPeter Avalos #define SET_INTERSECT(a, b, n)\
2981077d0bdSPeter Avalos {\
2991077d0bdSPeter Avalos register bpf_u_int32 *_x = a, *_y = b;\
300*ea16f64eSAntonio Huete Jimenez register u_int _n = n;\
301*ea16f64eSAntonio Huete Jimenez do *_x++ &= *_y++; while (--_n != 0);\
3021077d0bdSPeter Avalos }
3031077d0bdSPeter Avalos
3041077d0bdSPeter Avalos /*
3051077d0bdSPeter Avalos * a := a - b
306*ea16f64eSAntonio Huete Jimenez * n must be guaranteed to be > 0
3071077d0bdSPeter Avalos */
3081077d0bdSPeter Avalos #define SET_SUBTRACT(a, b, n)\
3091077d0bdSPeter Avalos {\
3101077d0bdSPeter Avalos register bpf_u_int32 *_x = a, *_y = b;\
311*ea16f64eSAntonio Huete Jimenez register u_int _n = n;\
312*ea16f64eSAntonio Huete Jimenez do *_x++ &=~ *_y++; while (--_n != 0);\
3131077d0bdSPeter Avalos }
3141077d0bdSPeter Avalos
3151077d0bdSPeter Avalos /*
3161077d0bdSPeter Avalos * a := a union b
317*ea16f64eSAntonio Huete Jimenez * n must be guaranteed to be > 0
3181077d0bdSPeter Avalos */
3191077d0bdSPeter Avalos #define SET_UNION(a, b, n)\
3201077d0bdSPeter Avalos {\
3211077d0bdSPeter Avalos register bpf_u_int32 *_x = a, *_y = b;\
322*ea16f64eSAntonio Huete Jimenez register u_int _n = n;\
323*ea16f64eSAntonio Huete Jimenez do *_x++ |= *_y++; while (--_n != 0);\
3241077d0bdSPeter Avalos }
3251077d0bdSPeter Avalos
32697a9217aSAntonio Huete Jimenez uset all_dom_sets;
32797a9217aSAntonio Huete Jimenez uset all_closure_sets;
32897a9217aSAntonio Huete Jimenez uset all_edge_sets;
32997a9217aSAntonio Huete Jimenez
33097a9217aSAntonio Huete Jimenez #define MODULUS 213
33197a9217aSAntonio Huete Jimenez struct valnode *hashtbl[MODULUS];
332*ea16f64eSAntonio Huete Jimenez bpf_u_int32 curval;
333*ea16f64eSAntonio Huete Jimenez bpf_u_int32 maxval;
33497a9217aSAntonio Huete Jimenez
33597a9217aSAntonio Huete Jimenez struct vmapinfo *vmap;
33697a9217aSAntonio Huete Jimenez struct valnode *vnode_base;
33797a9217aSAntonio Huete Jimenez struct valnode *next_vnode;
3383a289941SAaron LI } opt_state_t;
33997a9217aSAntonio Huete Jimenez
34097a9217aSAntonio Huete Jimenez typedef struct {
34197a9217aSAntonio Huete Jimenez /*
3423a289941SAaron LI * Place to longjmp to on an error.
3433a289941SAaron LI */
3443a289941SAaron LI jmp_buf top_ctx;
3453a289941SAaron LI
3463a289941SAaron LI /*
3473a289941SAaron LI * The buffer into which to put error message.
3483a289941SAaron LI */
3493a289941SAaron LI char *errbuf;
3503a289941SAaron LI
3513a289941SAaron LI /*
35297a9217aSAntonio Huete Jimenez * Some pointers used to convert the basic block form of the code,
35397a9217aSAntonio Huete Jimenez * into the array form that BPF requires. 'fstart' will point to
35497a9217aSAntonio Huete Jimenez * the malloc'd array while 'ftail' is used during the recursive
35597a9217aSAntonio Huete Jimenez * traversal.
35697a9217aSAntonio Huete Jimenez */
35797a9217aSAntonio Huete Jimenez struct bpf_insn *fstart;
35897a9217aSAntonio Huete Jimenez struct bpf_insn *ftail;
35997a9217aSAntonio Huete Jimenez } conv_state_t;
36097a9217aSAntonio Huete Jimenez
3613a289941SAaron LI static void opt_init(opt_state_t *, struct icode *);
36297a9217aSAntonio Huete Jimenez static void opt_cleanup(opt_state_t *);
3633a289941SAaron LI static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
3643a289941SAaron LI PCAP_PRINTFLIKE(2, 3);
36597a9217aSAntonio Huete Jimenez
36697a9217aSAntonio Huete Jimenez static void intern_blocks(opt_state_t *, struct icode *);
36797a9217aSAntonio Huete Jimenez
36897a9217aSAntonio Huete Jimenez static void find_inedges(opt_state_t *, struct block *);
36997a9217aSAntonio Huete Jimenez #ifdef BDEBUG
3703a289941SAaron LI static void opt_dump(opt_state_t *, struct icode *);
37197a9217aSAntonio Huete Jimenez #endif
3721077d0bdSPeter Avalos
3731077d0bdSPeter Avalos #ifndef MAX
3741077d0bdSPeter Avalos #define MAX(a,b) ((a)>(b)?(a):(b))
3751077d0bdSPeter Avalos #endif
3761077d0bdSPeter Avalos
3771077d0bdSPeter Avalos static void
find_levels_r(opt_state_t * opt_state,struct icode * ic,struct block * b)37897a9217aSAntonio Huete Jimenez find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
3791077d0bdSPeter Avalos {
3801077d0bdSPeter Avalos int level;
3811077d0bdSPeter Avalos
38297a9217aSAntonio Huete Jimenez if (isMarked(ic, b))
3831077d0bdSPeter Avalos return;
3841077d0bdSPeter Avalos
38597a9217aSAntonio Huete Jimenez Mark(ic, b);
3861077d0bdSPeter Avalos b->link = 0;
3871077d0bdSPeter Avalos
3881077d0bdSPeter Avalos if (JT(b)) {
38997a9217aSAntonio Huete Jimenez find_levels_r(opt_state, ic, JT(b));
39097a9217aSAntonio Huete Jimenez find_levels_r(opt_state, ic, JF(b));
3911077d0bdSPeter Avalos level = MAX(JT(b)->level, JF(b)->level) + 1;
3921077d0bdSPeter Avalos } else
3931077d0bdSPeter Avalos level = 0;
3941077d0bdSPeter Avalos b->level = level;
39597a9217aSAntonio Huete Jimenez b->link = opt_state->levels[level];
39697a9217aSAntonio Huete Jimenez opt_state->levels[level] = b;
3971077d0bdSPeter Avalos }
3981077d0bdSPeter Avalos
3991077d0bdSPeter Avalos /*
4001077d0bdSPeter Avalos * Level graph. The levels go from 0 at the leaves to
40197a9217aSAntonio Huete Jimenez * N_LEVELS at the root. The opt_state->levels[] array points to the
4021077d0bdSPeter Avalos * first node of the level list, whose elements are linked
4031077d0bdSPeter Avalos * with the 'link' field of the struct block.
4041077d0bdSPeter Avalos */
4051077d0bdSPeter Avalos static void
find_levels(opt_state_t * opt_state,struct icode * ic)40697a9217aSAntonio Huete Jimenez find_levels(opt_state_t *opt_state, struct icode *ic)
4071077d0bdSPeter Avalos {
40897a9217aSAntonio Huete Jimenez memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
40997a9217aSAntonio Huete Jimenez unMarkAll(ic);
41097a9217aSAntonio Huete Jimenez find_levels_r(opt_state, ic, ic->root);
4111077d0bdSPeter Avalos }
4121077d0bdSPeter Avalos
4131077d0bdSPeter Avalos /*
4141077d0bdSPeter Avalos * Find dominator relationships.
4151077d0bdSPeter Avalos * Assumes graph has been leveled.
4161077d0bdSPeter Avalos */
4171077d0bdSPeter Avalos static void
find_dom(opt_state_t * opt_state,struct block * root)41897a9217aSAntonio Huete Jimenez find_dom(opt_state_t *opt_state, struct block *root)
4191077d0bdSPeter Avalos {
420*ea16f64eSAntonio Huete Jimenez u_int i;
421*ea16f64eSAntonio Huete Jimenez int level;
4221077d0bdSPeter Avalos struct block *b;
4231077d0bdSPeter Avalos bpf_u_int32 *x;
4241077d0bdSPeter Avalos
4251077d0bdSPeter Avalos /*
4261077d0bdSPeter Avalos * Initialize sets to contain all nodes.
4271077d0bdSPeter Avalos */
42897a9217aSAntonio Huete Jimenez x = opt_state->all_dom_sets;
429*ea16f64eSAntonio Huete Jimenez /*
430*ea16f64eSAntonio Huete Jimenez * In opt_init(), we've made sure the product doesn't overflow.
431*ea16f64eSAntonio Huete Jimenez */
43297a9217aSAntonio Huete Jimenez i = opt_state->n_blocks * opt_state->nodewords;
433*ea16f64eSAntonio Huete Jimenez while (i != 0) {
434*ea16f64eSAntonio Huete Jimenez --i;
4353a289941SAaron LI *x++ = 0xFFFFFFFFU;
436*ea16f64eSAntonio Huete Jimenez }
4371077d0bdSPeter Avalos /* Root starts off empty. */
438*ea16f64eSAntonio Huete Jimenez for (i = opt_state->nodewords; i != 0;) {
439*ea16f64eSAntonio Huete Jimenez --i;
4401077d0bdSPeter Avalos root->dom[i] = 0;
441*ea16f64eSAntonio Huete Jimenez }
4421077d0bdSPeter Avalos
4431077d0bdSPeter Avalos /* root->level is the highest level no found. */
444*ea16f64eSAntonio Huete Jimenez for (level = root->level; level >= 0; --level) {
445*ea16f64eSAntonio Huete Jimenez for (b = opt_state->levels[level]; b; b = b->link) {
4461077d0bdSPeter Avalos SET_INSERT(b->dom, b->id);
4471077d0bdSPeter Avalos if (JT(b) == 0)
4481077d0bdSPeter Avalos continue;
44997a9217aSAntonio Huete Jimenez SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
45097a9217aSAntonio Huete Jimenez SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
4511077d0bdSPeter Avalos }
4521077d0bdSPeter Avalos }
4531077d0bdSPeter Avalos }
4541077d0bdSPeter Avalos
4551077d0bdSPeter Avalos static void
propedom(opt_state_t * opt_state,struct edge * ep)45697a9217aSAntonio Huete Jimenez propedom(opt_state_t *opt_state, struct edge *ep)
4571077d0bdSPeter Avalos {
4581077d0bdSPeter Avalos SET_INSERT(ep->edom, ep->id);
4591077d0bdSPeter Avalos if (ep->succ) {
46097a9217aSAntonio Huete Jimenez SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
46197a9217aSAntonio Huete Jimenez SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
4621077d0bdSPeter Avalos }
4631077d0bdSPeter Avalos }
4641077d0bdSPeter Avalos
4651077d0bdSPeter Avalos /*
4661077d0bdSPeter Avalos * Compute edge dominators.
4671077d0bdSPeter Avalos * Assumes graph has been leveled and predecessors established.
4681077d0bdSPeter Avalos */
4691077d0bdSPeter Avalos static void
find_edom(opt_state_t * opt_state,struct block * root)47097a9217aSAntonio Huete Jimenez find_edom(opt_state_t *opt_state, struct block *root)
4711077d0bdSPeter Avalos {
472*ea16f64eSAntonio Huete Jimenez u_int i;
4731077d0bdSPeter Avalos uset x;
474*ea16f64eSAntonio Huete Jimenez int level;
4751077d0bdSPeter Avalos struct block *b;
4761077d0bdSPeter Avalos
47797a9217aSAntonio Huete Jimenez x = opt_state->all_edge_sets;
478*ea16f64eSAntonio Huete Jimenez /*
479*ea16f64eSAntonio Huete Jimenez * In opt_init(), we've made sure the product doesn't overflow.
480*ea16f64eSAntonio Huete Jimenez */
481*ea16f64eSAntonio Huete Jimenez for (i = opt_state->n_edges * opt_state->edgewords; i != 0; ) {
482*ea16f64eSAntonio Huete Jimenez --i;
4833a289941SAaron LI x[i] = 0xFFFFFFFFU;
484*ea16f64eSAntonio Huete Jimenez }
4851077d0bdSPeter Avalos
4861077d0bdSPeter Avalos /* root->level is the highest level no found. */
48797a9217aSAntonio Huete Jimenez memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
48897a9217aSAntonio Huete Jimenez memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
489*ea16f64eSAntonio Huete Jimenez for (level = root->level; level >= 0; --level) {
490*ea16f64eSAntonio Huete Jimenez for (b = opt_state->levels[level]; b != 0; b = b->link) {
49197a9217aSAntonio Huete Jimenez propedom(opt_state, &b->et);
49297a9217aSAntonio Huete Jimenez propedom(opt_state, &b->ef);
4931077d0bdSPeter Avalos }
4941077d0bdSPeter Avalos }
4951077d0bdSPeter Avalos }
4961077d0bdSPeter Avalos
4971077d0bdSPeter Avalos /*
4981077d0bdSPeter Avalos * Find the backwards transitive closure of the flow graph. These sets
4991077d0bdSPeter Avalos * are backwards in the sense that we find the set of nodes that reach
5001077d0bdSPeter Avalos * a given node, not the set of nodes that can be reached by a node.
5011077d0bdSPeter Avalos *
5021077d0bdSPeter Avalos * Assumes graph has been leveled.
5031077d0bdSPeter Avalos */
5041077d0bdSPeter Avalos static void
find_closure(opt_state_t * opt_state,struct block * root)50597a9217aSAntonio Huete Jimenez find_closure(opt_state_t *opt_state, struct block *root)
5061077d0bdSPeter Avalos {
507*ea16f64eSAntonio Huete Jimenez int level;
5081077d0bdSPeter Avalos struct block *b;
5091077d0bdSPeter Avalos
5101077d0bdSPeter Avalos /*
5111077d0bdSPeter Avalos * Initialize sets to contain no nodes.
5121077d0bdSPeter Avalos */
51397a9217aSAntonio Huete Jimenez memset((char *)opt_state->all_closure_sets, 0,
51497a9217aSAntonio Huete Jimenez opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
5151077d0bdSPeter Avalos
5161077d0bdSPeter Avalos /* root->level is the highest level no found. */
517*ea16f64eSAntonio Huete Jimenez for (level = root->level; level >= 0; --level) {
518*ea16f64eSAntonio Huete Jimenez for (b = opt_state->levels[level]; b; b = b->link) {
5191077d0bdSPeter Avalos SET_INSERT(b->closure, b->id);
5201077d0bdSPeter Avalos if (JT(b) == 0)
5211077d0bdSPeter Avalos continue;
52297a9217aSAntonio Huete Jimenez SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
52397a9217aSAntonio Huete Jimenez SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
5241077d0bdSPeter Avalos }
5251077d0bdSPeter Avalos }
5261077d0bdSPeter Avalos }
5271077d0bdSPeter Avalos
5281077d0bdSPeter Avalos /*
529*ea16f64eSAntonio Huete Jimenez * Return the register number that is used by s.
530*ea16f64eSAntonio Huete Jimenez *
531*ea16f64eSAntonio Huete Jimenez * Returns ATOM_A if A is used, ATOM_X if X is used, AX_ATOM if both A and X
532*ea16f64eSAntonio Huete Jimenez * are used, the scratch memory location's number if a scratch memory
533*ea16f64eSAntonio Huete Jimenez * location is used (e.g., 0 for M[0]), or -1 if none of those are used.
5341077d0bdSPeter Avalos *
5351077d0bdSPeter Avalos * The implementation should probably change to an array access.
5361077d0bdSPeter Avalos */
5371077d0bdSPeter Avalos static int
atomuse(struct stmt * s)5380e381983SMatthew Dillon atomuse(struct stmt *s)
5391077d0bdSPeter Avalos {
5401077d0bdSPeter Avalos register int c = s->code;
5411077d0bdSPeter Avalos
5421077d0bdSPeter Avalos if (c == NOP)
5431077d0bdSPeter Avalos return -1;
5441077d0bdSPeter Avalos
5451077d0bdSPeter Avalos switch (BPF_CLASS(c)) {
5461077d0bdSPeter Avalos
5471077d0bdSPeter Avalos case BPF_RET:
5481077d0bdSPeter Avalos return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
5491077d0bdSPeter Avalos (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
5501077d0bdSPeter Avalos
5511077d0bdSPeter Avalos case BPF_LD:
5521077d0bdSPeter Avalos case BPF_LDX:
553*ea16f64eSAntonio Huete Jimenez /*
554*ea16f64eSAntonio Huete Jimenez * As there are fewer than 2^31 memory locations,
555*ea16f64eSAntonio Huete Jimenez * s->k should be convertible to int without problems.
556*ea16f64eSAntonio Huete Jimenez */
5571077d0bdSPeter Avalos return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
558*ea16f64eSAntonio Huete Jimenez (BPF_MODE(c) == BPF_MEM) ? (int)s->k : -1;
5591077d0bdSPeter Avalos
5601077d0bdSPeter Avalos case BPF_ST:
5611077d0bdSPeter Avalos return A_ATOM;
5621077d0bdSPeter Avalos
5631077d0bdSPeter Avalos case BPF_STX:
5641077d0bdSPeter Avalos return X_ATOM;
5651077d0bdSPeter Avalos
5661077d0bdSPeter Avalos case BPF_JMP:
5671077d0bdSPeter Avalos case BPF_ALU:
5681077d0bdSPeter Avalos if (BPF_SRC(c) == BPF_X)
5691077d0bdSPeter Avalos return AX_ATOM;
5701077d0bdSPeter Avalos return A_ATOM;
5711077d0bdSPeter Avalos
5721077d0bdSPeter Avalos case BPF_MISC:
5731077d0bdSPeter Avalos return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
5741077d0bdSPeter Avalos }
5751077d0bdSPeter Avalos abort();
5761077d0bdSPeter Avalos /* NOTREACHED */
5771077d0bdSPeter Avalos }
5781077d0bdSPeter Avalos
5791077d0bdSPeter Avalos /*
5801077d0bdSPeter Avalos * Return the register number that is defined by 's'. We assume that
5811077d0bdSPeter Avalos * a single stmt cannot define more than one register. If no register
5821077d0bdSPeter Avalos * is defined, return -1.
5831077d0bdSPeter Avalos *
5841077d0bdSPeter Avalos * The implementation should probably change to an array access.
5851077d0bdSPeter Avalos */
5861077d0bdSPeter Avalos static int
atomdef(struct stmt * s)5870e381983SMatthew Dillon atomdef(struct stmt *s)
5881077d0bdSPeter Avalos {
5891077d0bdSPeter Avalos if (s->code == NOP)
5901077d0bdSPeter Avalos return -1;
5911077d0bdSPeter Avalos
5921077d0bdSPeter Avalos switch (BPF_CLASS(s->code)) {
5931077d0bdSPeter Avalos
5941077d0bdSPeter Avalos case BPF_LD:
5951077d0bdSPeter Avalos case BPF_ALU:
5961077d0bdSPeter Avalos return A_ATOM;
5971077d0bdSPeter Avalos
5981077d0bdSPeter Avalos case BPF_LDX:
5991077d0bdSPeter Avalos return X_ATOM;
6001077d0bdSPeter Avalos
6011077d0bdSPeter Avalos case BPF_ST:
6021077d0bdSPeter Avalos case BPF_STX:
6031077d0bdSPeter Avalos return s->k;
6041077d0bdSPeter Avalos
6051077d0bdSPeter Avalos case BPF_MISC:
6061077d0bdSPeter Avalos return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
6071077d0bdSPeter Avalos }
6081077d0bdSPeter Avalos return -1;
6091077d0bdSPeter Avalos }
6101077d0bdSPeter Avalos
6111077d0bdSPeter Avalos /*
6121077d0bdSPeter Avalos * Compute the sets of registers used, defined, and killed by 'b'.
6131077d0bdSPeter Avalos *
6141077d0bdSPeter Avalos * "Used" means that a statement in 'b' uses the register before any
6151077d0bdSPeter Avalos * statement in 'b' defines it, i.e. it uses the value left in
6161077d0bdSPeter Avalos * that register by a predecessor block of this block.
6171077d0bdSPeter Avalos * "Defined" means that a statement in 'b' defines it.
6181077d0bdSPeter Avalos * "Killed" means that a statement in 'b' defines it before any
6191077d0bdSPeter Avalos * statement in 'b' uses it, i.e. it kills the value left in that
6201077d0bdSPeter Avalos * register by a predecessor block of this block.
6211077d0bdSPeter Avalos */
6221077d0bdSPeter Avalos static void
compute_local_ud(struct block * b)6230e381983SMatthew Dillon compute_local_ud(struct block *b)
6241077d0bdSPeter Avalos {
6251077d0bdSPeter Avalos struct slist *s;
62697a9217aSAntonio Huete Jimenez atomset def = 0, use = 0, killed = 0;
6271077d0bdSPeter Avalos int atom;
6281077d0bdSPeter Avalos
6291077d0bdSPeter Avalos for (s = b->stmts; s; s = s->next) {
6301077d0bdSPeter Avalos if (s->s.code == NOP)
6311077d0bdSPeter Avalos continue;
6321077d0bdSPeter Avalos atom = atomuse(&s->s);
6331077d0bdSPeter Avalos if (atom >= 0) {
6341077d0bdSPeter Avalos if (atom == AX_ATOM) {
6351077d0bdSPeter Avalos if (!ATOMELEM(def, X_ATOM))
6361077d0bdSPeter Avalos use |= ATOMMASK(X_ATOM);
6371077d0bdSPeter Avalos if (!ATOMELEM(def, A_ATOM))
6381077d0bdSPeter Avalos use |= ATOMMASK(A_ATOM);
6391077d0bdSPeter Avalos }
6401077d0bdSPeter Avalos else if (atom < N_ATOMS) {
6411077d0bdSPeter Avalos if (!ATOMELEM(def, atom))
6421077d0bdSPeter Avalos use |= ATOMMASK(atom);
6431077d0bdSPeter Avalos }
6441077d0bdSPeter Avalos else
6451077d0bdSPeter Avalos abort();
6461077d0bdSPeter Avalos }
6471077d0bdSPeter Avalos atom = atomdef(&s->s);
6481077d0bdSPeter Avalos if (atom >= 0) {
6491077d0bdSPeter Avalos if (!ATOMELEM(use, atom))
65097a9217aSAntonio Huete Jimenez killed |= ATOMMASK(atom);
6511077d0bdSPeter Avalos def |= ATOMMASK(atom);
6521077d0bdSPeter Avalos }
6531077d0bdSPeter Avalos }
6541077d0bdSPeter Avalos if (BPF_CLASS(b->s.code) == BPF_JMP) {
6551077d0bdSPeter Avalos /*
6561077d0bdSPeter Avalos * XXX - what about RET?
6571077d0bdSPeter Avalos */
6581077d0bdSPeter Avalos atom = atomuse(&b->s);
6591077d0bdSPeter Avalos if (atom >= 0) {
6601077d0bdSPeter Avalos if (atom == AX_ATOM) {
6611077d0bdSPeter Avalos if (!ATOMELEM(def, X_ATOM))
6621077d0bdSPeter Avalos use |= ATOMMASK(X_ATOM);
6631077d0bdSPeter Avalos if (!ATOMELEM(def, A_ATOM))
6641077d0bdSPeter Avalos use |= ATOMMASK(A_ATOM);
6651077d0bdSPeter Avalos }
6661077d0bdSPeter Avalos else if (atom < N_ATOMS) {
6671077d0bdSPeter Avalos if (!ATOMELEM(def, atom))
6681077d0bdSPeter Avalos use |= ATOMMASK(atom);
6691077d0bdSPeter Avalos }
6701077d0bdSPeter Avalos else
6711077d0bdSPeter Avalos abort();
6721077d0bdSPeter Avalos }
6731077d0bdSPeter Avalos }
6741077d0bdSPeter Avalos
6751077d0bdSPeter Avalos b->def = def;
67697a9217aSAntonio Huete Jimenez b->kill = killed;
6771077d0bdSPeter Avalos b->in_use = use;
6781077d0bdSPeter Avalos }
6791077d0bdSPeter Avalos
6801077d0bdSPeter Avalos /*
6811077d0bdSPeter Avalos * Assume graph is already leveled.
6821077d0bdSPeter Avalos */
6831077d0bdSPeter Avalos static void
find_ud(opt_state_t * opt_state,struct block * root)68497a9217aSAntonio Huete Jimenez find_ud(opt_state_t *opt_state, struct block *root)
6851077d0bdSPeter Avalos {
6861077d0bdSPeter Avalos int i, maxlevel;
6871077d0bdSPeter Avalos struct block *p;
6881077d0bdSPeter Avalos
6891077d0bdSPeter Avalos /*
6901077d0bdSPeter Avalos * root->level is the highest level no found;
6911077d0bdSPeter Avalos * count down from there.
6921077d0bdSPeter Avalos */
6931077d0bdSPeter Avalos maxlevel = root->level;
6941077d0bdSPeter Avalos for (i = maxlevel; i >= 0; --i)
69597a9217aSAntonio Huete Jimenez for (p = opt_state->levels[i]; p; p = p->link) {
6961077d0bdSPeter Avalos compute_local_ud(p);
6971077d0bdSPeter Avalos p->out_use = 0;
6981077d0bdSPeter Avalos }
6991077d0bdSPeter Avalos
7001077d0bdSPeter Avalos for (i = 1; i <= maxlevel; ++i) {
70197a9217aSAntonio Huete Jimenez for (p = opt_state->levels[i]; p; p = p->link) {
7021077d0bdSPeter Avalos p->out_use |= JT(p)->in_use | JF(p)->in_use;
7031077d0bdSPeter Avalos p->in_use |= p->out_use &~ p->kill;
7041077d0bdSPeter Avalos }
7051077d0bdSPeter Avalos }
7061077d0bdSPeter Avalos }
7071077d0bdSPeter Avalos static void
init_val(opt_state_t * opt_state)70897a9217aSAntonio Huete Jimenez init_val(opt_state_t *opt_state)
7091077d0bdSPeter Avalos {
71097a9217aSAntonio Huete Jimenez opt_state->curval = 0;
71197a9217aSAntonio Huete Jimenez opt_state->next_vnode = opt_state->vnode_base;
71297a9217aSAntonio Huete Jimenez memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
71397a9217aSAntonio Huete Jimenez memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
7141077d0bdSPeter Avalos }
7151077d0bdSPeter Avalos
716*ea16f64eSAntonio Huete Jimenez /*
717*ea16f64eSAntonio Huete Jimenez * Because we really don't have an IR, this stuff is a little messy.
718*ea16f64eSAntonio Huete Jimenez *
719*ea16f64eSAntonio Huete Jimenez * This routine looks in the table of existing value number for a value
720*ea16f64eSAntonio Huete Jimenez * with generated from an operation with the specified opcode and
721*ea16f64eSAntonio Huete Jimenez * the specified values. If it finds it, it returns its value number,
722*ea16f64eSAntonio Huete Jimenez * otherwise it makes a new entry in the table and returns the
723*ea16f64eSAntonio Huete Jimenez * value number of that entry.
724*ea16f64eSAntonio Huete Jimenez */
725*ea16f64eSAntonio Huete Jimenez static bpf_u_int32
F(opt_state_t * opt_state,int code,bpf_u_int32 v0,bpf_u_int32 v1)726*ea16f64eSAntonio Huete Jimenez F(opt_state_t *opt_state, int code, bpf_u_int32 v0, bpf_u_int32 v1)
7271077d0bdSPeter Avalos {
7281077d0bdSPeter Avalos u_int hash;
729*ea16f64eSAntonio Huete Jimenez bpf_u_int32 val;
7301077d0bdSPeter Avalos struct valnode *p;
7311077d0bdSPeter Avalos
732*ea16f64eSAntonio Huete Jimenez hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
7331077d0bdSPeter Avalos hash %= MODULUS;
7341077d0bdSPeter Avalos
73597a9217aSAntonio Huete Jimenez for (p = opt_state->hashtbl[hash]; p; p = p->next)
7361077d0bdSPeter Avalos if (p->code == code && p->v0 == v0 && p->v1 == v1)
7371077d0bdSPeter Avalos return p->val;
7381077d0bdSPeter Avalos
739*ea16f64eSAntonio Huete Jimenez /*
740*ea16f64eSAntonio Huete Jimenez * Not found. Allocate a new value, and assign it a new
741*ea16f64eSAntonio Huete Jimenez * value number.
742*ea16f64eSAntonio Huete Jimenez *
743*ea16f64eSAntonio Huete Jimenez * opt_state->curval starts out as 0, which means VAL_UNKNOWN; we
744*ea16f64eSAntonio Huete Jimenez * increment it before using it as the new value number, which
745*ea16f64eSAntonio Huete Jimenez * means we never assign VAL_UNKNOWN.
746*ea16f64eSAntonio Huete Jimenez *
747*ea16f64eSAntonio Huete Jimenez * XXX - unless we overflow, but we probably won't have 2^32-1
748*ea16f64eSAntonio Huete Jimenez * values; we treat 32 bits as effectively infinite.
749*ea16f64eSAntonio Huete Jimenez */
75097a9217aSAntonio Huete Jimenez val = ++opt_state->curval;
7511077d0bdSPeter Avalos if (BPF_MODE(code) == BPF_IMM &&
7521077d0bdSPeter Avalos (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
75397a9217aSAntonio Huete Jimenez opt_state->vmap[val].const_val = v0;
75497a9217aSAntonio Huete Jimenez opt_state->vmap[val].is_const = 1;
7551077d0bdSPeter Avalos }
75697a9217aSAntonio Huete Jimenez p = opt_state->next_vnode++;
7571077d0bdSPeter Avalos p->val = val;
7581077d0bdSPeter Avalos p->code = code;
7591077d0bdSPeter Avalos p->v0 = v0;
7601077d0bdSPeter Avalos p->v1 = v1;
76197a9217aSAntonio Huete Jimenez p->next = opt_state->hashtbl[hash];
76297a9217aSAntonio Huete Jimenez opt_state->hashtbl[hash] = p;
7631077d0bdSPeter Avalos
7641077d0bdSPeter Avalos return val;
7651077d0bdSPeter Avalos }
7661077d0bdSPeter Avalos
7671077d0bdSPeter Avalos static inline void
vstore(struct stmt * s,bpf_u_int32 * valp,bpf_u_int32 newval,int alter)768*ea16f64eSAntonio Huete Jimenez vstore(struct stmt *s, bpf_u_int32 *valp, bpf_u_int32 newval, int alter)
7691077d0bdSPeter Avalos {
7703a289941SAaron LI if (alter && newval != VAL_UNKNOWN && *valp == newval)
7711077d0bdSPeter Avalos s->code = NOP;
7721077d0bdSPeter Avalos else
7731077d0bdSPeter Avalos *valp = newval;
7741077d0bdSPeter Avalos }
7751077d0bdSPeter Avalos
77697a9217aSAntonio Huete Jimenez /*
77797a9217aSAntonio Huete Jimenez * Do constant-folding on binary operators.
77897a9217aSAntonio Huete Jimenez * (Unary operators are handled elsewhere.)
77997a9217aSAntonio Huete Jimenez */
7801077d0bdSPeter Avalos static void
fold_op(opt_state_t * opt_state,struct stmt * s,bpf_u_int32 v0,bpf_u_int32 v1)781*ea16f64eSAntonio Huete Jimenez fold_op(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 v0, bpf_u_int32 v1)
7821077d0bdSPeter Avalos {
7831077d0bdSPeter Avalos bpf_u_int32 a, b;
7841077d0bdSPeter Avalos
78597a9217aSAntonio Huete Jimenez a = opt_state->vmap[v0].const_val;
78697a9217aSAntonio Huete Jimenez b = opt_state->vmap[v1].const_val;
7871077d0bdSPeter Avalos
7881077d0bdSPeter Avalos switch (BPF_OP(s->code)) {
7891077d0bdSPeter Avalos case BPF_ADD:
7901077d0bdSPeter Avalos a += b;
7911077d0bdSPeter Avalos break;
7921077d0bdSPeter Avalos
7931077d0bdSPeter Avalos case BPF_SUB:
7941077d0bdSPeter Avalos a -= b;
7951077d0bdSPeter Avalos break;
7961077d0bdSPeter Avalos
7971077d0bdSPeter Avalos case BPF_MUL:
7981077d0bdSPeter Avalos a *= b;
7991077d0bdSPeter Avalos break;
8001077d0bdSPeter Avalos
8011077d0bdSPeter Avalos case BPF_DIV:
8021077d0bdSPeter Avalos if (b == 0)
8033a289941SAaron LI opt_error(opt_state, "division by zero");
8041077d0bdSPeter Avalos a /= b;
8051077d0bdSPeter Avalos break;
8061077d0bdSPeter Avalos
80797a9217aSAntonio Huete Jimenez case BPF_MOD:
80897a9217aSAntonio Huete Jimenez if (b == 0)
8093a289941SAaron LI opt_error(opt_state, "modulus by zero");
81097a9217aSAntonio Huete Jimenez a %= b;
81197a9217aSAntonio Huete Jimenez break;
81297a9217aSAntonio Huete Jimenez
8131077d0bdSPeter Avalos case BPF_AND:
8141077d0bdSPeter Avalos a &= b;
8151077d0bdSPeter Avalos break;
8161077d0bdSPeter Avalos
8171077d0bdSPeter Avalos case BPF_OR:
8181077d0bdSPeter Avalos a |= b;
8191077d0bdSPeter Avalos break;
8201077d0bdSPeter Avalos
82197a9217aSAntonio Huete Jimenez case BPF_XOR:
82297a9217aSAntonio Huete Jimenez a ^= b;
82397a9217aSAntonio Huete Jimenez break;
82497a9217aSAntonio Huete Jimenez
8251077d0bdSPeter Avalos case BPF_LSH:
8263a289941SAaron LI /*
8273a289941SAaron LI * A left shift of more than the width of the type
8283a289941SAaron LI * is undefined in C; we'll just treat it as shifting
8293a289941SAaron LI * all the bits out.
8303a289941SAaron LI *
8313a289941SAaron LI * XXX - the BPF interpreter doesn't check for this,
8323a289941SAaron LI * so its behavior is dependent on the behavior of
8333a289941SAaron LI * the processor on which it's running. There are
8343a289941SAaron LI * processors on which it shifts all the bits out
8353a289941SAaron LI * and processors on which it does no shift.
8363a289941SAaron LI */
8373a289941SAaron LI if (b < 32)
8381077d0bdSPeter Avalos a <<= b;
8393a289941SAaron LI else
8403a289941SAaron LI a = 0;
8411077d0bdSPeter Avalos break;
8421077d0bdSPeter Avalos
8431077d0bdSPeter Avalos case BPF_RSH:
8443a289941SAaron LI /*
8453a289941SAaron LI * A right shift of more than the width of the type
8463a289941SAaron LI * is undefined in C; we'll just treat it as shifting
8473a289941SAaron LI * all the bits out.
8483a289941SAaron LI *
8493a289941SAaron LI * XXX - the BPF interpreter doesn't check for this,
8503a289941SAaron LI * so its behavior is dependent on the behavior of
8513a289941SAaron LI * the processor on which it's running. There are
8523a289941SAaron LI * processors on which it shifts all the bits out
8533a289941SAaron LI * and processors on which it does no shift.
8543a289941SAaron LI */
8553a289941SAaron LI if (b < 32)
8561077d0bdSPeter Avalos a >>= b;
8573a289941SAaron LI else
8583a289941SAaron LI a = 0;
8591077d0bdSPeter Avalos break;
8601077d0bdSPeter Avalos
8611077d0bdSPeter Avalos default:
8621077d0bdSPeter Avalos abort();
8631077d0bdSPeter Avalos }
8641077d0bdSPeter Avalos s->k = a;
8651077d0bdSPeter Avalos s->code = BPF_LD|BPF_IMM;
866*ea16f64eSAntonio Huete Jimenez /*
867*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
868*ea16f64eSAntonio Huete Jimenez */
869*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
87097a9217aSAntonio Huete Jimenez opt_state->done = 0;
8711077d0bdSPeter Avalos }
8721077d0bdSPeter Avalos
8731077d0bdSPeter Avalos static inline struct slist *
this_op(struct slist * s)8740e381983SMatthew Dillon this_op(struct slist *s)
8751077d0bdSPeter Avalos {
8761077d0bdSPeter Avalos while (s != 0 && s->s.code == NOP)
8771077d0bdSPeter Avalos s = s->next;
8781077d0bdSPeter Avalos return s;
8791077d0bdSPeter Avalos }
8801077d0bdSPeter Avalos
8811077d0bdSPeter Avalos static void
opt_not(struct block * b)8820e381983SMatthew Dillon opt_not(struct block *b)
8831077d0bdSPeter Avalos {
8841077d0bdSPeter Avalos struct block *tmp = JT(b);
8851077d0bdSPeter Avalos
8861077d0bdSPeter Avalos JT(b) = JF(b);
8871077d0bdSPeter Avalos JF(b) = tmp;
8881077d0bdSPeter Avalos }
8891077d0bdSPeter Avalos
8901077d0bdSPeter Avalos static void
opt_peep(opt_state_t * opt_state,struct block * b)89197a9217aSAntonio Huete Jimenez opt_peep(opt_state_t *opt_state, struct block *b)
8921077d0bdSPeter Avalos {
8931077d0bdSPeter Avalos struct slist *s;
8941077d0bdSPeter Avalos struct slist *next, *last;
895*ea16f64eSAntonio Huete Jimenez bpf_u_int32 val;
8961077d0bdSPeter Avalos
8971077d0bdSPeter Avalos s = b->stmts;
8981077d0bdSPeter Avalos if (s == 0)
8991077d0bdSPeter Avalos return;
9001077d0bdSPeter Avalos
9011077d0bdSPeter Avalos last = s;
9021077d0bdSPeter Avalos for (/*empty*/; /*empty*/; s = next) {
9031077d0bdSPeter Avalos /*
9041077d0bdSPeter Avalos * Skip over nops.
9051077d0bdSPeter Avalos */
9061077d0bdSPeter Avalos s = this_op(s);
9071077d0bdSPeter Avalos if (s == 0)
9081077d0bdSPeter Avalos break; /* nothing left in the block */
9091077d0bdSPeter Avalos
9101077d0bdSPeter Avalos /*
9111077d0bdSPeter Avalos * Find the next real instruction after that one
9121077d0bdSPeter Avalos * (skipping nops).
9131077d0bdSPeter Avalos */
9141077d0bdSPeter Avalos next = this_op(s->next);
9151077d0bdSPeter Avalos if (next == 0)
9161077d0bdSPeter Avalos break; /* no next instruction */
9171077d0bdSPeter Avalos last = next;
9181077d0bdSPeter Avalos
9191077d0bdSPeter Avalos /*
9201077d0bdSPeter Avalos * st M[k] --> st M[k]
9211077d0bdSPeter Avalos * ldx M[k] tax
9221077d0bdSPeter Avalos */
9231077d0bdSPeter Avalos if (s->s.code == BPF_ST &&
9241077d0bdSPeter Avalos next->s.code == (BPF_LDX|BPF_MEM) &&
9251077d0bdSPeter Avalos s->s.k == next->s.k) {
926*ea16f64eSAntonio Huete Jimenez /*
927*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
928*ea16f64eSAntonio Huete Jimenez */
929*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
93097a9217aSAntonio Huete Jimenez opt_state->done = 0;
9311077d0bdSPeter Avalos next->s.code = BPF_MISC|BPF_TAX;
9321077d0bdSPeter Avalos }
9331077d0bdSPeter Avalos /*
9341077d0bdSPeter Avalos * ld #k --> ldx #k
9351077d0bdSPeter Avalos * tax txa
9361077d0bdSPeter Avalos */
9371077d0bdSPeter Avalos if (s->s.code == (BPF_LD|BPF_IMM) &&
9381077d0bdSPeter Avalos next->s.code == (BPF_MISC|BPF_TAX)) {
9391077d0bdSPeter Avalos s->s.code = BPF_LDX|BPF_IMM;
9401077d0bdSPeter Avalos next->s.code = BPF_MISC|BPF_TXA;
941*ea16f64eSAntonio Huete Jimenez /*
942*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
943*ea16f64eSAntonio Huete Jimenez */
944*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
94597a9217aSAntonio Huete Jimenez opt_state->done = 0;
9461077d0bdSPeter Avalos }
9471077d0bdSPeter Avalos /*
9481077d0bdSPeter Avalos * This is an ugly special case, but it happens
9491077d0bdSPeter Avalos * when you say tcp[k] or udp[k] where k is a constant.
9501077d0bdSPeter Avalos */
9511077d0bdSPeter Avalos if (s->s.code == (BPF_LD|BPF_IMM)) {
9521077d0bdSPeter Avalos struct slist *add, *tax, *ild;
9531077d0bdSPeter Avalos
9541077d0bdSPeter Avalos /*
9551077d0bdSPeter Avalos * Check that X isn't used on exit from this
9561077d0bdSPeter Avalos * block (which the optimizer might cause).
9571077d0bdSPeter Avalos * We know the code generator won't generate
9581077d0bdSPeter Avalos * any local dependencies.
9591077d0bdSPeter Avalos */
9601077d0bdSPeter Avalos if (ATOMELEM(b->out_use, X_ATOM))
9611077d0bdSPeter Avalos continue;
9621077d0bdSPeter Avalos
9631077d0bdSPeter Avalos /*
9641077d0bdSPeter Avalos * Check that the instruction following the ldi
9651077d0bdSPeter Avalos * is an addx, or it's an ldxms with an addx
9661077d0bdSPeter Avalos * following it (with 0 or more nops between the
9671077d0bdSPeter Avalos * ldxms and addx).
9681077d0bdSPeter Avalos */
9691077d0bdSPeter Avalos if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
9701077d0bdSPeter Avalos add = next;
9711077d0bdSPeter Avalos else
9721077d0bdSPeter Avalos add = this_op(next->next);
9731077d0bdSPeter Avalos if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
9741077d0bdSPeter Avalos continue;
9751077d0bdSPeter Avalos
9761077d0bdSPeter Avalos /*
9771077d0bdSPeter Avalos * Check that a tax follows that (with 0 or more
9781077d0bdSPeter Avalos * nops between them).
9791077d0bdSPeter Avalos */
9801077d0bdSPeter Avalos tax = this_op(add->next);
9811077d0bdSPeter Avalos if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
9821077d0bdSPeter Avalos continue;
9831077d0bdSPeter Avalos
9841077d0bdSPeter Avalos /*
9851077d0bdSPeter Avalos * Check that an ild follows that (with 0 or more
9861077d0bdSPeter Avalos * nops between them).
9871077d0bdSPeter Avalos */
9881077d0bdSPeter Avalos ild = this_op(tax->next);
9891077d0bdSPeter Avalos if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
9901077d0bdSPeter Avalos BPF_MODE(ild->s.code) != BPF_IND)
9911077d0bdSPeter Avalos continue;
9921077d0bdSPeter Avalos /*
9931077d0bdSPeter Avalos * We want to turn this sequence:
9941077d0bdSPeter Avalos *
9951077d0bdSPeter Avalos * (004) ldi #0x2 {s}
9961077d0bdSPeter Avalos * (005) ldxms [14] {next} -- optional
9971077d0bdSPeter Avalos * (006) addx {add}
9981077d0bdSPeter Avalos * (007) tax {tax}
9991077d0bdSPeter Avalos * (008) ild [x+0] {ild}
10001077d0bdSPeter Avalos *
10011077d0bdSPeter Avalos * into this sequence:
10021077d0bdSPeter Avalos *
10031077d0bdSPeter Avalos * (004) nop
10041077d0bdSPeter Avalos * (005) ldxms [14]
10051077d0bdSPeter Avalos * (006) nop
10061077d0bdSPeter Avalos * (007) nop
10071077d0bdSPeter Avalos * (008) ild [x+2]
10081077d0bdSPeter Avalos *
10091077d0bdSPeter Avalos * XXX We need to check that X is not
10101077d0bdSPeter Avalos * subsequently used, because we want to change
10111077d0bdSPeter Avalos * what'll be in it after this sequence.
10121077d0bdSPeter Avalos *
10131077d0bdSPeter Avalos * We know we can eliminate the accumulator
10141077d0bdSPeter Avalos * modifications earlier in the sequence since
10151077d0bdSPeter Avalos * it is defined by the last stmt of this sequence
10161077d0bdSPeter Avalos * (i.e., the last statement of the sequence loads
10171077d0bdSPeter Avalos * a value into the accumulator, so we can eliminate
10181077d0bdSPeter Avalos * earlier operations on the accumulator).
10191077d0bdSPeter Avalos */
10201077d0bdSPeter Avalos ild->s.k += s->s.k;
10211077d0bdSPeter Avalos s->s.code = NOP;
10221077d0bdSPeter Avalos add->s.code = NOP;
10231077d0bdSPeter Avalos tax->s.code = NOP;
1024*ea16f64eSAntonio Huete Jimenez /*
1025*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
1026*ea16f64eSAntonio Huete Jimenez */
1027*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
102897a9217aSAntonio Huete Jimenez opt_state->done = 0;
10291077d0bdSPeter Avalos }
10301077d0bdSPeter Avalos }
10311077d0bdSPeter Avalos /*
10321077d0bdSPeter Avalos * If the comparison at the end of a block is an equality
10331077d0bdSPeter Avalos * comparison against a constant, and nobody uses the value
10341077d0bdSPeter Avalos * we leave in the A register at the end of a block, and
10351077d0bdSPeter Avalos * the operation preceding the comparison is an arithmetic
10361077d0bdSPeter Avalos * operation, we can sometime optimize it away.
10371077d0bdSPeter Avalos */
10381077d0bdSPeter Avalos if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
10391077d0bdSPeter Avalos !ATOMELEM(b->out_use, A_ATOM)) {
10401077d0bdSPeter Avalos /*
10411077d0bdSPeter Avalos * We can optimize away certain subtractions of the
10421077d0bdSPeter Avalos * X register.
10431077d0bdSPeter Avalos */
10441077d0bdSPeter Avalos if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
10451077d0bdSPeter Avalos val = b->val[X_ATOM];
104697a9217aSAntonio Huete Jimenez if (opt_state->vmap[val].is_const) {
10471077d0bdSPeter Avalos /*
10481077d0bdSPeter Avalos * If we have a subtract to do a comparison,
10491077d0bdSPeter Avalos * and the X register is a known constant,
10501077d0bdSPeter Avalos * we can merge this value into the
10511077d0bdSPeter Avalos * comparison:
10521077d0bdSPeter Avalos *
10531077d0bdSPeter Avalos * sub x -> nop
10541077d0bdSPeter Avalos * jeq #y jeq #(x+y)
10551077d0bdSPeter Avalos */
105697a9217aSAntonio Huete Jimenez b->s.k += opt_state->vmap[val].const_val;
10571077d0bdSPeter Avalos last->s.code = NOP;
1058*ea16f64eSAntonio Huete Jimenez /*
1059*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
1060*ea16f64eSAntonio Huete Jimenez */
1061*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
106297a9217aSAntonio Huete Jimenez opt_state->done = 0;
10631077d0bdSPeter Avalos } else if (b->s.k == 0) {
10641077d0bdSPeter Avalos /*
10651077d0bdSPeter Avalos * If the X register isn't a constant,
10661077d0bdSPeter Avalos * and the comparison in the test is
10671077d0bdSPeter Avalos * against 0, we can compare with the
10681077d0bdSPeter Avalos * X register, instead:
10691077d0bdSPeter Avalos *
10701077d0bdSPeter Avalos * sub x -> nop
10711077d0bdSPeter Avalos * jeq #0 jeq x
10721077d0bdSPeter Avalos */
10731077d0bdSPeter Avalos last->s.code = NOP;
10741077d0bdSPeter Avalos b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
1075*ea16f64eSAntonio Huete Jimenez /*
1076*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
1077*ea16f64eSAntonio Huete Jimenez */
1078*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
107997a9217aSAntonio Huete Jimenez opt_state->done = 0;
10801077d0bdSPeter Avalos }
10811077d0bdSPeter Avalos }
10821077d0bdSPeter Avalos /*
10831077d0bdSPeter Avalos * Likewise, a constant subtract can be simplified:
10841077d0bdSPeter Avalos *
10851077d0bdSPeter Avalos * sub #x -> nop
10861077d0bdSPeter Avalos * jeq #y -> jeq #(x+y)
10871077d0bdSPeter Avalos */
10881077d0bdSPeter Avalos else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
10891077d0bdSPeter Avalos last->s.code = NOP;
10901077d0bdSPeter Avalos b->s.k += last->s.k;
1091*ea16f64eSAntonio Huete Jimenez /*
1092*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
1093*ea16f64eSAntonio Huete Jimenez */
1094*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
109597a9217aSAntonio Huete Jimenez opt_state->done = 0;
10961077d0bdSPeter Avalos }
10971077d0bdSPeter Avalos /*
10981077d0bdSPeter Avalos * And, similarly, a constant AND can be simplified
10991077d0bdSPeter Avalos * if we're testing against 0, i.e.:
11001077d0bdSPeter Avalos *
11011077d0bdSPeter Avalos * and #k nop
11021077d0bdSPeter Avalos * jeq #0 -> jset #k
11031077d0bdSPeter Avalos */
11041077d0bdSPeter Avalos else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
11051077d0bdSPeter Avalos b->s.k == 0) {
11061077d0bdSPeter Avalos b->s.k = last->s.k;
11071077d0bdSPeter Avalos b->s.code = BPF_JMP|BPF_K|BPF_JSET;
11081077d0bdSPeter Avalos last->s.code = NOP;
1109*ea16f64eSAntonio Huete Jimenez /*
1110*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
1111*ea16f64eSAntonio Huete Jimenez */
1112*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
111397a9217aSAntonio Huete Jimenez opt_state->done = 0;
11141077d0bdSPeter Avalos opt_not(b);
11151077d0bdSPeter Avalos }
11161077d0bdSPeter Avalos }
11171077d0bdSPeter Avalos /*
11181077d0bdSPeter Avalos * jset #0 -> never
11191077d0bdSPeter Avalos * jset #ffffffff -> always
11201077d0bdSPeter Avalos */
11211077d0bdSPeter Avalos if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
11221077d0bdSPeter Avalos if (b->s.k == 0)
11231077d0bdSPeter Avalos JT(b) = JF(b);
1124*ea16f64eSAntonio Huete Jimenez if (b->s.k == 0xffffffffU)
11251077d0bdSPeter Avalos JF(b) = JT(b);
11261077d0bdSPeter Avalos }
11271077d0bdSPeter Avalos /*
1128de0d3203SPeter Avalos * If we're comparing against the index register, and the index
1129de0d3203SPeter Avalos * register is a known constant, we can just compare against that
1130de0d3203SPeter Avalos * constant.
1131de0d3203SPeter Avalos */
1132de0d3203SPeter Avalos val = b->val[X_ATOM];
113397a9217aSAntonio Huete Jimenez if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1134*ea16f64eSAntonio Huete Jimenez bpf_u_int32 v = opt_state->vmap[val].const_val;
1135de0d3203SPeter Avalos b->s.code &= ~BPF_X;
1136de0d3203SPeter Avalos b->s.k = v;
1137de0d3203SPeter Avalos }
1138de0d3203SPeter Avalos /*
11391077d0bdSPeter Avalos * If the accumulator is a known constant, we can compute the
11401077d0bdSPeter Avalos * comparison result.
11411077d0bdSPeter Avalos */
11421077d0bdSPeter Avalos val = b->val[A_ATOM];
114397a9217aSAntonio Huete Jimenez if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1144*ea16f64eSAntonio Huete Jimenez bpf_u_int32 v = opt_state->vmap[val].const_val;
11451077d0bdSPeter Avalos switch (BPF_OP(b->s.code)) {
11461077d0bdSPeter Avalos
11471077d0bdSPeter Avalos case BPF_JEQ:
11481077d0bdSPeter Avalos v = v == b->s.k;
11491077d0bdSPeter Avalos break;
11501077d0bdSPeter Avalos
11511077d0bdSPeter Avalos case BPF_JGT:
1152*ea16f64eSAntonio Huete Jimenez v = v > b->s.k;
11531077d0bdSPeter Avalos break;
11541077d0bdSPeter Avalos
11551077d0bdSPeter Avalos case BPF_JGE:
1156*ea16f64eSAntonio Huete Jimenez v = v >= b->s.k;
11571077d0bdSPeter Avalos break;
11581077d0bdSPeter Avalos
11591077d0bdSPeter Avalos case BPF_JSET:
11601077d0bdSPeter Avalos v &= b->s.k;
11611077d0bdSPeter Avalos break;
11621077d0bdSPeter Avalos
11631077d0bdSPeter Avalos default:
11641077d0bdSPeter Avalos abort();
11651077d0bdSPeter Avalos }
1166*ea16f64eSAntonio Huete Jimenez if (JF(b) != JT(b)) {
1167*ea16f64eSAntonio Huete Jimenez /*
1168*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
1169*ea16f64eSAntonio Huete Jimenez */
1170*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
117197a9217aSAntonio Huete Jimenez opt_state->done = 0;
1172*ea16f64eSAntonio Huete Jimenez }
11731077d0bdSPeter Avalos if (v)
11741077d0bdSPeter Avalos JF(b) = JT(b);
11751077d0bdSPeter Avalos else
11761077d0bdSPeter Avalos JT(b) = JF(b);
11771077d0bdSPeter Avalos }
11781077d0bdSPeter Avalos }
11791077d0bdSPeter Avalos
11801077d0bdSPeter Avalos /*
11811077d0bdSPeter Avalos * Compute the symbolic value of expression of 's', and update
11821077d0bdSPeter Avalos * anything it defines in the value table 'val'. If 'alter' is true,
11831077d0bdSPeter Avalos * do various optimizations. This code would be cleaner if symbolic
11841077d0bdSPeter Avalos * evaluation and code transformations weren't folded together.
11851077d0bdSPeter Avalos */
11861077d0bdSPeter Avalos static void
opt_stmt(opt_state_t * opt_state,struct stmt * s,bpf_u_int32 val[],int alter)1187*ea16f64eSAntonio Huete Jimenez opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter)
11881077d0bdSPeter Avalos {
11891077d0bdSPeter Avalos int op;
1190*ea16f64eSAntonio Huete Jimenez bpf_u_int32 v;
11911077d0bdSPeter Avalos
11921077d0bdSPeter Avalos switch (s->code) {
11931077d0bdSPeter Avalos
11941077d0bdSPeter Avalos case BPF_LD|BPF_ABS|BPF_W:
11951077d0bdSPeter Avalos case BPF_LD|BPF_ABS|BPF_H:
11961077d0bdSPeter Avalos case BPF_LD|BPF_ABS|BPF_B:
119797a9217aSAntonio Huete Jimenez v = F(opt_state, s->code, s->k, 0L);
11981077d0bdSPeter Avalos vstore(s, &val[A_ATOM], v, alter);
11991077d0bdSPeter Avalos break;
12001077d0bdSPeter Avalos
12011077d0bdSPeter Avalos case BPF_LD|BPF_IND|BPF_W:
12021077d0bdSPeter Avalos case BPF_LD|BPF_IND|BPF_H:
12031077d0bdSPeter Avalos case BPF_LD|BPF_IND|BPF_B:
12041077d0bdSPeter Avalos v = val[X_ATOM];
120597a9217aSAntonio Huete Jimenez if (alter && opt_state->vmap[v].is_const) {
12061077d0bdSPeter Avalos s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
120797a9217aSAntonio Huete Jimenez s->k += opt_state->vmap[v].const_val;
120897a9217aSAntonio Huete Jimenez v = F(opt_state, s->code, s->k, 0L);
1209*ea16f64eSAntonio Huete Jimenez /*
1210*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
1211*ea16f64eSAntonio Huete Jimenez */
1212*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
121397a9217aSAntonio Huete Jimenez opt_state->done = 0;
12141077d0bdSPeter Avalos }
12151077d0bdSPeter Avalos else
121697a9217aSAntonio Huete Jimenez v = F(opt_state, s->code, s->k, v);
12171077d0bdSPeter Avalos vstore(s, &val[A_ATOM], v, alter);
12181077d0bdSPeter Avalos break;
12191077d0bdSPeter Avalos
12201077d0bdSPeter Avalos case BPF_LD|BPF_LEN:
122197a9217aSAntonio Huete Jimenez v = F(opt_state, s->code, 0L, 0L);
12221077d0bdSPeter Avalos vstore(s, &val[A_ATOM], v, alter);
12231077d0bdSPeter Avalos break;
12241077d0bdSPeter Avalos
12251077d0bdSPeter Avalos case BPF_LD|BPF_IMM:
12261077d0bdSPeter Avalos v = K(s->k);
12271077d0bdSPeter Avalos vstore(s, &val[A_ATOM], v, alter);
12281077d0bdSPeter Avalos break;
12291077d0bdSPeter Avalos
12301077d0bdSPeter Avalos case BPF_LDX|BPF_IMM:
12311077d0bdSPeter Avalos v = K(s->k);
12321077d0bdSPeter Avalos vstore(s, &val[X_ATOM], v, alter);
12331077d0bdSPeter Avalos break;
12341077d0bdSPeter Avalos
12351077d0bdSPeter Avalos case BPF_LDX|BPF_MSH|BPF_B:
123697a9217aSAntonio Huete Jimenez v = F(opt_state, s->code, s->k, 0L);
12371077d0bdSPeter Avalos vstore(s, &val[X_ATOM], v, alter);
12381077d0bdSPeter Avalos break;
12391077d0bdSPeter Avalos
12401077d0bdSPeter Avalos case BPF_ALU|BPF_NEG:
124197a9217aSAntonio Huete Jimenez if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
12421077d0bdSPeter Avalos s->code = BPF_LD|BPF_IMM;
12433a289941SAaron LI /*
12443a289941SAaron LI * Do this negation as unsigned arithmetic; that's
12453a289941SAaron LI * what modern BPF engines do, and it guarantees
12463a289941SAaron LI * that all possible values can be negated. (Yeah,
12473a289941SAaron LI * negating 0x80000000, the minimum signed 32-bit
12483a289941SAaron LI * two's-complement value, results in 0x80000000,
12493a289941SAaron LI * so it's still negative, but we *should* be doing
12503a289941SAaron LI * all unsigned arithmetic here, to match what
12513a289941SAaron LI * modern BPF engines do.)
12523a289941SAaron LI *
12533a289941SAaron LI * Express it as 0U - (unsigned value) so that we
12543a289941SAaron LI * don't get compiler warnings about negating an
12553a289941SAaron LI * unsigned value and don't get UBSan warnings
12563a289941SAaron LI * about the result of negating 0x80000000 being
12573a289941SAaron LI * undefined.
12583a289941SAaron LI */
1259*ea16f64eSAntonio Huete Jimenez s->k = 0U - opt_state->vmap[val[A_ATOM]].const_val;
12601077d0bdSPeter Avalos val[A_ATOM] = K(s->k);
12611077d0bdSPeter Avalos }
12621077d0bdSPeter Avalos else
126397a9217aSAntonio Huete Jimenez val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
12641077d0bdSPeter Avalos break;
12651077d0bdSPeter Avalos
12661077d0bdSPeter Avalos case BPF_ALU|BPF_ADD|BPF_K:
12671077d0bdSPeter Avalos case BPF_ALU|BPF_SUB|BPF_K:
12681077d0bdSPeter Avalos case BPF_ALU|BPF_MUL|BPF_K:
12691077d0bdSPeter Avalos case BPF_ALU|BPF_DIV|BPF_K:
127097a9217aSAntonio Huete Jimenez case BPF_ALU|BPF_MOD|BPF_K:
12711077d0bdSPeter Avalos case BPF_ALU|BPF_AND|BPF_K:
12721077d0bdSPeter Avalos case BPF_ALU|BPF_OR|BPF_K:
127397a9217aSAntonio Huete Jimenez case BPF_ALU|BPF_XOR|BPF_K:
12741077d0bdSPeter Avalos case BPF_ALU|BPF_LSH|BPF_K:
12751077d0bdSPeter Avalos case BPF_ALU|BPF_RSH|BPF_K:
12761077d0bdSPeter Avalos op = BPF_OP(s->code);
12771077d0bdSPeter Avalos if (alter) {
12781077d0bdSPeter Avalos if (s->k == 0) {
12793a289941SAaron LI /*
12803a289941SAaron LI * Optimize operations where the constant
12813a289941SAaron LI * is zero.
12823a289941SAaron LI *
12833a289941SAaron LI * Don't optimize away "sub #0"
12841077d0bdSPeter Avalos * as it may be needed later to
12853a289941SAaron LI * fixup the generated math code.
12863a289941SAaron LI *
12873a289941SAaron LI * Fail if we're dividing by zero or taking
12883a289941SAaron LI * a modulus by zero.
12893a289941SAaron LI */
12901077d0bdSPeter Avalos if (op == BPF_ADD ||
12911077d0bdSPeter Avalos op == BPF_LSH || op == BPF_RSH ||
129297a9217aSAntonio Huete Jimenez op == BPF_OR || op == BPF_XOR) {
12931077d0bdSPeter Avalos s->code = NOP;
12941077d0bdSPeter Avalos break;
12951077d0bdSPeter Avalos }
12961077d0bdSPeter Avalos if (op == BPF_MUL || op == BPF_AND) {
12971077d0bdSPeter Avalos s->code = BPF_LD|BPF_IMM;
12981077d0bdSPeter Avalos val[A_ATOM] = K(s->k);
12991077d0bdSPeter Avalos break;
13001077d0bdSPeter Avalos }
13013a289941SAaron LI if (op == BPF_DIV)
13023a289941SAaron LI opt_error(opt_state,
13033a289941SAaron LI "division by zero");
13043a289941SAaron LI if (op == BPF_MOD)
13053a289941SAaron LI opt_error(opt_state,
13063a289941SAaron LI "modulus by zero");
13071077d0bdSPeter Avalos }
130897a9217aSAntonio Huete Jimenez if (opt_state->vmap[val[A_ATOM]].is_const) {
13093a289941SAaron LI fold_op(opt_state, s, val[A_ATOM], K(s->k));
13101077d0bdSPeter Avalos val[A_ATOM] = K(s->k);
13111077d0bdSPeter Avalos break;
13121077d0bdSPeter Avalos }
13131077d0bdSPeter Avalos }
131497a9217aSAntonio Huete Jimenez val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
13151077d0bdSPeter Avalos break;
13161077d0bdSPeter Avalos
13171077d0bdSPeter Avalos case BPF_ALU|BPF_ADD|BPF_X:
13181077d0bdSPeter Avalos case BPF_ALU|BPF_SUB|BPF_X:
13191077d0bdSPeter Avalos case BPF_ALU|BPF_MUL|BPF_X:
13201077d0bdSPeter Avalos case BPF_ALU|BPF_DIV|BPF_X:
132197a9217aSAntonio Huete Jimenez case BPF_ALU|BPF_MOD|BPF_X:
13221077d0bdSPeter Avalos case BPF_ALU|BPF_AND|BPF_X:
13231077d0bdSPeter Avalos case BPF_ALU|BPF_OR|BPF_X:
132497a9217aSAntonio Huete Jimenez case BPF_ALU|BPF_XOR|BPF_X:
13251077d0bdSPeter Avalos case BPF_ALU|BPF_LSH|BPF_X:
13261077d0bdSPeter Avalos case BPF_ALU|BPF_RSH|BPF_X:
13271077d0bdSPeter Avalos op = BPF_OP(s->code);
132897a9217aSAntonio Huete Jimenez if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
132997a9217aSAntonio Huete Jimenez if (opt_state->vmap[val[A_ATOM]].is_const) {
13303a289941SAaron LI fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
13311077d0bdSPeter Avalos val[A_ATOM] = K(s->k);
13321077d0bdSPeter Avalos }
13331077d0bdSPeter Avalos else {
13341077d0bdSPeter Avalos s->code = BPF_ALU|BPF_K|op;
133597a9217aSAntonio Huete Jimenez s->k = opt_state->vmap[val[X_ATOM]].const_val;
13363a289941SAaron LI if ((op == BPF_LSH || op == BPF_RSH) &&
1337*ea16f64eSAntonio Huete Jimenez s->k > 31)
13383a289941SAaron LI opt_error(opt_state,
13393a289941SAaron LI "shift by more than 31 bits");
1340*ea16f64eSAntonio Huete Jimenez /*
1341*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
1342*ea16f64eSAntonio Huete Jimenez */
1343*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
134497a9217aSAntonio Huete Jimenez opt_state->done = 0;
13451077d0bdSPeter Avalos val[A_ATOM] =
134697a9217aSAntonio Huete Jimenez F(opt_state, s->code, val[A_ATOM], K(s->k));
13471077d0bdSPeter Avalos }
13481077d0bdSPeter Avalos break;
13491077d0bdSPeter Avalos }
13501077d0bdSPeter Avalos /*
13511077d0bdSPeter Avalos * Check if we're doing something to an accumulator
13521077d0bdSPeter Avalos * that is 0, and simplify. This may not seem like
13531077d0bdSPeter Avalos * much of a simplification but it could open up further
13541077d0bdSPeter Avalos * optimizations.
13551077d0bdSPeter Avalos * XXX We could also check for mul by 1, etc.
13561077d0bdSPeter Avalos */
135797a9217aSAntonio Huete Jimenez if (alter && opt_state->vmap[val[A_ATOM]].is_const
135897a9217aSAntonio Huete Jimenez && opt_state->vmap[val[A_ATOM]].const_val == 0) {
135997a9217aSAntonio Huete Jimenez if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
13601077d0bdSPeter Avalos s->code = BPF_MISC|BPF_TXA;
13611077d0bdSPeter Avalos vstore(s, &val[A_ATOM], val[X_ATOM], alter);
13621077d0bdSPeter Avalos break;
13631077d0bdSPeter Avalos }
136497a9217aSAntonio Huete Jimenez else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
13651077d0bdSPeter Avalos op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
13661077d0bdSPeter Avalos s->code = BPF_LD|BPF_IMM;
13671077d0bdSPeter Avalos s->k = 0;
13681077d0bdSPeter Avalos vstore(s, &val[A_ATOM], K(s->k), alter);
13691077d0bdSPeter Avalos break;
13701077d0bdSPeter Avalos }
13711077d0bdSPeter Avalos else if (op == BPF_NEG) {
13721077d0bdSPeter Avalos s->code = NOP;
13731077d0bdSPeter Avalos break;
13741077d0bdSPeter Avalos }
13751077d0bdSPeter Avalos }
137697a9217aSAntonio Huete Jimenez val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
13771077d0bdSPeter Avalos break;
13781077d0bdSPeter Avalos
13791077d0bdSPeter Avalos case BPF_MISC|BPF_TXA:
13801077d0bdSPeter Avalos vstore(s, &val[A_ATOM], val[X_ATOM], alter);
13811077d0bdSPeter Avalos break;
13821077d0bdSPeter Avalos
13831077d0bdSPeter Avalos case BPF_LD|BPF_MEM:
13841077d0bdSPeter Avalos v = val[s->k];
138597a9217aSAntonio Huete Jimenez if (alter && opt_state->vmap[v].is_const) {
13861077d0bdSPeter Avalos s->code = BPF_LD|BPF_IMM;
138797a9217aSAntonio Huete Jimenez s->k = opt_state->vmap[v].const_val;
1388*ea16f64eSAntonio Huete Jimenez /*
1389*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
1390*ea16f64eSAntonio Huete Jimenez */
1391*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
139297a9217aSAntonio Huete Jimenez opt_state->done = 0;
13931077d0bdSPeter Avalos }
13941077d0bdSPeter Avalos vstore(s, &val[A_ATOM], v, alter);
13951077d0bdSPeter Avalos break;
13961077d0bdSPeter Avalos
13971077d0bdSPeter Avalos case BPF_MISC|BPF_TAX:
13981077d0bdSPeter Avalos vstore(s, &val[X_ATOM], val[A_ATOM], alter);
13991077d0bdSPeter Avalos break;
14001077d0bdSPeter Avalos
14011077d0bdSPeter Avalos case BPF_LDX|BPF_MEM:
14021077d0bdSPeter Avalos v = val[s->k];
140397a9217aSAntonio Huete Jimenez if (alter && opt_state->vmap[v].is_const) {
14041077d0bdSPeter Avalos s->code = BPF_LDX|BPF_IMM;
140597a9217aSAntonio Huete Jimenez s->k = opt_state->vmap[v].const_val;
1406*ea16f64eSAntonio Huete Jimenez /*
1407*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
1408*ea16f64eSAntonio Huete Jimenez */
1409*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
141097a9217aSAntonio Huete Jimenez opt_state->done = 0;
14111077d0bdSPeter Avalos }
14121077d0bdSPeter Avalos vstore(s, &val[X_ATOM], v, alter);
14131077d0bdSPeter Avalos break;
14141077d0bdSPeter Avalos
14151077d0bdSPeter Avalos case BPF_ST:
14161077d0bdSPeter Avalos vstore(s, &val[s->k], val[A_ATOM], alter);
14171077d0bdSPeter Avalos break;
14181077d0bdSPeter Avalos
14191077d0bdSPeter Avalos case BPF_STX:
14201077d0bdSPeter Avalos vstore(s, &val[s->k], val[X_ATOM], alter);
14211077d0bdSPeter Avalos break;
14221077d0bdSPeter Avalos }
14231077d0bdSPeter Avalos }
14241077d0bdSPeter Avalos
14251077d0bdSPeter Avalos static void
deadstmt(opt_state_t * opt_state,register struct stmt * s,register struct stmt * last[])142697a9217aSAntonio Huete Jimenez deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
14271077d0bdSPeter Avalos {
14281077d0bdSPeter Avalos register int atom;
14291077d0bdSPeter Avalos
14301077d0bdSPeter Avalos atom = atomuse(s);
14311077d0bdSPeter Avalos if (atom >= 0) {
14321077d0bdSPeter Avalos if (atom == AX_ATOM) {
14331077d0bdSPeter Avalos last[X_ATOM] = 0;
14341077d0bdSPeter Avalos last[A_ATOM] = 0;
14351077d0bdSPeter Avalos }
14361077d0bdSPeter Avalos else
14371077d0bdSPeter Avalos last[atom] = 0;
14381077d0bdSPeter Avalos }
14391077d0bdSPeter Avalos atom = atomdef(s);
14401077d0bdSPeter Avalos if (atom >= 0) {
14411077d0bdSPeter Avalos if (last[atom]) {
1442*ea16f64eSAntonio Huete Jimenez /*
1443*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
1444*ea16f64eSAntonio Huete Jimenez */
1445*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
144697a9217aSAntonio Huete Jimenez opt_state->done = 0;
14471077d0bdSPeter Avalos last[atom]->code = NOP;
14481077d0bdSPeter Avalos }
14491077d0bdSPeter Avalos last[atom] = s;
14501077d0bdSPeter Avalos }
14511077d0bdSPeter Avalos }
14521077d0bdSPeter Avalos
14531077d0bdSPeter Avalos static void
opt_deadstores(opt_state_t * opt_state,register struct block * b)145497a9217aSAntonio Huete Jimenez opt_deadstores(opt_state_t *opt_state, register struct block *b)
14551077d0bdSPeter Avalos {
14561077d0bdSPeter Avalos register struct slist *s;
14571077d0bdSPeter Avalos register int atom;
14581077d0bdSPeter Avalos struct stmt *last[N_ATOMS];
14591077d0bdSPeter Avalos
14601077d0bdSPeter Avalos memset((char *)last, 0, sizeof last);
14611077d0bdSPeter Avalos
14621077d0bdSPeter Avalos for (s = b->stmts; s != 0; s = s->next)
146397a9217aSAntonio Huete Jimenez deadstmt(opt_state, &s->s, last);
146497a9217aSAntonio Huete Jimenez deadstmt(opt_state, &b->s, last);
14651077d0bdSPeter Avalos
14661077d0bdSPeter Avalos for (atom = 0; atom < N_ATOMS; ++atom)
14671077d0bdSPeter Avalos if (last[atom] && !ATOMELEM(b->out_use, atom)) {
14681077d0bdSPeter Avalos last[atom]->code = NOP;
1469*ea16f64eSAntonio Huete Jimenez /*
1470*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
1471*ea16f64eSAntonio Huete Jimenez */
1472*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
147397a9217aSAntonio Huete Jimenez opt_state->done = 0;
14741077d0bdSPeter Avalos }
14751077d0bdSPeter Avalos }
14761077d0bdSPeter Avalos
14771077d0bdSPeter Avalos static void
opt_blk(opt_state_t * opt_state,struct block * b,int do_stmts)14783a289941SAaron LI opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
14791077d0bdSPeter Avalos {
14801077d0bdSPeter Avalos struct slist *s;
14811077d0bdSPeter Avalos struct edge *p;
14821077d0bdSPeter Avalos int i;
1483*ea16f64eSAntonio Huete Jimenez bpf_u_int32 aval, xval;
14841077d0bdSPeter Avalos
14851077d0bdSPeter Avalos #if 0
14861077d0bdSPeter Avalos for (s = b->stmts; s && s->next; s = s->next)
14871077d0bdSPeter Avalos if (BPF_CLASS(s->s.code) == BPF_JMP) {
14881077d0bdSPeter Avalos do_stmts = 0;
14891077d0bdSPeter Avalos break;
14901077d0bdSPeter Avalos }
14911077d0bdSPeter Avalos #endif
14921077d0bdSPeter Avalos
14931077d0bdSPeter Avalos /*
14941077d0bdSPeter Avalos * Initialize the atom values.
14951077d0bdSPeter Avalos */
14961077d0bdSPeter Avalos p = b->in_edges;
14971077d0bdSPeter Avalos if (p == 0) {
14981077d0bdSPeter Avalos /*
14991077d0bdSPeter Avalos * We have no predecessors, so everything is undefined
15001077d0bdSPeter Avalos * upon entry to this block.
15011077d0bdSPeter Avalos */
15021077d0bdSPeter Avalos memset((char *)b->val, 0, sizeof(b->val));
15031077d0bdSPeter Avalos } else {
15041077d0bdSPeter Avalos /*
15051077d0bdSPeter Avalos * Inherit values from our predecessors.
15061077d0bdSPeter Avalos *
15071077d0bdSPeter Avalos * First, get the values from the predecessor along the
15081077d0bdSPeter Avalos * first edge leading to this node.
15091077d0bdSPeter Avalos */
15101077d0bdSPeter Avalos memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
15111077d0bdSPeter Avalos /*
15121077d0bdSPeter Avalos * Now look at all the other nodes leading to this node.
15131077d0bdSPeter Avalos * If, for the predecessor along that edge, a register
15141077d0bdSPeter Avalos * has a different value from the one we have (i.e.,
15151077d0bdSPeter Avalos * control paths are merging, and the merging paths
15161077d0bdSPeter Avalos * assign different values to that register), give the
15171077d0bdSPeter Avalos * register the undefined value of 0.
15181077d0bdSPeter Avalos */
15191077d0bdSPeter Avalos while ((p = p->next) != NULL) {
15201077d0bdSPeter Avalos for (i = 0; i < N_ATOMS; ++i)
15211077d0bdSPeter Avalos if (b->val[i] != p->pred->val[i])
15221077d0bdSPeter Avalos b->val[i] = 0;
15231077d0bdSPeter Avalos }
15241077d0bdSPeter Avalos }
15251077d0bdSPeter Avalos aval = b->val[A_ATOM];
15261077d0bdSPeter Avalos xval = b->val[X_ATOM];
15271077d0bdSPeter Avalos for (s = b->stmts; s; s = s->next)
15283a289941SAaron LI opt_stmt(opt_state, &s->s, b->val, do_stmts);
15291077d0bdSPeter Avalos
15301077d0bdSPeter Avalos /*
15311077d0bdSPeter Avalos * This is a special case: if we don't use anything from this
15321077d0bdSPeter Avalos * block, and we load the accumulator or index register with a
15331077d0bdSPeter Avalos * value that is already there, or if this block is a return,
15341077d0bdSPeter Avalos * eliminate all the statements.
15351077d0bdSPeter Avalos *
1536*ea16f64eSAntonio Huete Jimenez * XXX - what if it does a store? Presumably that falls under
1537*ea16f64eSAntonio Huete Jimenez * the heading of "if we don't use anything from this block",
1538*ea16f64eSAntonio Huete Jimenez * i.e., if we use any memory location set to a different
1539*ea16f64eSAntonio Huete Jimenez * value by this block, then we use something from this block.
15401077d0bdSPeter Avalos *
15411077d0bdSPeter Avalos * XXX - why does it matter whether we use anything from this
15421077d0bdSPeter Avalos * block? If the accumulator or index register doesn't change
15431077d0bdSPeter Avalos * its value, isn't that OK even if we use that value?
15441077d0bdSPeter Avalos *
15451077d0bdSPeter Avalos * XXX - if we load the accumulator with a different value,
15461077d0bdSPeter Avalos * and the block ends with a conditional branch, we obviously
15471077d0bdSPeter Avalos * can't eliminate it, as the branch depends on that value.
15481077d0bdSPeter Avalos * For the index register, the conditional branch only depends
15491077d0bdSPeter Avalos * on the index register value if the test is against the index
15501077d0bdSPeter Avalos * register value rather than a constant; if nothing uses the
15511077d0bdSPeter Avalos * value we put into the index register, and we're not testing
15521077d0bdSPeter Avalos * against the index register's value, and there aren't any
15531077d0bdSPeter Avalos * other problems that would keep us from eliminating this
15541077d0bdSPeter Avalos * block, can we eliminate it?
15551077d0bdSPeter Avalos */
15561077d0bdSPeter Avalos if (do_stmts &&
15573a289941SAaron LI ((b->out_use == 0 &&
15583a289941SAaron LI aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
15593a289941SAaron LI xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
15601077d0bdSPeter Avalos BPF_CLASS(b->s.code) == BPF_RET)) {
15611077d0bdSPeter Avalos if (b->stmts != 0) {
15621077d0bdSPeter Avalos b->stmts = 0;
1563*ea16f64eSAntonio Huete Jimenez /*
1564*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
1565*ea16f64eSAntonio Huete Jimenez */
1566*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
156797a9217aSAntonio Huete Jimenez opt_state->done = 0;
15681077d0bdSPeter Avalos }
15691077d0bdSPeter Avalos } else {
157097a9217aSAntonio Huete Jimenez opt_peep(opt_state, b);
157197a9217aSAntonio Huete Jimenez opt_deadstores(opt_state, b);
15721077d0bdSPeter Avalos }
15731077d0bdSPeter Avalos /*
15741077d0bdSPeter Avalos * Set up values for branch optimizer.
15751077d0bdSPeter Avalos */
15761077d0bdSPeter Avalos if (BPF_SRC(b->s.code) == BPF_K)
15771077d0bdSPeter Avalos b->oval = K(b->s.k);
15781077d0bdSPeter Avalos else
15791077d0bdSPeter Avalos b->oval = b->val[X_ATOM];
15801077d0bdSPeter Avalos b->et.code = b->s.code;
15811077d0bdSPeter Avalos b->ef.code = -b->s.code;
15821077d0bdSPeter Avalos }
15831077d0bdSPeter Avalos
15841077d0bdSPeter Avalos /*
15851077d0bdSPeter Avalos * Return true if any register that is used on exit from 'succ', has
15861077d0bdSPeter Avalos * an exit value that is different from the corresponding exit value
15871077d0bdSPeter Avalos * from 'b'.
15881077d0bdSPeter Avalos */
15891077d0bdSPeter Avalos static int
use_conflict(struct block * b,struct block * succ)15900e381983SMatthew Dillon use_conflict(struct block *b, struct block *succ)
15911077d0bdSPeter Avalos {
15921077d0bdSPeter Avalos int atom;
15931077d0bdSPeter Avalos atomset use = succ->out_use;
15941077d0bdSPeter Avalos
15951077d0bdSPeter Avalos if (use == 0)
15961077d0bdSPeter Avalos return 0;
15971077d0bdSPeter Avalos
15981077d0bdSPeter Avalos for (atom = 0; atom < N_ATOMS; ++atom)
15991077d0bdSPeter Avalos if (ATOMELEM(use, atom))
16001077d0bdSPeter Avalos if (b->val[atom] != succ->val[atom])
16011077d0bdSPeter Avalos return 1;
16021077d0bdSPeter Avalos return 0;
16031077d0bdSPeter Avalos }
16041077d0bdSPeter Avalos
1605*ea16f64eSAntonio Huete Jimenez /*
1606*ea16f64eSAntonio Huete Jimenez * Given a block that is the successor of an edge, and an edge that
1607*ea16f64eSAntonio Huete Jimenez * dominates that edge, return either a pointer to a child of that
1608*ea16f64eSAntonio Huete Jimenez * block (a block to which that block jumps) if that block is a
1609*ea16f64eSAntonio Huete Jimenez * candidate to replace the successor of the latter edge or NULL
1610*ea16f64eSAntonio Huete Jimenez * if neither of the children of the first block are candidates.
1611*ea16f64eSAntonio Huete Jimenez */
16121077d0bdSPeter Avalos static struct block *
fold_edge(struct block * child,struct edge * ep)16130e381983SMatthew Dillon fold_edge(struct block *child, struct edge *ep)
16141077d0bdSPeter Avalos {
16151077d0bdSPeter Avalos int sense;
1616*ea16f64eSAntonio Huete Jimenez bpf_u_int32 aval0, aval1, oval0, oval1;
16171077d0bdSPeter Avalos int code = ep->code;
16181077d0bdSPeter Avalos
16191077d0bdSPeter Avalos if (code < 0) {
1620*ea16f64eSAntonio Huete Jimenez /*
1621*ea16f64eSAntonio Huete Jimenez * This edge is a "branch if false" edge.
1622*ea16f64eSAntonio Huete Jimenez */
16231077d0bdSPeter Avalos code = -code;
16241077d0bdSPeter Avalos sense = 0;
1625*ea16f64eSAntonio Huete Jimenez } else {
1626*ea16f64eSAntonio Huete Jimenez /*
1627*ea16f64eSAntonio Huete Jimenez * This edge is a "branch if true" edge.
1628*ea16f64eSAntonio Huete Jimenez */
16291077d0bdSPeter Avalos sense = 1;
1630*ea16f64eSAntonio Huete Jimenez }
16311077d0bdSPeter Avalos
1632*ea16f64eSAntonio Huete Jimenez /*
1633*ea16f64eSAntonio Huete Jimenez * If the opcode for the branch at the end of the block we
1634*ea16f64eSAntonio Huete Jimenez * were handed isn't the same as the opcode for the branch
1635*ea16f64eSAntonio Huete Jimenez * to which the edge we were handed corresponds, the tests
1636*ea16f64eSAntonio Huete Jimenez * for those branches aren't testing the same conditions,
1637*ea16f64eSAntonio Huete Jimenez * so the blocks to which the first block branches aren't
1638*ea16f64eSAntonio Huete Jimenez * candidates to replace the successor of the edge.
1639*ea16f64eSAntonio Huete Jimenez */
16401077d0bdSPeter Avalos if (child->s.code != code)
16411077d0bdSPeter Avalos return 0;
16421077d0bdSPeter Avalos
16431077d0bdSPeter Avalos aval0 = child->val[A_ATOM];
16441077d0bdSPeter Avalos oval0 = child->oval;
16451077d0bdSPeter Avalos aval1 = ep->pred->val[A_ATOM];
16461077d0bdSPeter Avalos oval1 = ep->pred->oval;
16471077d0bdSPeter Avalos
1648*ea16f64eSAntonio Huete Jimenez /*
1649*ea16f64eSAntonio Huete Jimenez * If the A register value on exit from the successor block
1650*ea16f64eSAntonio Huete Jimenez * isn't the same as the A register value on exit from the
1651*ea16f64eSAntonio Huete Jimenez * predecessor of the edge, the blocks to which the first
1652*ea16f64eSAntonio Huete Jimenez * block branches aren't candidates to replace the successor
1653*ea16f64eSAntonio Huete Jimenez * of the edge.
1654*ea16f64eSAntonio Huete Jimenez */
16551077d0bdSPeter Avalos if (aval0 != aval1)
16561077d0bdSPeter Avalos return 0;
16571077d0bdSPeter Avalos
16581077d0bdSPeter Avalos if (oval0 == oval1)
16591077d0bdSPeter Avalos /*
16601077d0bdSPeter Avalos * The operands of the branch instructions are
1661*ea16f64eSAntonio Huete Jimenez * identical, so the branches are testing the
1662*ea16f64eSAntonio Huete Jimenez * same condition, and the result is true if a true
16631077d0bdSPeter Avalos * branch was taken to get here, otherwise false.
16641077d0bdSPeter Avalos */
16651077d0bdSPeter Avalos return sense ? JT(child) : JF(child);
16661077d0bdSPeter Avalos
16671077d0bdSPeter Avalos if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
16681077d0bdSPeter Avalos /*
16691077d0bdSPeter Avalos * At this point, we only know the comparison if we
16701077d0bdSPeter Avalos * came down the true branch, and it was an equality
16711077d0bdSPeter Avalos * comparison with a constant.
16721077d0bdSPeter Avalos *
16731077d0bdSPeter Avalos * I.e., if we came down the true branch, and the branch
16741077d0bdSPeter Avalos * was an equality comparison with a constant, we know the
16751077d0bdSPeter Avalos * accumulator contains that constant. If we came down
16761077d0bdSPeter Avalos * the false branch, or the comparison wasn't with a
16771077d0bdSPeter Avalos * constant, we don't know what was in the accumulator.
16781077d0bdSPeter Avalos *
16791077d0bdSPeter Avalos * We rely on the fact that distinct constants have distinct
16801077d0bdSPeter Avalos * value numbers.
16811077d0bdSPeter Avalos */
16821077d0bdSPeter Avalos return JF(child);
16831077d0bdSPeter Avalos
16841077d0bdSPeter Avalos return 0;
16851077d0bdSPeter Avalos }
16861077d0bdSPeter Avalos
1687*ea16f64eSAntonio Huete Jimenez /*
1688*ea16f64eSAntonio Huete Jimenez * If we can make this edge go directly to a child of the edge's current
1689*ea16f64eSAntonio Huete Jimenez * successor, do so.
1690*ea16f64eSAntonio Huete Jimenez */
16911077d0bdSPeter Avalos static void
opt_j(opt_state_t * opt_state,struct edge * ep)169297a9217aSAntonio Huete Jimenez opt_j(opt_state_t *opt_state, struct edge *ep)
16931077d0bdSPeter Avalos {
1694*ea16f64eSAntonio Huete Jimenez register u_int i, k;
16951077d0bdSPeter Avalos register struct block *target;
16961077d0bdSPeter Avalos
1697*ea16f64eSAntonio Huete Jimenez /*
1698*ea16f64eSAntonio Huete Jimenez * Does this edge go to a block where, if the test
1699*ea16f64eSAntonio Huete Jimenez * at the end of it succeeds, it goes to a block
1700*ea16f64eSAntonio Huete Jimenez * that's a leaf node of the DAG, i.e. a return
1701*ea16f64eSAntonio Huete Jimenez * statement?
1702*ea16f64eSAntonio Huete Jimenez * If so, there's nothing to optimize.
1703*ea16f64eSAntonio Huete Jimenez */
17041077d0bdSPeter Avalos if (JT(ep->succ) == 0)
17051077d0bdSPeter Avalos return;
17061077d0bdSPeter Avalos
1707*ea16f64eSAntonio Huete Jimenez /*
1708*ea16f64eSAntonio Huete Jimenez * Does this edge go to a block that goes, in turn, to
1709*ea16f64eSAntonio Huete Jimenez * the same block regardless of whether the test at the
1710*ea16f64eSAntonio Huete Jimenez * end succeeds or fails?
1711*ea16f64eSAntonio Huete Jimenez */
17121077d0bdSPeter Avalos if (JT(ep->succ) == JF(ep->succ)) {
17131077d0bdSPeter Avalos /*
17141077d0bdSPeter Avalos * Common branch targets can be eliminated, provided
17151077d0bdSPeter Avalos * there is no data dependency.
1716*ea16f64eSAntonio Huete Jimenez *
1717*ea16f64eSAntonio Huete Jimenez * Check whether any register used on exit from the
1718*ea16f64eSAntonio Huete Jimenez * block to which the successor of this edge goes
1719*ea16f64eSAntonio Huete Jimenez * has a value at that point that's different from
1720*ea16f64eSAntonio Huete Jimenez * the value it has on exit from the predecessor of
1721*ea16f64eSAntonio Huete Jimenez * this edge. If not, the predecessor of this edge
1722*ea16f64eSAntonio Huete Jimenez * can just go to the block to which the successor
1723*ea16f64eSAntonio Huete Jimenez * of this edge goes, bypassing the successor of this
1724*ea16f64eSAntonio Huete Jimenez * edge, as the successor of this edge isn't doing
1725*ea16f64eSAntonio Huete Jimenez * any calculations whose results are different
1726*ea16f64eSAntonio Huete Jimenez * from what the blocks before it did and isn't
1727*ea16f64eSAntonio Huete Jimenez * doing any tests the results of which matter.
17281077d0bdSPeter Avalos */
1729*ea16f64eSAntonio Huete Jimenez if (!use_conflict(ep->pred, JT(ep->succ))) {
1730*ea16f64eSAntonio Huete Jimenez /*
1731*ea16f64eSAntonio Huete Jimenez * No, there isn't.
1732*ea16f64eSAntonio Huete Jimenez * Make this edge go to the block to
1733*ea16f64eSAntonio Huete Jimenez * which the successor of that edge
1734*ea16f64eSAntonio Huete Jimenez * goes.
1735*ea16f64eSAntonio Huete Jimenez *
1736*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
1737*ea16f64eSAntonio Huete Jimenez */
1738*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 1;
173997a9217aSAntonio Huete Jimenez opt_state->done = 0;
17401077d0bdSPeter Avalos ep->succ = JT(ep->succ);
17411077d0bdSPeter Avalos }
17421077d0bdSPeter Avalos }
17431077d0bdSPeter Avalos /*
17441077d0bdSPeter Avalos * For each edge dominator that matches the successor of this
17451077d0bdSPeter Avalos * edge, promote the edge successor to the its grandchild.
17461077d0bdSPeter Avalos *
17471077d0bdSPeter Avalos * XXX We violate the set abstraction here in favor a reasonably
17481077d0bdSPeter Avalos * efficient loop.
17491077d0bdSPeter Avalos */
17501077d0bdSPeter Avalos top:
175197a9217aSAntonio Huete Jimenez for (i = 0; i < opt_state->edgewords; ++i) {
1752*ea16f64eSAntonio Huete Jimenez /* i'th word in the bitset of dominators */
17531077d0bdSPeter Avalos register bpf_u_int32 x = ep->edom[i];
17541077d0bdSPeter Avalos
17551077d0bdSPeter Avalos while (x != 0) {
1756*ea16f64eSAntonio Huete Jimenez /* Find the next dominator in that word and mark it as found */
17573a289941SAaron LI k = lowest_set_bit(x);
17583a289941SAaron LI x &=~ ((bpf_u_int32)1 << k);
17591077d0bdSPeter Avalos k += i * BITS_PER_WORD;
17601077d0bdSPeter Avalos
176197a9217aSAntonio Huete Jimenez target = fold_edge(ep->succ, opt_state->edges[k]);
17621077d0bdSPeter Avalos /*
1763*ea16f64eSAntonio Huete Jimenez * We have a candidate to replace the successor
1764*ea16f64eSAntonio Huete Jimenez * of ep.
1765*ea16f64eSAntonio Huete Jimenez *
17661077d0bdSPeter Avalos * Check that there is no data dependency between
1767*ea16f64eSAntonio Huete Jimenez * nodes that will be violated if we move the edge;
1768*ea16f64eSAntonio Huete Jimenez * i.e., if any register used on exit from the
1769*ea16f64eSAntonio Huete Jimenez * candidate has a value at that point different
1770*ea16f64eSAntonio Huete Jimenez * from the value it has when we exit the
1771*ea16f64eSAntonio Huete Jimenez * predecessor of that edge, there's a data
1772*ea16f64eSAntonio Huete Jimenez * dependency that will be violated.
17731077d0bdSPeter Avalos */
17741077d0bdSPeter Avalos if (target != 0 && !use_conflict(ep->pred, target)) {
1775*ea16f64eSAntonio Huete Jimenez /*
1776*ea16f64eSAntonio Huete Jimenez * It's safe to replace the successor of
1777*ea16f64eSAntonio Huete Jimenez * ep; do so, and note that we've made
1778*ea16f64eSAntonio Huete Jimenez * at least one change.
1779*ea16f64eSAntonio Huete Jimenez *
1780*ea16f64eSAntonio Huete Jimenez * XXX - this is one of the operations that
1781*ea16f64eSAntonio Huete Jimenez * happens when the optimizer gets into
1782*ea16f64eSAntonio Huete Jimenez * one of those infinite loops.
1783*ea16f64eSAntonio Huete Jimenez */
178497a9217aSAntonio Huete Jimenez opt_state->done = 0;
17851077d0bdSPeter Avalos ep->succ = target;
17861077d0bdSPeter Avalos if (JT(target) != 0)
17871077d0bdSPeter Avalos /*
17881077d0bdSPeter Avalos * Start over unless we hit a leaf.
17891077d0bdSPeter Avalos */
17901077d0bdSPeter Avalos goto top;
17911077d0bdSPeter Avalos return;
17921077d0bdSPeter Avalos }
17931077d0bdSPeter Avalos }
17941077d0bdSPeter Avalos }
17951077d0bdSPeter Avalos }
17961077d0bdSPeter Avalos
1797*ea16f64eSAntonio Huete Jimenez /*
1798*ea16f64eSAntonio Huete Jimenez * XXX - is this, and and_pullup(), what's described in section 6.1.2
1799*ea16f64eSAntonio Huete Jimenez * "Predicate Assertion Propagation" in the BPF+ paper?
1800*ea16f64eSAntonio Huete Jimenez *
1801*ea16f64eSAntonio Huete Jimenez * Note that this looks at block dominators, not edge dominators.
1802*ea16f64eSAntonio Huete Jimenez * Don't think so.
1803*ea16f64eSAntonio Huete Jimenez *
1804*ea16f64eSAntonio Huete Jimenez * "A or B" compiles into
1805*ea16f64eSAntonio Huete Jimenez *
1806*ea16f64eSAntonio Huete Jimenez * A
1807*ea16f64eSAntonio Huete Jimenez * t / \ f
1808*ea16f64eSAntonio Huete Jimenez * / B
1809*ea16f64eSAntonio Huete Jimenez * / t / \ f
1810*ea16f64eSAntonio Huete Jimenez * \ /
1811*ea16f64eSAntonio Huete Jimenez * \ /
1812*ea16f64eSAntonio Huete Jimenez * X
1813*ea16f64eSAntonio Huete Jimenez *
1814*ea16f64eSAntonio Huete Jimenez *
1815*ea16f64eSAntonio Huete Jimenez */
18161077d0bdSPeter Avalos static void
or_pullup(opt_state_t * opt_state,struct block * b)181797a9217aSAntonio Huete Jimenez or_pullup(opt_state_t *opt_state, struct block *b)
18181077d0bdSPeter Avalos {
1819*ea16f64eSAntonio Huete Jimenez bpf_u_int32 val;
1820*ea16f64eSAntonio Huete Jimenez int at_top;
18211077d0bdSPeter Avalos struct block *pull;
18221077d0bdSPeter Avalos struct block **diffp, **samep;
18231077d0bdSPeter Avalos struct edge *ep;
18241077d0bdSPeter Avalos
18251077d0bdSPeter Avalos ep = b->in_edges;
18261077d0bdSPeter Avalos if (ep == 0)
18271077d0bdSPeter Avalos return;
18281077d0bdSPeter Avalos
18291077d0bdSPeter Avalos /*
18301077d0bdSPeter Avalos * Make sure each predecessor loads the same value.
18311077d0bdSPeter Avalos * XXX why?
18321077d0bdSPeter Avalos */
18331077d0bdSPeter Avalos val = ep->pred->val[A_ATOM];
18341077d0bdSPeter Avalos for (ep = ep->next; ep != 0; ep = ep->next)
18351077d0bdSPeter Avalos if (val != ep->pred->val[A_ATOM])
18361077d0bdSPeter Avalos return;
18371077d0bdSPeter Avalos
1838*ea16f64eSAntonio Huete Jimenez /*
1839*ea16f64eSAntonio Huete Jimenez * For the first edge in the list of edges coming into this block,
1840*ea16f64eSAntonio Huete Jimenez * see whether the predecessor of that edge comes here via a true
1841*ea16f64eSAntonio Huete Jimenez * branch or a false branch.
1842*ea16f64eSAntonio Huete Jimenez */
18431077d0bdSPeter Avalos if (JT(b->in_edges->pred) == b)
1844*ea16f64eSAntonio Huete Jimenez diffp = &JT(b->in_edges->pred); /* jt */
18451077d0bdSPeter Avalos else
1846*ea16f64eSAntonio Huete Jimenez diffp = &JF(b->in_edges->pred); /* jf */
18471077d0bdSPeter Avalos
1848*ea16f64eSAntonio Huete Jimenez /*
1849*ea16f64eSAntonio Huete Jimenez * diffp is a pointer to a pointer to the block.
1850*ea16f64eSAntonio Huete Jimenez *
1851*ea16f64eSAntonio Huete Jimenez * Go down the false chain looking as far as you can,
1852*ea16f64eSAntonio Huete Jimenez * making sure that each jump-compare is doing the
1853*ea16f64eSAntonio Huete Jimenez * same as the original block.
1854*ea16f64eSAntonio Huete Jimenez *
1855*ea16f64eSAntonio Huete Jimenez * If you reach the bottom before you reach a
1856*ea16f64eSAntonio Huete Jimenez * different jump-compare, just exit. There's nothing
1857*ea16f64eSAntonio Huete Jimenez * to do here. XXX - no, this version is checking for
1858*ea16f64eSAntonio Huete Jimenez * the value leaving the block; that's from the BPF+
1859*ea16f64eSAntonio Huete Jimenez * pullup routine.
1860*ea16f64eSAntonio Huete Jimenez */
18611077d0bdSPeter Avalos at_top = 1;
18623a289941SAaron LI for (;;) {
1863*ea16f64eSAntonio Huete Jimenez /*
1864*ea16f64eSAntonio Huete Jimenez * Done if that's not going anywhere XXX
1865*ea16f64eSAntonio Huete Jimenez */
18661077d0bdSPeter Avalos if (*diffp == 0)
18671077d0bdSPeter Avalos return;
18681077d0bdSPeter Avalos
1869*ea16f64eSAntonio Huete Jimenez /*
1870*ea16f64eSAntonio Huete Jimenez * Done if that predecessor blah blah blah isn't
1871*ea16f64eSAntonio Huete Jimenez * going the same place we're going XXX
1872*ea16f64eSAntonio Huete Jimenez *
1873*ea16f64eSAntonio Huete Jimenez * Does the true edge of this block point to the same
1874*ea16f64eSAntonio Huete Jimenez * location as the true edge of b?
1875*ea16f64eSAntonio Huete Jimenez */
18761077d0bdSPeter Avalos if (JT(*diffp) != JT(b))
18771077d0bdSPeter Avalos return;
18781077d0bdSPeter Avalos
1879*ea16f64eSAntonio Huete Jimenez /*
1880*ea16f64eSAntonio Huete Jimenez * Done if this node isn't a dominator of that
1881*ea16f64eSAntonio Huete Jimenez * node blah blah blah XXX
1882*ea16f64eSAntonio Huete Jimenez *
1883*ea16f64eSAntonio Huete Jimenez * Does b dominate diffp?
1884*ea16f64eSAntonio Huete Jimenez */
18851077d0bdSPeter Avalos if (!SET_MEMBER((*diffp)->dom, b->id))
18861077d0bdSPeter Avalos return;
18871077d0bdSPeter Avalos
1888*ea16f64eSAntonio Huete Jimenez /*
1889*ea16f64eSAntonio Huete Jimenez * Break out of the loop if that node's value of A
1890*ea16f64eSAntonio Huete Jimenez * isn't the value of A above XXX
1891*ea16f64eSAntonio Huete Jimenez */
18921077d0bdSPeter Avalos if ((*diffp)->val[A_ATOM] != val)
18931077d0bdSPeter Avalos break;
18941077d0bdSPeter Avalos
1895*ea16f64eSAntonio Huete Jimenez /*
1896*ea16f64eSAntonio Huete Jimenez * Get the JF for that node XXX
1897*ea16f64eSAntonio Huete Jimenez * Go down the false path.
1898*ea16f64eSAntonio Huete Jimenez */
18991077d0bdSPeter Avalos diffp = &JF(*diffp);
19001077d0bdSPeter Avalos at_top = 0;
19011077d0bdSPeter Avalos }
1902*ea16f64eSAntonio Huete Jimenez
1903*ea16f64eSAntonio Huete Jimenez /*
1904*ea16f64eSAntonio Huete Jimenez * Now that we've found a different jump-compare in a chain
1905*ea16f64eSAntonio Huete Jimenez * below b, search further down until we find another
1906*ea16f64eSAntonio Huete Jimenez * jump-compare that looks at the original value. This
1907*ea16f64eSAntonio Huete Jimenez * jump-compare should get pulled up. XXX again we're
1908*ea16f64eSAntonio Huete Jimenez * comparing values not jump-compares.
1909*ea16f64eSAntonio Huete Jimenez */
19101077d0bdSPeter Avalos samep = &JF(*diffp);
19113a289941SAaron LI for (;;) {
1912*ea16f64eSAntonio Huete Jimenez /*
1913*ea16f64eSAntonio Huete Jimenez * Done if that's not going anywhere XXX
1914*ea16f64eSAntonio Huete Jimenez */
19151077d0bdSPeter Avalos if (*samep == 0)
19161077d0bdSPeter Avalos return;
19171077d0bdSPeter Avalos
1918*ea16f64eSAntonio Huete Jimenez /*
1919*ea16f64eSAntonio Huete Jimenez * Done if that predecessor blah blah blah isn't
1920*ea16f64eSAntonio Huete Jimenez * going the same place we're going XXX
1921*ea16f64eSAntonio Huete Jimenez */
19221077d0bdSPeter Avalos if (JT(*samep) != JT(b))
19231077d0bdSPeter Avalos return;
19241077d0bdSPeter Avalos
1925*ea16f64eSAntonio Huete Jimenez /*
1926*ea16f64eSAntonio Huete Jimenez * Done if this node isn't a dominator of that
1927*ea16f64eSAntonio Huete Jimenez * node blah blah blah XXX
1928*ea16f64eSAntonio Huete Jimenez *
1929*ea16f64eSAntonio Huete Jimenez * Does b dominate samep?
1930*ea16f64eSAntonio Huete Jimenez */
19311077d0bdSPeter Avalos if (!SET_MEMBER((*samep)->dom, b->id))
19321077d0bdSPeter Avalos return;
19331077d0bdSPeter Avalos
1934*ea16f64eSAntonio Huete Jimenez /*
1935*ea16f64eSAntonio Huete Jimenez * Break out of the loop if that node's value of A
1936*ea16f64eSAntonio Huete Jimenez * is the value of A above XXX
1937*ea16f64eSAntonio Huete Jimenez */
19381077d0bdSPeter Avalos if ((*samep)->val[A_ATOM] == val)
19391077d0bdSPeter Avalos break;
19401077d0bdSPeter Avalos
19411077d0bdSPeter Avalos /* XXX Need to check that there are no data dependencies
19421077d0bdSPeter Avalos between dp0 and dp1. Currently, the code generator
19431077d0bdSPeter Avalos will not produce such dependencies. */
19441077d0bdSPeter Avalos samep = &JF(*samep);
19451077d0bdSPeter Avalos }
19461077d0bdSPeter Avalos #ifdef notdef
19471077d0bdSPeter Avalos /* XXX This doesn't cover everything. */
19481077d0bdSPeter Avalos for (i = 0; i < N_ATOMS; ++i)
19491077d0bdSPeter Avalos if ((*samep)->val[i] != pred->val[i])
19501077d0bdSPeter Avalos return;
19511077d0bdSPeter Avalos #endif
19521077d0bdSPeter Avalos /* Pull up the node. */
19531077d0bdSPeter Avalos pull = *samep;
19541077d0bdSPeter Avalos *samep = JF(pull);
19551077d0bdSPeter Avalos JF(pull) = *diffp;
19561077d0bdSPeter Avalos
19571077d0bdSPeter Avalos /*
19581077d0bdSPeter Avalos * At the top of the chain, each predecessor needs to point at the
19591077d0bdSPeter Avalos * pulled up node. Inside the chain, there is only one predecessor
19601077d0bdSPeter Avalos * to worry about.
19611077d0bdSPeter Avalos */
19621077d0bdSPeter Avalos if (at_top) {
19631077d0bdSPeter Avalos for (ep = b->in_edges; ep != 0; ep = ep->next) {
19641077d0bdSPeter Avalos if (JT(ep->pred) == b)
19651077d0bdSPeter Avalos JT(ep->pred) = pull;
19661077d0bdSPeter Avalos else
19671077d0bdSPeter Avalos JF(ep->pred) = pull;
19681077d0bdSPeter Avalos }
19691077d0bdSPeter Avalos }
19701077d0bdSPeter Avalos else
19711077d0bdSPeter Avalos *diffp = pull;
19721077d0bdSPeter Avalos
1973*ea16f64eSAntonio Huete Jimenez /*
1974*ea16f64eSAntonio Huete Jimenez * XXX - this is one of the operations that happens when the
1975*ea16f64eSAntonio Huete Jimenez * optimizer gets into one of those infinite loops.
1976*ea16f64eSAntonio Huete Jimenez */
197797a9217aSAntonio Huete Jimenez opt_state->done = 0;
19781077d0bdSPeter Avalos }
19791077d0bdSPeter Avalos
19801077d0bdSPeter Avalos static void
and_pullup(opt_state_t * opt_state,struct block * b)198197a9217aSAntonio Huete Jimenez and_pullup(opt_state_t *opt_state, struct block *b)
19821077d0bdSPeter Avalos {
1983*ea16f64eSAntonio Huete Jimenez bpf_u_int32 val;
1984*ea16f64eSAntonio Huete Jimenez int at_top;
19851077d0bdSPeter Avalos struct block *pull;
19861077d0bdSPeter Avalos struct block **diffp, **samep;
19871077d0bdSPeter Avalos struct edge *ep;
19881077d0bdSPeter Avalos
19891077d0bdSPeter Avalos ep = b->in_edges;
19901077d0bdSPeter Avalos if (ep == 0)
19911077d0bdSPeter Avalos return;
19921077d0bdSPeter Avalos
19931077d0bdSPeter Avalos /*
19941077d0bdSPeter Avalos * Make sure each predecessor loads the same value.
19951077d0bdSPeter Avalos */
19961077d0bdSPeter Avalos val = ep->pred->val[A_ATOM];
19971077d0bdSPeter Avalos for (ep = ep->next; ep != 0; ep = ep->next)
19981077d0bdSPeter Avalos if (val != ep->pred->val[A_ATOM])
19991077d0bdSPeter Avalos return;
20001077d0bdSPeter Avalos
20011077d0bdSPeter Avalos if (JT(b->in_edges->pred) == b)
20021077d0bdSPeter Avalos diffp = &JT(b->in_edges->pred);
20031077d0bdSPeter Avalos else
20041077d0bdSPeter Avalos diffp = &JF(b->in_edges->pred);
20051077d0bdSPeter Avalos
20061077d0bdSPeter Avalos at_top = 1;
20073a289941SAaron LI for (;;) {
20081077d0bdSPeter Avalos if (*diffp == 0)
20091077d0bdSPeter Avalos return;
20101077d0bdSPeter Avalos
20111077d0bdSPeter Avalos if (JF(*diffp) != JF(b))
20121077d0bdSPeter Avalos return;
20131077d0bdSPeter Avalos
20141077d0bdSPeter Avalos if (!SET_MEMBER((*diffp)->dom, b->id))
20151077d0bdSPeter Avalos return;
20161077d0bdSPeter Avalos
20171077d0bdSPeter Avalos if ((*diffp)->val[A_ATOM] != val)
20181077d0bdSPeter Avalos break;
20191077d0bdSPeter Avalos
20201077d0bdSPeter Avalos diffp = &JT(*diffp);
20211077d0bdSPeter Avalos at_top = 0;
20221077d0bdSPeter Avalos }
20231077d0bdSPeter Avalos samep = &JT(*diffp);
20243a289941SAaron LI for (;;) {
20251077d0bdSPeter Avalos if (*samep == 0)
20261077d0bdSPeter Avalos return;
20271077d0bdSPeter Avalos
20281077d0bdSPeter Avalos if (JF(*samep) != JF(b))
20291077d0bdSPeter Avalos return;
20301077d0bdSPeter Avalos
20311077d0bdSPeter Avalos if (!SET_MEMBER((*samep)->dom, b->id))
20321077d0bdSPeter Avalos return;
20331077d0bdSPeter Avalos
20341077d0bdSPeter Avalos if ((*samep)->val[A_ATOM] == val)
20351077d0bdSPeter Avalos break;
20361077d0bdSPeter Avalos
20371077d0bdSPeter Avalos /* XXX Need to check that there are no data dependencies
20381077d0bdSPeter Avalos between diffp and samep. Currently, the code generator
20391077d0bdSPeter Avalos will not produce such dependencies. */
20401077d0bdSPeter Avalos samep = &JT(*samep);
20411077d0bdSPeter Avalos }
20421077d0bdSPeter Avalos #ifdef notdef
20431077d0bdSPeter Avalos /* XXX This doesn't cover everything. */
20441077d0bdSPeter Avalos for (i = 0; i < N_ATOMS; ++i)
20451077d0bdSPeter Avalos if ((*samep)->val[i] != pred->val[i])
20461077d0bdSPeter Avalos return;
20471077d0bdSPeter Avalos #endif
20481077d0bdSPeter Avalos /* Pull up the node. */
20491077d0bdSPeter Avalos pull = *samep;
20501077d0bdSPeter Avalos *samep = JT(pull);
20511077d0bdSPeter Avalos JT(pull) = *diffp;
20521077d0bdSPeter Avalos
20531077d0bdSPeter Avalos /*
20541077d0bdSPeter Avalos * At the top of the chain, each predecessor needs to point at the
20551077d0bdSPeter Avalos * pulled up node. Inside the chain, there is only one predecessor
20561077d0bdSPeter Avalos * to worry about.
20571077d0bdSPeter Avalos */
20581077d0bdSPeter Avalos if (at_top) {
20591077d0bdSPeter Avalos for (ep = b->in_edges; ep != 0; ep = ep->next) {
20601077d0bdSPeter Avalos if (JT(ep->pred) == b)
20611077d0bdSPeter Avalos JT(ep->pred) = pull;
20621077d0bdSPeter Avalos else
20631077d0bdSPeter Avalos JF(ep->pred) = pull;
20641077d0bdSPeter Avalos }
20651077d0bdSPeter Avalos }
20661077d0bdSPeter Avalos else
20671077d0bdSPeter Avalos *diffp = pull;
20681077d0bdSPeter Avalos
2069*ea16f64eSAntonio Huete Jimenez /*
2070*ea16f64eSAntonio Huete Jimenez * XXX - this is one of the operations that happens when the
2071*ea16f64eSAntonio Huete Jimenez * optimizer gets into one of those infinite loops.
2072*ea16f64eSAntonio Huete Jimenez */
207397a9217aSAntonio Huete Jimenez opt_state->done = 0;
20741077d0bdSPeter Avalos }
20751077d0bdSPeter Avalos
20761077d0bdSPeter Avalos static void
opt_blks(opt_state_t * opt_state,struct icode * ic,int do_stmts)20773a289941SAaron LI opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
20781077d0bdSPeter Avalos {
20791077d0bdSPeter Avalos int i, maxlevel;
20801077d0bdSPeter Avalos struct block *p;
20811077d0bdSPeter Avalos
208297a9217aSAntonio Huete Jimenez init_val(opt_state);
208397a9217aSAntonio Huete Jimenez maxlevel = ic->root->level;
20841077d0bdSPeter Avalos
208597a9217aSAntonio Huete Jimenez find_inedges(opt_state, ic->root);
20861077d0bdSPeter Avalos for (i = maxlevel; i >= 0; --i)
208797a9217aSAntonio Huete Jimenez for (p = opt_state->levels[i]; p; p = p->link)
20883a289941SAaron LI opt_blk(opt_state, p, do_stmts);
20891077d0bdSPeter Avalos
20901077d0bdSPeter Avalos if (do_stmts)
20911077d0bdSPeter Avalos /*
20921077d0bdSPeter Avalos * No point trying to move branches; it can't possibly
20931077d0bdSPeter Avalos * make a difference at this point.
2094*ea16f64eSAntonio Huete Jimenez *
2095*ea16f64eSAntonio Huete Jimenez * XXX - this might be after we detect a loop where
2096*ea16f64eSAntonio Huete Jimenez * we were just looping infinitely moving branches
2097*ea16f64eSAntonio Huete Jimenez * in such a fashion that we went through two or more
2098*ea16f64eSAntonio Huete Jimenez * versions of the machine code, eventually returning
2099*ea16f64eSAntonio Huete Jimenez * to the first version. (We're really not doing a
2100*ea16f64eSAntonio Huete Jimenez * full loop detection, we're just testing for two
2101*ea16f64eSAntonio Huete Jimenez * passes in a row where where we do nothing but
2102*ea16f64eSAntonio Huete Jimenez * move branches.)
21031077d0bdSPeter Avalos */
21041077d0bdSPeter Avalos return;
21051077d0bdSPeter Avalos
2106*ea16f64eSAntonio Huete Jimenez /*
2107*ea16f64eSAntonio Huete Jimenez * Is this what the BPF+ paper describes in sections 6.1.1,
2108*ea16f64eSAntonio Huete Jimenez * 6.1.2, and 6.1.3?
2109*ea16f64eSAntonio Huete Jimenez */
21101077d0bdSPeter Avalos for (i = 1; i <= maxlevel; ++i) {
211197a9217aSAntonio Huete Jimenez for (p = opt_state->levels[i]; p; p = p->link) {
211297a9217aSAntonio Huete Jimenez opt_j(opt_state, &p->et);
211397a9217aSAntonio Huete Jimenez opt_j(opt_state, &p->ef);
21141077d0bdSPeter Avalos }
21151077d0bdSPeter Avalos }
21161077d0bdSPeter Avalos
211797a9217aSAntonio Huete Jimenez find_inedges(opt_state, ic->root);
21181077d0bdSPeter Avalos for (i = 1; i <= maxlevel; ++i) {
211997a9217aSAntonio Huete Jimenez for (p = opt_state->levels[i]; p; p = p->link) {
212097a9217aSAntonio Huete Jimenez or_pullup(opt_state, p);
212197a9217aSAntonio Huete Jimenez and_pullup(opt_state, p);
21221077d0bdSPeter Avalos }
21231077d0bdSPeter Avalos }
21241077d0bdSPeter Avalos }
21251077d0bdSPeter Avalos
21261077d0bdSPeter Avalos static inline void
link_inedge(struct edge * parent,struct block * child)21270e381983SMatthew Dillon link_inedge(struct edge *parent, struct block *child)
21281077d0bdSPeter Avalos {
21291077d0bdSPeter Avalos parent->next = child->in_edges;
21301077d0bdSPeter Avalos child->in_edges = parent;
21311077d0bdSPeter Avalos }
21321077d0bdSPeter Avalos
21331077d0bdSPeter Avalos static void
find_inedges(opt_state_t * opt_state,struct block * root)213497a9217aSAntonio Huete Jimenez find_inedges(opt_state_t *opt_state, struct block *root)
21351077d0bdSPeter Avalos {
2136*ea16f64eSAntonio Huete Jimenez u_int i;
2137*ea16f64eSAntonio Huete Jimenez int level;
21381077d0bdSPeter Avalos struct block *b;
21391077d0bdSPeter Avalos
214097a9217aSAntonio Huete Jimenez for (i = 0; i < opt_state->n_blocks; ++i)
214197a9217aSAntonio Huete Jimenez opt_state->blocks[i]->in_edges = 0;
21421077d0bdSPeter Avalos
21431077d0bdSPeter Avalos /*
21441077d0bdSPeter Avalos * Traverse the graph, adding each edge to the predecessor
21451077d0bdSPeter Avalos * list of its successors. Skip the leaves (i.e. level 0).
21461077d0bdSPeter Avalos */
2147*ea16f64eSAntonio Huete Jimenez for (level = root->level; level > 0; --level) {
2148*ea16f64eSAntonio Huete Jimenez for (b = opt_state->levels[level]; b != 0; b = b->link) {
21491077d0bdSPeter Avalos link_inedge(&b->et, JT(b));
21501077d0bdSPeter Avalos link_inedge(&b->ef, JF(b));
21511077d0bdSPeter Avalos }
21521077d0bdSPeter Avalos }
21531077d0bdSPeter Avalos }
21541077d0bdSPeter Avalos
21551077d0bdSPeter Avalos static void
opt_root(struct block ** b)21560e381983SMatthew Dillon opt_root(struct block **b)
21571077d0bdSPeter Avalos {
21581077d0bdSPeter Avalos struct slist *tmp, *s;
21591077d0bdSPeter Avalos
21601077d0bdSPeter Avalos s = (*b)->stmts;
21611077d0bdSPeter Avalos (*b)->stmts = 0;
21621077d0bdSPeter Avalos while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
21631077d0bdSPeter Avalos *b = JT(*b);
21641077d0bdSPeter Avalos
21651077d0bdSPeter Avalos tmp = (*b)->stmts;
21661077d0bdSPeter Avalos if (tmp != 0)
21671077d0bdSPeter Avalos sappend(s, tmp);
21681077d0bdSPeter Avalos (*b)->stmts = s;
21691077d0bdSPeter Avalos
21701077d0bdSPeter Avalos /*
21711077d0bdSPeter Avalos * If the root node is a return, then there is no
21721077d0bdSPeter Avalos * point executing any statements (since the bpf machine
21731077d0bdSPeter Avalos * has no side effects).
21741077d0bdSPeter Avalos */
21751077d0bdSPeter Avalos if (BPF_CLASS((*b)->s.code) == BPF_RET)
21761077d0bdSPeter Avalos (*b)->stmts = 0;
21771077d0bdSPeter Avalos }
21781077d0bdSPeter Avalos
21791077d0bdSPeter Avalos static void
opt_loop(opt_state_t * opt_state,struct icode * ic,int do_stmts)21803a289941SAaron LI opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
21811077d0bdSPeter Avalos {
21821077d0bdSPeter Avalos
21831077d0bdSPeter Avalos #ifdef BDEBUG
21843a289941SAaron LI if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
21851077d0bdSPeter Avalos printf("opt_loop(root, %d) begin\n", do_stmts);
21863a289941SAaron LI opt_dump(opt_state, ic);
21871077d0bdSPeter Avalos }
21881077d0bdSPeter Avalos #endif
2189*ea16f64eSAntonio Huete Jimenez
2190*ea16f64eSAntonio Huete Jimenez /*
2191*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
2192*ea16f64eSAntonio Huete Jimenez */
2193*ea16f64eSAntonio Huete Jimenez int loop_count = 0;
2194*ea16f64eSAntonio Huete Jimenez for (;;) {
219597a9217aSAntonio Huete Jimenez opt_state->done = 1;
2196*ea16f64eSAntonio Huete Jimenez /*
2197*ea16f64eSAntonio Huete Jimenez * XXX - optimizer loop detection.
2198*ea16f64eSAntonio Huete Jimenez */
2199*ea16f64eSAntonio Huete Jimenez opt_state->non_branch_movement_performed = 0;
220097a9217aSAntonio Huete Jimenez find_levels(opt_state, ic);
220197a9217aSAntonio Huete Jimenez find_dom(opt_state, ic->root);
220297a9217aSAntonio Huete Jimenez find_closure(opt_state, ic->root);
220397a9217aSAntonio Huete Jimenez find_ud(opt_state, ic->root);
220497a9217aSAntonio Huete Jimenez find_edom(opt_state, ic->root);
22053a289941SAaron LI opt_blks(opt_state, ic, do_stmts);
22061077d0bdSPeter Avalos #ifdef BDEBUG
22073a289941SAaron LI if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
220897a9217aSAntonio Huete Jimenez printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
22093a289941SAaron LI opt_dump(opt_state, ic);
22101077d0bdSPeter Avalos }
22111077d0bdSPeter Avalos #endif
2212*ea16f64eSAntonio Huete Jimenez
2213*ea16f64eSAntonio Huete Jimenez /*
2214*ea16f64eSAntonio Huete Jimenez * Was anything done in this optimizer pass?
2215*ea16f64eSAntonio Huete Jimenez */
2216*ea16f64eSAntonio Huete Jimenez if (opt_state->done) {
2217*ea16f64eSAntonio Huete Jimenez /*
2218*ea16f64eSAntonio Huete Jimenez * No, so we've reached a fixed point.
2219*ea16f64eSAntonio Huete Jimenez * We're done.
2220*ea16f64eSAntonio Huete Jimenez */
2221*ea16f64eSAntonio Huete Jimenez break;
2222*ea16f64eSAntonio Huete Jimenez }
2223*ea16f64eSAntonio Huete Jimenez
2224*ea16f64eSAntonio Huete Jimenez /*
2225*ea16f64eSAntonio Huete Jimenez * XXX - was anything done other than branch movement
2226*ea16f64eSAntonio Huete Jimenez * in this pass?
2227*ea16f64eSAntonio Huete Jimenez */
2228*ea16f64eSAntonio Huete Jimenez if (opt_state->non_branch_movement_performed) {
2229*ea16f64eSAntonio Huete Jimenez /*
2230*ea16f64eSAntonio Huete Jimenez * Yes. Clear any loop-detection counter;
2231*ea16f64eSAntonio Huete Jimenez * we're making some form of progress (assuming
2232*ea16f64eSAntonio Huete Jimenez * we can't get into a cycle doing *other*
2233*ea16f64eSAntonio Huete Jimenez * optimizations...).
2234*ea16f64eSAntonio Huete Jimenez */
2235*ea16f64eSAntonio Huete Jimenez loop_count = 0;
2236*ea16f64eSAntonio Huete Jimenez } else {
2237*ea16f64eSAntonio Huete Jimenez /*
2238*ea16f64eSAntonio Huete Jimenez * No - increment the counter, and quit if
2239*ea16f64eSAntonio Huete Jimenez * it's up to 100.
2240*ea16f64eSAntonio Huete Jimenez */
2241*ea16f64eSAntonio Huete Jimenez loop_count++;
2242*ea16f64eSAntonio Huete Jimenez if (loop_count >= 100) {
2243*ea16f64eSAntonio Huete Jimenez /*
2244*ea16f64eSAntonio Huete Jimenez * We've done nothing but branch movement
2245*ea16f64eSAntonio Huete Jimenez * for 100 passes; we're probably
2246*ea16f64eSAntonio Huete Jimenez * in a cycle and will never reach a
2247*ea16f64eSAntonio Huete Jimenez * fixed point.
2248*ea16f64eSAntonio Huete Jimenez *
2249*ea16f64eSAntonio Huete Jimenez * XXX - yes, we really need a non-
2250*ea16f64eSAntonio Huete Jimenez * heuristic way of detecting a cycle.
2251*ea16f64eSAntonio Huete Jimenez */
2252*ea16f64eSAntonio Huete Jimenez opt_state->done = 1;
2253*ea16f64eSAntonio Huete Jimenez break;
2254*ea16f64eSAntonio Huete Jimenez }
2255*ea16f64eSAntonio Huete Jimenez }
2256*ea16f64eSAntonio Huete Jimenez }
22571077d0bdSPeter Avalos }
22581077d0bdSPeter Avalos
22591077d0bdSPeter Avalos /*
22601077d0bdSPeter Avalos * Optimize the filter code in its dag representation.
22613a289941SAaron LI * Return 0 on success, -1 on error.
22621077d0bdSPeter Avalos */
22633a289941SAaron LI int
bpf_optimize(struct icode * ic,char * errbuf)22643a289941SAaron LI bpf_optimize(struct icode *ic, char *errbuf)
22651077d0bdSPeter Avalos {
226697a9217aSAntonio Huete Jimenez opt_state_t opt_state;
22671077d0bdSPeter Avalos
22683a289941SAaron LI memset(&opt_state, 0, sizeof(opt_state));
22693a289941SAaron LI opt_state.errbuf = errbuf;
2270*ea16f64eSAntonio Huete Jimenez opt_state.non_branch_movement_performed = 0;
22713a289941SAaron LI if (setjmp(opt_state.top_ctx)) {
22723a289941SAaron LI opt_cleanup(&opt_state);
22733a289941SAaron LI return -1;
22743a289941SAaron LI }
22753a289941SAaron LI opt_init(&opt_state, ic);
22763a289941SAaron LI opt_loop(&opt_state, ic, 0);
22773a289941SAaron LI opt_loop(&opt_state, ic, 1);
227897a9217aSAntonio Huete Jimenez intern_blocks(&opt_state, ic);
22791077d0bdSPeter Avalos #ifdef BDEBUG
22803a289941SAaron LI if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
22811077d0bdSPeter Avalos printf("after intern_blocks()\n");
22823a289941SAaron LI opt_dump(&opt_state, ic);
22831077d0bdSPeter Avalos }
22841077d0bdSPeter Avalos #endif
228597a9217aSAntonio Huete Jimenez opt_root(&ic->root);
22861077d0bdSPeter Avalos #ifdef BDEBUG
22873a289941SAaron LI if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
22881077d0bdSPeter Avalos printf("after opt_root()\n");
22893a289941SAaron LI opt_dump(&opt_state, ic);
22901077d0bdSPeter Avalos }
22911077d0bdSPeter Avalos #endif
229297a9217aSAntonio Huete Jimenez opt_cleanup(&opt_state);
22933a289941SAaron LI return 0;
22941077d0bdSPeter Avalos }
22951077d0bdSPeter Avalos
22961077d0bdSPeter Avalos static void
make_marks(struct icode * ic,struct block * p)229797a9217aSAntonio Huete Jimenez make_marks(struct icode *ic, struct block *p)
22981077d0bdSPeter Avalos {
229997a9217aSAntonio Huete Jimenez if (!isMarked(ic, p)) {
230097a9217aSAntonio Huete Jimenez Mark(ic, p);
23011077d0bdSPeter Avalos if (BPF_CLASS(p->s.code) != BPF_RET) {
230297a9217aSAntonio Huete Jimenez make_marks(ic, JT(p));
230397a9217aSAntonio Huete Jimenez make_marks(ic, JF(p));
23041077d0bdSPeter Avalos }
23051077d0bdSPeter Avalos }
23061077d0bdSPeter Avalos }
23071077d0bdSPeter Avalos
23081077d0bdSPeter Avalos /*
230997a9217aSAntonio Huete Jimenez * Mark code array such that isMarked(ic->cur_mark, i) is true
23101077d0bdSPeter Avalos * only for nodes that are alive.
23111077d0bdSPeter Avalos */
23121077d0bdSPeter Avalos static void
mark_code(struct icode * ic)231397a9217aSAntonio Huete Jimenez mark_code(struct icode *ic)
23141077d0bdSPeter Avalos {
231597a9217aSAntonio Huete Jimenez ic->cur_mark += 1;
231697a9217aSAntonio Huete Jimenez make_marks(ic, ic->root);
23171077d0bdSPeter Avalos }
23181077d0bdSPeter Avalos
23191077d0bdSPeter Avalos /*
23201077d0bdSPeter Avalos * True iff the two stmt lists load the same value from the packet into
23211077d0bdSPeter Avalos * the accumulator.
23221077d0bdSPeter Avalos */
23231077d0bdSPeter Avalos static int
eq_slist(struct slist * x,struct slist * y)23240e381983SMatthew Dillon eq_slist(struct slist *x, struct slist *y)
23251077d0bdSPeter Avalos {
23263a289941SAaron LI for (;;) {
23271077d0bdSPeter Avalos while (x && x->s.code == NOP)
23281077d0bdSPeter Avalos x = x->next;
23291077d0bdSPeter Avalos while (y && y->s.code == NOP)
23301077d0bdSPeter Avalos y = y->next;
23311077d0bdSPeter Avalos if (x == 0)
23321077d0bdSPeter Avalos return y == 0;
23331077d0bdSPeter Avalos if (y == 0)
23341077d0bdSPeter Avalos return x == 0;
23351077d0bdSPeter Avalos if (x->s.code != y->s.code || x->s.k != y->s.k)
23361077d0bdSPeter Avalos return 0;
23371077d0bdSPeter Avalos x = x->next;
23381077d0bdSPeter Avalos y = y->next;
23391077d0bdSPeter Avalos }
23401077d0bdSPeter Avalos }
23411077d0bdSPeter Avalos
23421077d0bdSPeter Avalos static inline int
eq_blk(struct block * b0,struct block * b1)23430e381983SMatthew Dillon eq_blk(struct block *b0, struct block *b1)
23441077d0bdSPeter Avalos {
23451077d0bdSPeter Avalos if (b0->s.code == b1->s.code &&
23461077d0bdSPeter Avalos b0->s.k == b1->s.k &&
23471077d0bdSPeter Avalos b0->et.succ == b1->et.succ &&
23481077d0bdSPeter Avalos b0->ef.succ == b1->ef.succ)
23491077d0bdSPeter Avalos return eq_slist(b0->stmts, b1->stmts);
23501077d0bdSPeter Avalos return 0;
23511077d0bdSPeter Avalos }
23521077d0bdSPeter Avalos
23531077d0bdSPeter Avalos static void
intern_blocks(opt_state_t * opt_state,struct icode * ic)235497a9217aSAntonio Huete Jimenez intern_blocks(opt_state_t *opt_state, struct icode *ic)
23551077d0bdSPeter Avalos {
23561077d0bdSPeter Avalos struct block *p;
2357*ea16f64eSAntonio Huete Jimenez u_int i, j;
23581077d0bdSPeter Avalos int done1; /* don't shadow global */
23591077d0bdSPeter Avalos top:
23601077d0bdSPeter Avalos done1 = 1;
236197a9217aSAntonio Huete Jimenez for (i = 0; i < opt_state->n_blocks; ++i)
236297a9217aSAntonio Huete Jimenez opt_state->blocks[i]->link = 0;
23631077d0bdSPeter Avalos
236497a9217aSAntonio Huete Jimenez mark_code(ic);
23651077d0bdSPeter Avalos
2366*ea16f64eSAntonio Huete Jimenez for (i = opt_state->n_blocks - 1; i != 0; ) {
2367*ea16f64eSAntonio Huete Jimenez --i;
236897a9217aSAntonio Huete Jimenez if (!isMarked(ic, opt_state->blocks[i]))
23691077d0bdSPeter Avalos continue;
237097a9217aSAntonio Huete Jimenez for (j = i + 1; j < opt_state->n_blocks; ++j) {
237197a9217aSAntonio Huete Jimenez if (!isMarked(ic, opt_state->blocks[j]))
23721077d0bdSPeter Avalos continue;
237397a9217aSAntonio Huete Jimenez if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
237497a9217aSAntonio Huete Jimenez opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
237597a9217aSAntonio Huete Jimenez opt_state->blocks[j]->link : opt_state->blocks[j];
23761077d0bdSPeter Avalos break;
23771077d0bdSPeter Avalos }
23781077d0bdSPeter Avalos }
23791077d0bdSPeter Avalos }
238097a9217aSAntonio Huete Jimenez for (i = 0; i < opt_state->n_blocks; ++i) {
238197a9217aSAntonio Huete Jimenez p = opt_state->blocks[i];
23821077d0bdSPeter Avalos if (JT(p) == 0)
23831077d0bdSPeter Avalos continue;
23841077d0bdSPeter Avalos if (JT(p)->link) {
23851077d0bdSPeter Avalos done1 = 0;
23861077d0bdSPeter Avalos JT(p) = JT(p)->link;
23871077d0bdSPeter Avalos }
23881077d0bdSPeter Avalos if (JF(p)->link) {
23891077d0bdSPeter Avalos done1 = 0;
23901077d0bdSPeter Avalos JF(p) = JF(p)->link;
23911077d0bdSPeter Avalos }
23921077d0bdSPeter Avalos }
23931077d0bdSPeter Avalos if (!done1)
23941077d0bdSPeter Avalos goto top;
23951077d0bdSPeter Avalos }
23961077d0bdSPeter Avalos
23971077d0bdSPeter Avalos static void
opt_cleanup(opt_state_t * opt_state)239897a9217aSAntonio Huete Jimenez opt_cleanup(opt_state_t *opt_state)
23991077d0bdSPeter Avalos {
240097a9217aSAntonio Huete Jimenez free((void *)opt_state->vnode_base);
240197a9217aSAntonio Huete Jimenez free((void *)opt_state->vmap);
240297a9217aSAntonio Huete Jimenez free((void *)opt_state->edges);
240397a9217aSAntonio Huete Jimenez free((void *)opt_state->space);
240497a9217aSAntonio Huete Jimenez free((void *)opt_state->levels);
240597a9217aSAntonio Huete Jimenez free((void *)opt_state->blocks);
24061077d0bdSPeter Avalos }
24071077d0bdSPeter Avalos
24081077d0bdSPeter Avalos /*
24093a289941SAaron LI * For optimizer errors.
24103a289941SAaron LI */
24113a289941SAaron LI static void PCAP_NORETURN
opt_error(opt_state_t * opt_state,const char * fmt,...)24123a289941SAaron LI opt_error(opt_state_t *opt_state, const char *fmt, ...)
24133a289941SAaron LI {
24143a289941SAaron LI va_list ap;
24153a289941SAaron LI
24163a289941SAaron LI if (opt_state->errbuf != NULL) {
24173a289941SAaron LI va_start(ap, fmt);
2418*ea16f64eSAntonio Huete Jimenez (void)vsnprintf(opt_state->errbuf,
24193a289941SAaron LI PCAP_ERRBUF_SIZE, fmt, ap);
24203a289941SAaron LI va_end(ap);
24213a289941SAaron LI }
24223a289941SAaron LI longjmp(opt_state->top_ctx, 1);
24233a289941SAaron LI /* NOTREACHED */
24243a289941SAaron LI }
24253a289941SAaron LI
24263a289941SAaron LI /*
24271077d0bdSPeter Avalos * Return the number of stmts in 's'.
24281077d0bdSPeter Avalos */
24290e1eae1fSPeter Avalos static u_int
slength(struct slist * s)24300e381983SMatthew Dillon slength(struct slist *s)
24311077d0bdSPeter Avalos {
24320e1eae1fSPeter Avalos u_int n = 0;
24331077d0bdSPeter Avalos
24341077d0bdSPeter Avalos for (; s; s = s->next)
24351077d0bdSPeter Avalos if (s->s.code != NOP)
24361077d0bdSPeter Avalos ++n;
24371077d0bdSPeter Avalos return n;
24381077d0bdSPeter Avalos }
24391077d0bdSPeter Avalos
24401077d0bdSPeter Avalos /*
24411077d0bdSPeter Avalos * Return the number of nodes reachable by 'p'.
24421077d0bdSPeter Avalos * All nodes should be initially unmarked.
24431077d0bdSPeter Avalos */
24441077d0bdSPeter Avalos static int
count_blocks(struct icode * ic,struct block * p)244597a9217aSAntonio Huete Jimenez count_blocks(struct icode *ic, struct block *p)
24461077d0bdSPeter Avalos {
244797a9217aSAntonio Huete Jimenez if (p == 0 || isMarked(ic, p))
24481077d0bdSPeter Avalos return 0;
244997a9217aSAntonio Huete Jimenez Mark(ic, p);
245097a9217aSAntonio Huete Jimenez return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
24511077d0bdSPeter Avalos }
24521077d0bdSPeter Avalos
24531077d0bdSPeter Avalos /*
24541077d0bdSPeter Avalos * Do a depth first search on the flow graph, numbering the
24551077d0bdSPeter Avalos * the basic blocks, and entering them into the 'blocks' array.`
24561077d0bdSPeter Avalos */
24571077d0bdSPeter Avalos static void
number_blks_r(opt_state_t * opt_state,struct icode * ic,struct block * p)245897a9217aSAntonio Huete Jimenez number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
24591077d0bdSPeter Avalos {
2460*ea16f64eSAntonio Huete Jimenez u_int n;
24611077d0bdSPeter Avalos
246297a9217aSAntonio Huete Jimenez if (p == 0 || isMarked(ic, p))
24631077d0bdSPeter Avalos return;
24641077d0bdSPeter Avalos
246597a9217aSAntonio Huete Jimenez Mark(ic, p);
246697a9217aSAntonio Huete Jimenez n = opt_state->n_blocks++;
2467*ea16f64eSAntonio Huete Jimenez if (opt_state->n_blocks == 0) {
2468*ea16f64eSAntonio Huete Jimenez /*
2469*ea16f64eSAntonio Huete Jimenez * Overflow.
2470*ea16f64eSAntonio Huete Jimenez */
2471*ea16f64eSAntonio Huete Jimenez opt_error(opt_state, "filter is too complex to optimize");
2472*ea16f64eSAntonio Huete Jimenez }
24731077d0bdSPeter Avalos p->id = n;
247497a9217aSAntonio Huete Jimenez opt_state->blocks[n] = p;
24751077d0bdSPeter Avalos
247697a9217aSAntonio Huete Jimenez number_blks_r(opt_state, ic, JT(p));
247797a9217aSAntonio Huete Jimenez number_blks_r(opt_state, ic, JF(p));
24781077d0bdSPeter Avalos }
24791077d0bdSPeter Avalos
24801077d0bdSPeter Avalos /*
24811077d0bdSPeter Avalos * Return the number of stmts in the flowgraph reachable by 'p'.
24821077d0bdSPeter Avalos * The nodes should be unmarked before calling.
24831077d0bdSPeter Avalos *
24841077d0bdSPeter Avalos * Note that "stmts" means "instructions", and that this includes
24851077d0bdSPeter Avalos *
24861077d0bdSPeter Avalos * side-effect statements in 'p' (slength(p->stmts));
24871077d0bdSPeter Avalos *
24881077d0bdSPeter Avalos * statements in the true branch from 'p' (count_stmts(JT(p)));
24891077d0bdSPeter Avalos *
24901077d0bdSPeter Avalos * statements in the false branch from 'p' (count_stmts(JF(p)));
24911077d0bdSPeter Avalos *
24921077d0bdSPeter Avalos * the conditional jump itself (1);
24931077d0bdSPeter Avalos *
24941077d0bdSPeter Avalos * an extra long jump if the true branch requires it (p->longjt);
24951077d0bdSPeter Avalos *
24961077d0bdSPeter Avalos * an extra long jump if the false branch requires it (p->longjf).
24971077d0bdSPeter Avalos */
24980e1eae1fSPeter Avalos static u_int
count_stmts(struct icode * ic,struct block * p)249997a9217aSAntonio Huete Jimenez count_stmts(struct icode *ic, struct block *p)
25001077d0bdSPeter Avalos {
25010e1eae1fSPeter Avalos u_int n;
25021077d0bdSPeter Avalos
250397a9217aSAntonio Huete Jimenez if (p == 0 || isMarked(ic, p))
25041077d0bdSPeter Avalos return 0;
250597a9217aSAntonio Huete Jimenez Mark(ic, p);
250697a9217aSAntonio Huete Jimenez n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
25071077d0bdSPeter Avalos return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
25081077d0bdSPeter Avalos }
25091077d0bdSPeter Avalos
25101077d0bdSPeter Avalos /*
25111077d0bdSPeter Avalos * Allocate memory. All allocation is done before optimization
25121077d0bdSPeter Avalos * is begun. A linear bound on the size of all data structures is computed
25131077d0bdSPeter Avalos * from the total number of blocks and/or statements.
25141077d0bdSPeter Avalos */
25151077d0bdSPeter Avalos static void
opt_init(opt_state_t * opt_state,struct icode * ic)25163a289941SAaron LI opt_init(opt_state_t *opt_state, struct icode *ic)
25171077d0bdSPeter Avalos {
25181077d0bdSPeter Avalos bpf_u_int32 *p;
25191077d0bdSPeter Avalos int i, n, max_stmts;
2520*ea16f64eSAntonio Huete Jimenez u_int product;
2521*ea16f64eSAntonio Huete Jimenez size_t block_memsize, edge_memsize;
25221077d0bdSPeter Avalos
25231077d0bdSPeter Avalos /*
25241077d0bdSPeter Avalos * First, count the blocks, so we can malloc an array to map
25251077d0bdSPeter Avalos * block number to block. Then, put the blocks into the array.
25261077d0bdSPeter Avalos */
252797a9217aSAntonio Huete Jimenez unMarkAll(ic);
252897a9217aSAntonio Huete Jimenez n = count_blocks(ic, ic->root);
252997a9217aSAntonio Huete Jimenez opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
253097a9217aSAntonio Huete Jimenez if (opt_state->blocks == NULL)
25313a289941SAaron LI opt_error(opt_state, "malloc");
253297a9217aSAntonio Huete Jimenez unMarkAll(ic);
253397a9217aSAntonio Huete Jimenez opt_state->n_blocks = 0;
253497a9217aSAntonio Huete Jimenez number_blks_r(opt_state, ic, ic->root);
25351077d0bdSPeter Avalos
2536*ea16f64eSAntonio Huete Jimenez /*
2537*ea16f64eSAntonio Huete Jimenez * This "should not happen".
2538*ea16f64eSAntonio Huete Jimenez */
2539*ea16f64eSAntonio Huete Jimenez if (opt_state->n_blocks == 0)
2540*ea16f64eSAntonio Huete Jimenez opt_error(opt_state, "filter has no instructions; please report this as a libpcap issue");
2541*ea16f64eSAntonio Huete Jimenez
254297a9217aSAntonio Huete Jimenez opt_state->n_edges = 2 * opt_state->n_blocks;
2543*ea16f64eSAntonio Huete Jimenez if ((opt_state->n_edges / 2) != opt_state->n_blocks) {
2544*ea16f64eSAntonio Huete Jimenez /*
2545*ea16f64eSAntonio Huete Jimenez * Overflow.
2546*ea16f64eSAntonio Huete Jimenez */
2547*ea16f64eSAntonio Huete Jimenez opt_error(opt_state, "filter is too complex to optimize");
2548*ea16f64eSAntonio Huete Jimenez }
254997a9217aSAntonio Huete Jimenez opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
25503a289941SAaron LI if (opt_state->edges == NULL) {
25513a289941SAaron LI opt_error(opt_state, "malloc");
25523a289941SAaron LI }
25531077d0bdSPeter Avalos
25541077d0bdSPeter Avalos /*
25551077d0bdSPeter Avalos * The number of levels is bounded by the number of nodes.
25561077d0bdSPeter Avalos */
255797a9217aSAntonio Huete Jimenez opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
25583a289941SAaron LI if (opt_state->levels == NULL) {
25593a289941SAaron LI opt_error(opt_state, "malloc");
25603a289941SAaron LI }
25611077d0bdSPeter Avalos
2562*ea16f64eSAntonio Huete Jimenez opt_state->edgewords = opt_state->n_edges / BITS_PER_WORD + 1;
2563*ea16f64eSAntonio Huete Jimenez opt_state->nodewords = opt_state->n_blocks / BITS_PER_WORD + 1;
2564*ea16f64eSAntonio Huete Jimenez
2565*ea16f64eSAntonio Huete Jimenez /*
2566*ea16f64eSAntonio Huete Jimenez * Make sure opt_state->n_blocks * opt_state->nodewords fits
2567*ea16f64eSAntonio Huete Jimenez * in a u_int; we use it as a u_int number-of-iterations
2568*ea16f64eSAntonio Huete Jimenez * value.
2569*ea16f64eSAntonio Huete Jimenez */
2570*ea16f64eSAntonio Huete Jimenez product = opt_state->n_blocks * opt_state->nodewords;
2571*ea16f64eSAntonio Huete Jimenez if ((product / opt_state->n_blocks) != opt_state->nodewords) {
2572*ea16f64eSAntonio Huete Jimenez /*
2573*ea16f64eSAntonio Huete Jimenez * XXX - just punt and don't try to optimize?
2574*ea16f64eSAntonio Huete Jimenez * In practice, this is unlikely to happen with
2575*ea16f64eSAntonio Huete Jimenez * a normal filter.
2576*ea16f64eSAntonio Huete Jimenez */
2577*ea16f64eSAntonio Huete Jimenez opt_error(opt_state, "filter is too complex to optimize");
2578*ea16f64eSAntonio Huete Jimenez }
2579*ea16f64eSAntonio Huete Jimenez
2580*ea16f64eSAntonio Huete Jimenez /*
2581*ea16f64eSAntonio Huete Jimenez * Make sure the total memory required for that doesn't
2582*ea16f64eSAntonio Huete Jimenez * overflow.
2583*ea16f64eSAntonio Huete Jimenez */
2584*ea16f64eSAntonio Huete Jimenez block_memsize = (size_t)2 * product * sizeof(*opt_state->space);
2585*ea16f64eSAntonio Huete Jimenez if ((block_memsize / product) != 2 * sizeof(*opt_state->space)) {
2586*ea16f64eSAntonio Huete Jimenez opt_error(opt_state, "filter is too complex to optimize");
2587*ea16f64eSAntonio Huete Jimenez }
2588*ea16f64eSAntonio Huete Jimenez
2589*ea16f64eSAntonio Huete Jimenez /*
2590*ea16f64eSAntonio Huete Jimenez * Make sure opt_state->n_edges * opt_state->edgewords fits
2591*ea16f64eSAntonio Huete Jimenez * in a u_int; we use it as a u_int number-of-iterations
2592*ea16f64eSAntonio Huete Jimenez * value.
2593*ea16f64eSAntonio Huete Jimenez */
2594*ea16f64eSAntonio Huete Jimenez product = opt_state->n_edges * opt_state->edgewords;
2595*ea16f64eSAntonio Huete Jimenez if ((product / opt_state->n_edges) != opt_state->edgewords) {
2596*ea16f64eSAntonio Huete Jimenez opt_error(opt_state, "filter is too complex to optimize");
2597*ea16f64eSAntonio Huete Jimenez }
2598*ea16f64eSAntonio Huete Jimenez
2599*ea16f64eSAntonio Huete Jimenez /*
2600*ea16f64eSAntonio Huete Jimenez * Make sure the total memory required for that doesn't
2601*ea16f64eSAntonio Huete Jimenez * overflow.
2602*ea16f64eSAntonio Huete Jimenez */
2603*ea16f64eSAntonio Huete Jimenez edge_memsize = (size_t)product * sizeof(*opt_state->space);
2604*ea16f64eSAntonio Huete Jimenez if (edge_memsize / product != sizeof(*opt_state->space)) {
2605*ea16f64eSAntonio Huete Jimenez opt_error(opt_state, "filter is too complex to optimize");
2606*ea16f64eSAntonio Huete Jimenez }
2607*ea16f64eSAntonio Huete Jimenez
2608*ea16f64eSAntonio Huete Jimenez /*
2609*ea16f64eSAntonio Huete Jimenez * Make sure the total memory required for both of them dosn't
2610*ea16f64eSAntonio Huete Jimenez * overflow.
2611*ea16f64eSAntonio Huete Jimenez */
2612*ea16f64eSAntonio Huete Jimenez if (block_memsize > SIZE_MAX - edge_memsize) {
2613*ea16f64eSAntonio Huete Jimenez opt_error(opt_state, "filter is too complex to optimize");
2614*ea16f64eSAntonio Huete Jimenez }
26151077d0bdSPeter Avalos
26161077d0bdSPeter Avalos /* XXX */
2617*ea16f64eSAntonio Huete Jimenez opt_state->space = (bpf_u_int32 *)malloc(block_memsize + edge_memsize);
26183a289941SAaron LI if (opt_state->space == NULL) {
26193a289941SAaron LI opt_error(opt_state, "malloc");
26203a289941SAaron LI }
262197a9217aSAntonio Huete Jimenez p = opt_state->space;
262297a9217aSAntonio Huete Jimenez opt_state->all_dom_sets = p;
26231077d0bdSPeter Avalos for (i = 0; i < n; ++i) {
262497a9217aSAntonio Huete Jimenez opt_state->blocks[i]->dom = p;
262597a9217aSAntonio Huete Jimenez p += opt_state->nodewords;
26261077d0bdSPeter Avalos }
262797a9217aSAntonio Huete Jimenez opt_state->all_closure_sets = p;
26281077d0bdSPeter Avalos for (i = 0; i < n; ++i) {
262997a9217aSAntonio Huete Jimenez opt_state->blocks[i]->closure = p;
263097a9217aSAntonio Huete Jimenez p += opt_state->nodewords;
26311077d0bdSPeter Avalos }
263297a9217aSAntonio Huete Jimenez opt_state->all_edge_sets = p;
26331077d0bdSPeter Avalos for (i = 0; i < n; ++i) {
263497a9217aSAntonio Huete Jimenez register struct block *b = opt_state->blocks[i];
26351077d0bdSPeter Avalos
26361077d0bdSPeter Avalos b->et.edom = p;
263797a9217aSAntonio Huete Jimenez p += opt_state->edgewords;
26381077d0bdSPeter Avalos b->ef.edom = p;
263997a9217aSAntonio Huete Jimenez p += opt_state->edgewords;
26401077d0bdSPeter Avalos b->et.id = i;
264197a9217aSAntonio Huete Jimenez opt_state->edges[i] = &b->et;
264297a9217aSAntonio Huete Jimenez b->ef.id = opt_state->n_blocks + i;
264397a9217aSAntonio Huete Jimenez opt_state->edges[opt_state->n_blocks + i] = &b->ef;
26441077d0bdSPeter Avalos b->et.pred = b;
26451077d0bdSPeter Avalos b->ef.pred = b;
26461077d0bdSPeter Avalos }
26471077d0bdSPeter Avalos max_stmts = 0;
26481077d0bdSPeter Avalos for (i = 0; i < n; ++i)
264997a9217aSAntonio Huete Jimenez max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
26501077d0bdSPeter Avalos /*
26511077d0bdSPeter Avalos * We allocate at most 3 value numbers per statement,
26521077d0bdSPeter Avalos * so this is an upper bound on the number of valnodes
26531077d0bdSPeter Avalos * we'll need.
26541077d0bdSPeter Avalos */
265597a9217aSAntonio Huete Jimenez opt_state->maxval = 3 * max_stmts;
265697a9217aSAntonio Huete Jimenez opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
26573a289941SAaron LI if (opt_state->vmap == NULL) {
26583a289941SAaron LI opt_error(opt_state, "malloc");
26593a289941SAaron LI }
266097a9217aSAntonio Huete Jimenez opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
26613a289941SAaron LI if (opt_state->vnode_base == NULL) {
26623a289941SAaron LI opt_error(opt_state, "malloc");
26633a289941SAaron LI }
26641077d0bdSPeter Avalos }
26651077d0bdSPeter Avalos
26661077d0bdSPeter Avalos /*
266797a9217aSAntonio Huete Jimenez * This is only used when supporting optimizer debugging. It is
266897a9217aSAntonio Huete Jimenez * global state, so do *not* do more than one compile in parallel
266997a9217aSAntonio Huete Jimenez * and expect it to provide meaningful information.
26701077d0bdSPeter Avalos */
26711077d0bdSPeter Avalos #ifdef BDEBUG
26723a289941SAaron LI int bids[NBIDS];
26731077d0bdSPeter Avalos #endif
26741077d0bdSPeter Avalos
26753a289941SAaron LI static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
26763a289941SAaron LI PCAP_PRINTFLIKE(2, 3);
26773a289941SAaron LI
26781077d0bdSPeter Avalos /*
26791077d0bdSPeter Avalos * Returns true if successful. Returns false if a branch has
26801077d0bdSPeter Avalos * an offset that is too large. If so, we have marked that
26811077d0bdSPeter Avalos * branch so that on a subsequent iteration, it will be treated
26821077d0bdSPeter Avalos * properly.
26831077d0bdSPeter Avalos */
26841077d0bdSPeter Avalos static int
convert_code_r(conv_state_t * conv_state,struct icode * ic,struct block * p)26853a289941SAaron LI convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
26861077d0bdSPeter Avalos {
26871077d0bdSPeter Avalos struct bpf_insn *dst;
26881077d0bdSPeter Avalos struct slist *src;
268997a9217aSAntonio Huete Jimenez u_int slen;
26901077d0bdSPeter Avalos u_int off;
26911077d0bdSPeter Avalos struct slist **offset = NULL;
26921077d0bdSPeter Avalos
269397a9217aSAntonio Huete Jimenez if (p == 0 || isMarked(ic, p))
26941077d0bdSPeter Avalos return (1);
269597a9217aSAntonio Huete Jimenez Mark(ic, p);
26961077d0bdSPeter Avalos
26973a289941SAaron LI if (convert_code_r(conv_state, ic, JF(p)) == 0)
26981077d0bdSPeter Avalos return (0);
26993a289941SAaron LI if (convert_code_r(conv_state, ic, JT(p)) == 0)
27001077d0bdSPeter Avalos return (0);
27011077d0bdSPeter Avalos
27021077d0bdSPeter Avalos slen = slength(p->stmts);
270397a9217aSAntonio Huete Jimenez dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
27041077d0bdSPeter Avalos /* inflate length by any extra jumps */
27051077d0bdSPeter Avalos
270697a9217aSAntonio Huete Jimenez p->offset = (int)(dst - conv_state->fstart);
27071077d0bdSPeter Avalos
27081077d0bdSPeter Avalos /* generate offset[] for convenience */
27091077d0bdSPeter Avalos if (slen) {
27101077d0bdSPeter Avalos offset = (struct slist **)calloc(slen, sizeof(struct slist *));
27111077d0bdSPeter Avalos if (!offset) {
27123a289941SAaron LI conv_error(conv_state, "not enough core");
27131077d0bdSPeter Avalos /*NOTREACHED*/
27141077d0bdSPeter Avalos }
27151077d0bdSPeter Avalos }
27161077d0bdSPeter Avalos src = p->stmts;
27171077d0bdSPeter Avalos for (off = 0; off < slen && src; off++) {
27181077d0bdSPeter Avalos #if 0
27191077d0bdSPeter Avalos printf("off=%d src=%x\n", off, src);
27201077d0bdSPeter Avalos #endif
27211077d0bdSPeter Avalos offset[off] = src;
27221077d0bdSPeter Avalos src = src->next;
27231077d0bdSPeter Avalos }
27241077d0bdSPeter Avalos
27251077d0bdSPeter Avalos off = 0;
27261077d0bdSPeter Avalos for (src = p->stmts; src; src = src->next) {
27271077d0bdSPeter Avalos if (src->s.code == NOP)
27281077d0bdSPeter Avalos continue;
27291077d0bdSPeter Avalos dst->code = (u_short)src->s.code;
27301077d0bdSPeter Avalos dst->k = src->s.k;
27311077d0bdSPeter Avalos
27321077d0bdSPeter Avalos /* fill block-local relative jump */
27331077d0bdSPeter Avalos if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
27341077d0bdSPeter Avalos #if 0
27351077d0bdSPeter Avalos if (src->s.jt || src->s.jf) {
27363a289941SAaron LI free(offset);
27373a289941SAaron LI conv_error(conv_state, "illegal jmp destination");
27381077d0bdSPeter Avalos /*NOTREACHED*/
27391077d0bdSPeter Avalos }
27401077d0bdSPeter Avalos #endif
27411077d0bdSPeter Avalos goto filled;
27421077d0bdSPeter Avalos }
27431077d0bdSPeter Avalos if (off == slen - 2) /*???*/
27441077d0bdSPeter Avalos goto filled;
27451077d0bdSPeter Avalos
27461077d0bdSPeter Avalos {
274797a9217aSAntonio Huete Jimenez u_int i;
27481077d0bdSPeter Avalos int jt, jf;
27493a289941SAaron LI const char ljerr[] = "%s for block-local relative jump: off=%d";
27501077d0bdSPeter Avalos
27511077d0bdSPeter Avalos #if 0
27521077d0bdSPeter Avalos printf("code=%x off=%d %x %x\n", src->s.code,
27531077d0bdSPeter Avalos off, src->s.jt, src->s.jf);
27541077d0bdSPeter Avalos #endif
27551077d0bdSPeter Avalos
27561077d0bdSPeter Avalos if (!src->s.jt || !src->s.jf) {
27573a289941SAaron LI free(offset);
27583a289941SAaron LI conv_error(conv_state, ljerr, "no jmp destination", off);
27591077d0bdSPeter Avalos /*NOTREACHED*/
27601077d0bdSPeter Avalos }
27611077d0bdSPeter Avalos
27621077d0bdSPeter Avalos jt = jf = 0;
27631077d0bdSPeter Avalos for (i = 0; i < slen; i++) {
27641077d0bdSPeter Avalos if (offset[i] == src->s.jt) {
27651077d0bdSPeter Avalos if (jt) {
27663a289941SAaron LI free(offset);
27673a289941SAaron LI conv_error(conv_state, ljerr, "multiple matches", off);
27681077d0bdSPeter Avalos /*NOTREACHED*/
27691077d0bdSPeter Avalos }
27701077d0bdSPeter Avalos
27713a289941SAaron LI if (i - off - 1 >= 256) {
27723a289941SAaron LI free(offset);
27733a289941SAaron LI conv_error(conv_state, ljerr, "out-of-range jump", off);
27743a289941SAaron LI /*NOTREACHED*/
27753a289941SAaron LI }
27763a289941SAaron LI dst->jt = (u_char)(i - off - 1);
27771077d0bdSPeter Avalos jt++;
27781077d0bdSPeter Avalos }
27791077d0bdSPeter Avalos if (offset[i] == src->s.jf) {
27801077d0bdSPeter Avalos if (jf) {
27813a289941SAaron LI free(offset);
27823a289941SAaron LI conv_error(conv_state, ljerr, "multiple matches", off);
27831077d0bdSPeter Avalos /*NOTREACHED*/
27841077d0bdSPeter Avalos }
27853a289941SAaron LI if (i - off - 1 >= 256) {
27863a289941SAaron LI free(offset);
27873a289941SAaron LI conv_error(conv_state, ljerr, "out-of-range jump", off);
27883a289941SAaron LI /*NOTREACHED*/
27893a289941SAaron LI }
27903a289941SAaron LI dst->jf = (u_char)(i - off - 1);
27911077d0bdSPeter Avalos jf++;
27921077d0bdSPeter Avalos }
27931077d0bdSPeter Avalos }
27941077d0bdSPeter Avalos if (!jt || !jf) {
27953a289941SAaron LI free(offset);
27963a289941SAaron LI conv_error(conv_state, ljerr, "no destination found", off);
27971077d0bdSPeter Avalos /*NOTREACHED*/
27981077d0bdSPeter Avalos }
27991077d0bdSPeter Avalos }
28001077d0bdSPeter Avalos filled:
28011077d0bdSPeter Avalos ++dst;
28021077d0bdSPeter Avalos ++off;
28031077d0bdSPeter Avalos }
28041077d0bdSPeter Avalos if (offset)
28051077d0bdSPeter Avalos free(offset);
28061077d0bdSPeter Avalos
28071077d0bdSPeter Avalos #ifdef BDEBUG
28083a289941SAaron LI if (dst - conv_state->fstart < NBIDS)
280997a9217aSAntonio Huete Jimenez bids[dst - conv_state->fstart] = p->id + 1;
28101077d0bdSPeter Avalos #endif
28111077d0bdSPeter Avalos dst->code = (u_short)p->s.code;
28121077d0bdSPeter Avalos dst->k = p->s.k;
28131077d0bdSPeter Avalos if (JT(p)) {
2814*ea16f64eSAntonio Huete Jimenez /* number of extra jumps inserted */
2815*ea16f64eSAntonio Huete Jimenez u_char extrajmps = 0;
28161077d0bdSPeter Avalos off = JT(p)->offset - (p->offset + slen) - 1;
28171077d0bdSPeter Avalos if (off >= 256) {
28181077d0bdSPeter Avalos /* offset too large for branch, must add a jump */
28191077d0bdSPeter Avalos if (p->longjt == 0) {
28201077d0bdSPeter Avalos /* mark this instruction and retry */
28211077d0bdSPeter Avalos p->longjt++;
28221077d0bdSPeter Avalos return(0);
28231077d0bdSPeter Avalos }
2824*ea16f64eSAntonio Huete Jimenez dst->jt = extrajmps;
28251077d0bdSPeter Avalos extrajmps++;
28261077d0bdSPeter Avalos dst[extrajmps].code = BPF_JMP|BPF_JA;
28271077d0bdSPeter Avalos dst[extrajmps].k = off - extrajmps;
28281077d0bdSPeter Avalos }
28291077d0bdSPeter Avalos else
28303a289941SAaron LI dst->jt = (u_char)off;
28311077d0bdSPeter Avalos off = JF(p)->offset - (p->offset + slen) - 1;
28321077d0bdSPeter Avalos if (off >= 256) {
28331077d0bdSPeter Avalos /* offset too large for branch, must add a jump */
28341077d0bdSPeter Avalos if (p->longjf == 0) {
28351077d0bdSPeter Avalos /* mark this instruction and retry */
28361077d0bdSPeter Avalos p->longjf++;
28371077d0bdSPeter Avalos return(0);
28381077d0bdSPeter Avalos }
28391077d0bdSPeter Avalos /* branch if F to following jump */
28401077d0bdSPeter Avalos /* if two jumps are inserted, F goes to second one */
2841*ea16f64eSAntonio Huete Jimenez dst->jf = extrajmps;
28421077d0bdSPeter Avalos extrajmps++;
28431077d0bdSPeter Avalos dst[extrajmps].code = BPF_JMP|BPF_JA;
28441077d0bdSPeter Avalos dst[extrajmps].k = off - extrajmps;
28451077d0bdSPeter Avalos }
28461077d0bdSPeter Avalos else
28473a289941SAaron LI dst->jf = (u_char)off;
28481077d0bdSPeter Avalos }
28491077d0bdSPeter Avalos return (1);
28501077d0bdSPeter Avalos }
28511077d0bdSPeter Avalos
28521077d0bdSPeter Avalos
28531077d0bdSPeter Avalos /*
28541077d0bdSPeter Avalos * Convert flowgraph intermediate representation to the
28551077d0bdSPeter Avalos * BPF array representation. Set *lenp to the number of instructions.
28561077d0bdSPeter Avalos *
28571077d0bdSPeter Avalos * This routine does *NOT* leak the memory pointed to by fp. It *must
28581077d0bdSPeter Avalos * not* do free(fp) before returning fp; doing so would make no sense,
28591077d0bdSPeter Avalos * as the BPF array pointed to by the return value of icode_to_fcode()
28601077d0bdSPeter Avalos * must be valid - it's being returned for use in a bpf_program structure.
28611077d0bdSPeter Avalos *
28621077d0bdSPeter Avalos * If it appears that icode_to_fcode() is leaking, the problem is that
28631077d0bdSPeter Avalos * the program using pcap_compile() is failing to free the memory in
28641077d0bdSPeter Avalos * the BPF program when it's done - the leak is in the program, not in
28651077d0bdSPeter Avalos * the routine that happens to be allocating the memory. (By analogy, if
28661077d0bdSPeter Avalos * a program calls fopen() without ever calling fclose() on the FILE *,
28671077d0bdSPeter Avalos * it will leak the FILE structure; the leak is not in fopen(), it's in
28681077d0bdSPeter Avalos * the program.) Change the program to use pcap_freecode() when it's
28691077d0bdSPeter Avalos * done with the filter program. See the pcap man page.
28701077d0bdSPeter Avalos */
28711077d0bdSPeter Avalos struct bpf_insn *
icode_to_fcode(struct icode * ic,struct block * root,u_int * lenp,char * errbuf)28723a289941SAaron LI icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
28733a289941SAaron LI char *errbuf)
28741077d0bdSPeter Avalos {
28750e1eae1fSPeter Avalos u_int n;
28761077d0bdSPeter Avalos struct bpf_insn *fp;
287797a9217aSAntonio Huete Jimenez conv_state_t conv_state;
28781077d0bdSPeter Avalos
28793a289941SAaron LI conv_state.fstart = NULL;
28803a289941SAaron LI conv_state.errbuf = errbuf;
28813a289941SAaron LI if (setjmp(conv_state.top_ctx) != 0) {
28823a289941SAaron LI free(conv_state.fstart);
28833a289941SAaron LI return NULL;
28843a289941SAaron LI }
28853a289941SAaron LI
28861077d0bdSPeter Avalos /*
28871077d0bdSPeter Avalos * Loop doing convert_code_r() until no branches remain
28881077d0bdSPeter Avalos * with too-large offsets.
28891077d0bdSPeter Avalos */
28903a289941SAaron LI for (;;) {
289197a9217aSAntonio Huete Jimenez unMarkAll(ic);
289297a9217aSAntonio Huete Jimenez n = *lenp = count_stmts(ic, root);
28931077d0bdSPeter Avalos
28941077d0bdSPeter Avalos fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
28953a289941SAaron LI if (fp == NULL) {
2896*ea16f64eSAntonio Huete Jimenez (void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
28973a289941SAaron LI "malloc");
28983a289941SAaron LI free(fp);
28993a289941SAaron LI return NULL;
29003a289941SAaron LI }
29011077d0bdSPeter Avalos memset((char *)fp, 0, sizeof(*fp) * n);
290297a9217aSAntonio Huete Jimenez conv_state.fstart = fp;
290397a9217aSAntonio Huete Jimenez conv_state.ftail = fp + n;
29041077d0bdSPeter Avalos
290597a9217aSAntonio Huete Jimenez unMarkAll(ic);
29063a289941SAaron LI if (convert_code_r(&conv_state, ic, root))
29071077d0bdSPeter Avalos break;
29081077d0bdSPeter Avalos free(fp);
29091077d0bdSPeter Avalos }
29101077d0bdSPeter Avalos
29111077d0bdSPeter Avalos return fp;
29121077d0bdSPeter Avalos }
29131077d0bdSPeter Avalos
29141077d0bdSPeter Avalos /*
29153a289941SAaron LI * For iconv_to_fconv() errors.
29163a289941SAaron LI */
29173a289941SAaron LI static void PCAP_NORETURN
conv_error(conv_state_t * conv_state,const char * fmt,...)29183a289941SAaron LI conv_error(conv_state_t *conv_state, const char *fmt, ...)
29193a289941SAaron LI {
29203a289941SAaron LI va_list ap;
29213a289941SAaron LI
29223a289941SAaron LI va_start(ap, fmt);
2923*ea16f64eSAntonio Huete Jimenez (void)vsnprintf(conv_state->errbuf,
29243a289941SAaron LI PCAP_ERRBUF_SIZE, fmt, ap);
29253a289941SAaron LI va_end(ap);
29263a289941SAaron LI longjmp(conv_state->top_ctx, 1);
29273a289941SAaron LI /* NOTREACHED */
29283a289941SAaron LI }
29293a289941SAaron LI
29303a289941SAaron LI /*
29311077d0bdSPeter Avalos * Make a copy of a BPF program and put it in the "fcode" member of
29321077d0bdSPeter Avalos * a "pcap_t".
29331077d0bdSPeter Avalos *
29341077d0bdSPeter Avalos * If we fail to allocate memory for the copy, fill in the "errbuf"
29351077d0bdSPeter Avalos * member of the "pcap_t" with an error message, and return -1;
29361077d0bdSPeter Avalos * otherwise, return 0.
29371077d0bdSPeter Avalos */
29381077d0bdSPeter Avalos int
install_bpf_program(pcap_t * p,struct bpf_program * fp)29391077d0bdSPeter Avalos install_bpf_program(pcap_t *p, struct bpf_program *fp)
29401077d0bdSPeter Avalos {
29411077d0bdSPeter Avalos size_t prog_size;
29421077d0bdSPeter Avalos
29431077d0bdSPeter Avalos /*
2944de0d3203SPeter Avalos * Validate the program.
2945de0d3203SPeter Avalos */
2946*ea16f64eSAntonio Huete Jimenez if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) {
2947*ea16f64eSAntonio Huete Jimenez snprintf(p->errbuf, sizeof(p->errbuf),
2948de0d3203SPeter Avalos "BPF program is not valid");
2949de0d3203SPeter Avalos return (-1);
2950de0d3203SPeter Avalos }
2951de0d3203SPeter Avalos
2952de0d3203SPeter Avalos /*
29531077d0bdSPeter Avalos * Free up any already installed program.
29541077d0bdSPeter Avalos */
29551077d0bdSPeter Avalos pcap_freecode(&p->fcode);
29561077d0bdSPeter Avalos
29571077d0bdSPeter Avalos prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
29581077d0bdSPeter Avalos p->fcode.bf_len = fp->bf_len;
29591077d0bdSPeter Avalos p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
29601077d0bdSPeter Avalos if (p->fcode.bf_insns == NULL) {
29613a289941SAaron LI pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
29623a289941SAaron LI errno, "malloc");
29631077d0bdSPeter Avalos return (-1);
29641077d0bdSPeter Avalos }
29651077d0bdSPeter Avalos memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
29661077d0bdSPeter Avalos return (0);
29671077d0bdSPeter Avalos }
29681077d0bdSPeter Avalos
29691077d0bdSPeter Avalos #ifdef BDEBUG
29701077d0bdSPeter Avalos static void
dot_dump_node(struct icode * ic,struct block * block,struct bpf_program * prog,FILE * out)297197a9217aSAntonio Huete Jimenez dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
297297a9217aSAntonio Huete Jimenez FILE *out)
297397a9217aSAntonio Huete Jimenez {
297497a9217aSAntonio Huete Jimenez int icount, noffset;
297597a9217aSAntonio Huete Jimenez int i;
297697a9217aSAntonio Huete Jimenez
297797a9217aSAntonio Huete Jimenez if (block == NULL || isMarked(ic, block))
297897a9217aSAntonio Huete Jimenez return;
297997a9217aSAntonio Huete Jimenez Mark(ic, block);
298097a9217aSAntonio Huete Jimenez
298197a9217aSAntonio Huete Jimenez icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
298297a9217aSAntonio Huete Jimenez noffset = min(block->offset + icount, (int)prog->bf_len);
298397a9217aSAntonio Huete Jimenez
2984*ea16f64eSAntonio Huete Jimenez fprintf(out, "\tblock%u [shape=ellipse, id=\"block-%u\" label=\"BLOCK%u\\n", block->id, block->id, block->id);
298597a9217aSAntonio Huete Jimenez for (i = block->offset; i < noffset; i++) {
298697a9217aSAntonio Huete Jimenez fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
298797a9217aSAntonio Huete Jimenez }
298897a9217aSAntonio Huete Jimenez fprintf(out, "\" tooltip=\"");
298997a9217aSAntonio Huete Jimenez for (i = 0; i < BPF_MEMWORDS; i++)
29903a289941SAaron LI if (block->val[i] != VAL_UNKNOWN)
299197a9217aSAntonio Huete Jimenez fprintf(out, "val[%d]=%d ", i, block->val[i]);
299297a9217aSAntonio Huete Jimenez fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
299397a9217aSAntonio Huete Jimenez fprintf(out, "val[X]=%d", block->val[X_ATOM]);
299497a9217aSAntonio Huete Jimenez fprintf(out, "\"");
299597a9217aSAntonio Huete Jimenez if (JT(block) == NULL)
299697a9217aSAntonio Huete Jimenez fprintf(out, ", peripheries=2");
299797a9217aSAntonio Huete Jimenez fprintf(out, "];\n");
299897a9217aSAntonio Huete Jimenez
299997a9217aSAntonio Huete Jimenez dot_dump_node(ic, JT(block), prog, out);
300097a9217aSAntonio Huete Jimenez dot_dump_node(ic, JF(block), prog, out);
300197a9217aSAntonio Huete Jimenez }
300297a9217aSAntonio Huete Jimenez
300397a9217aSAntonio Huete Jimenez static void
dot_dump_edge(struct icode * ic,struct block * block,FILE * out)300497a9217aSAntonio Huete Jimenez dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
300597a9217aSAntonio Huete Jimenez {
300697a9217aSAntonio Huete Jimenez if (block == NULL || isMarked(ic, block))
300797a9217aSAntonio Huete Jimenez return;
300897a9217aSAntonio Huete Jimenez Mark(ic, block);
300997a9217aSAntonio Huete Jimenez
301097a9217aSAntonio Huete Jimenez if (JT(block)) {
3011*ea16f64eSAntonio Huete Jimenez fprintf(out, "\t\"block%u\":se -> \"block%u\":n [label=\"T\"]; \n",
301297a9217aSAntonio Huete Jimenez block->id, JT(block)->id);
3013*ea16f64eSAntonio Huete Jimenez fprintf(out, "\t\"block%u\":sw -> \"block%u\":n [label=\"F\"]; \n",
301497a9217aSAntonio Huete Jimenez block->id, JF(block)->id);
301597a9217aSAntonio Huete Jimenez }
301697a9217aSAntonio Huete Jimenez dot_dump_edge(ic, JT(block), out);
301797a9217aSAntonio Huete Jimenez dot_dump_edge(ic, JF(block), out);
301897a9217aSAntonio Huete Jimenez }
301997a9217aSAntonio Huete Jimenez
302097a9217aSAntonio Huete Jimenez /* Output the block CFG using graphviz/DOT language
302197a9217aSAntonio Huete Jimenez * In the CFG, block's code, value index for each registers at EXIT,
302297a9217aSAntonio Huete Jimenez * and the jump relationship is show.
302397a9217aSAntonio Huete Jimenez *
302497a9217aSAntonio Huete Jimenez * example DOT for BPF `ip src host 1.1.1.1' is:
302597a9217aSAntonio Huete Jimenez digraph BPF {
302697a9217aSAntonio Huete Jimenez block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
302797a9217aSAntonio Huete Jimenez block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
302897a9217aSAntonio Huete Jimenez block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
302997a9217aSAntonio Huete Jimenez block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
303097a9217aSAntonio Huete Jimenez "block0":se -> "block1":n [label="T"];
303197a9217aSAntonio Huete Jimenez "block0":sw -> "block3":n [label="F"];
303297a9217aSAntonio Huete Jimenez "block1":se -> "block2":n [label="T"];
303397a9217aSAntonio Huete Jimenez "block1":sw -> "block3":n [label="F"];
303497a9217aSAntonio Huete Jimenez }
303597a9217aSAntonio Huete Jimenez *
3036*ea16f64eSAntonio Huete Jimenez * After install graphviz on https://www.graphviz.org/, save it as bpf.dot
303797a9217aSAntonio Huete Jimenez * and run `dot -Tpng -O bpf.dot' to draw the graph.
303897a9217aSAntonio Huete Jimenez */
30393a289941SAaron LI static int
dot_dump(struct icode * ic,char * errbuf)30403a289941SAaron LI dot_dump(struct icode *ic, char *errbuf)
304197a9217aSAntonio Huete Jimenez {
304297a9217aSAntonio Huete Jimenez struct bpf_program f;
304397a9217aSAntonio Huete Jimenez FILE *out = stdout;
304497a9217aSAntonio Huete Jimenez
304597a9217aSAntonio Huete Jimenez memset(bids, 0, sizeof bids);
30463a289941SAaron LI f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
30473a289941SAaron LI if (f.bf_insns == NULL)
30483a289941SAaron LI return -1;
304997a9217aSAntonio Huete Jimenez
305097a9217aSAntonio Huete Jimenez fprintf(out, "digraph BPF {\n");
305197a9217aSAntonio Huete Jimenez unMarkAll(ic);
305297a9217aSAntonio Huete Jimenez dot_dump_node(ic, ic->root, &f, out);
305397a9217aSAntonio Huete Jimenez unMarkAll(ic);
305497a9217aSAntonio Huete Jimenez dot_dump_edge(ic, ic->root, out);
305597a9217aSAntonio Huete Jimenez fprintf(out, "}\n");
305697a9217aSAntonio Huete Jimenez
305797a9217aSAntonio Huete Jimenez free((char *)f.bf_insns);
30583a289941SAaron LI return 0;
305997a9217aSAntonio Huete Jimenez }
306097a9217aSAntonio Huete Jimenez
30613a289941SAaron LI static int
plain_dump(struct icode * ic,char * errbuf)30623a289941SAaron LI plain_dump(struct icode *ic, char *errbuf)
30631077d0bdSPeter Avalos {
30641077d0bdSPeter Avalos struct bpf_program f;
30651077d0bdSPeter Avalos
30661077d0bdSPeter Avalos memset(bids, 0, sizeof bids);
30673a289941SAaron LI f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
30683a289941SAaron LI if (f.bf_insns == NULL)
30693a289941SAaron LI return -1;
30701077d0bdSPeter Avalos bpf_dump(&f, 1);
30711077d0bdSPeter Avalos putchar('\n');
30721077d0bdSPeter Avalos free((char *)f.bf_insns);
30733a289941SAaron LI return 0;
30741077d0bdSPeter Avalos }
307597a9217aSAntonio Huete Jimenez
307697a9217aSAntonio Huete Jimenez static void
opt_dump(opt_state_t * opt_state,struct icode * ic)30773a289941SAaron LI opt_dump(opt_state_t *opt_state, struct icode *ic)
307897a9217aSAntonio Huete Jimenez {
30793a289941SAaron LI int status;
30803a289941SAaron LI char errbuf[PCAP_ERRBUF_SIZE];
30813a289941SAaron LI
30823a289941SAaron LI /*
30833a289941SAaron LI * If the CFG, in DOT format, is requested, output it rather than
30843a289941SAaron LI * the code that would be generated from that graph.
308597a9217aSAntonio Huete Jimenez */
30863a289941SAaron LI if (pcap_print_dot_graph)
30873a289941SAaron LI status = dot_dump(ic, errbuf);
308897a9217aSAntonio Huete Jimenez else
30893a289941SAaron LI status = plain_dump(ic, errbuf);
30903a289941SAaron LI if (status == -1)
30913a289941SAaron LI opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
309297a9217aSAntonio Huete Jimenez }
30931077d0bdSPeter Avalos #endif
3094