18cf6c252SPaul Traina /* 28cf6c252SPaul Traina * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996 38cf6c252SPaul Traina * The Regents of the University of California. All rights reserved. 48cf6c252SPaul Traina * 58cf6c252SPaul Traina * Redistribution and use in source and binary forms, with or without 68cf6c252SPaul Traina * modification, are permitted provided that: (1) source code distributions 78cf6c252SPaul Traina * retain the above copyright notice and this paragraph in its entirety, (2) 88cf6c252SPaul Traina * distributions including binary code include the above copyright notice and 98cf6c252SPaul Traina * this paragraph in its entirety in the documentation or other materials 108cf6c252SPaul Traina * provided with the distribution, and (3) all advertising materials mentioning 118cf6c252SPaul Traina * features or use of this software display the following acknowledgement: 128cf6c252SPaul Traina * ``This product includes software developed by the University of California, 138cf6c252SPaul Traina * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of 148cf6c252SPaul Traina * the University nor the names of its contributors may be used to endorse 158cf6c252SPaul Traina * or promote products derived from this software without specific prior 168cf6c252SPaul Traina * written permission. 178cf6c252SPaul Traina * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED 188cf6c252SPaul Traina * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF 198cf6c252SPaul Traina * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 208cf6c252SPaul Traina * 218cf6c252SPaul Traina * Optimization module for tcpdump intermediate representation. 228cf6c252SPaul Traina */ 238cf6c252SPaul Traina #ifndef lint 24feb4ecdbSBruce M Simpson static const char rcsid[] _U_ = 25a0ee43a1SRui Paulo "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.91 2008-01-02 04:16:46 guy Exp $ (LBL)"; 26dc2c7305SBill Fenner #endif 27dc2c7305SBill Fenner 28dc2c7305SBill Fenner #ifdef HAVE_CONFIG_H 29dc2c7305SBill Fenner #include "config.h" 308cf6c252SPaul Traina #endif 318cf6c252SPaul Traina 32a0ee43a1SRui Paulo #ifdef WIN32 33a0ee43a1SRui Paulo #include <pcap-stdinc.h> 34a0ee43a1SRui Paulo #else /* WIN32 */ 35a0ee43a1SRui Paulo #if HAVE_INTTYPES_H 36a0ee43a1SRui Paulo #include <inttypes.h> 37a0ee43a1SRui Paulo #elif HAVE_STDINT_H 38a0ee43a1SRui Paulo #include <stdint.h> 39a0ee43a1SRui Paulo #endif 40a0ee43a1SRui Paulo #ifdef HAVE_SYS_BITYPES_H 41a0ee43a1SRui Paulo #include <sys/bitypes.h> 42a0ee43a1SRui Paulo #endif 43a0ee43a1SRui Paulo #include <sys/types.h> 44a0ee43a1SRui Paulo #endif /* WIN32 */ 45a0ee43a1SRui Paulo 468cf6c252SPaul Traina #include <stdio.h> 478cf6c252SPaul Traina #include <stdlib.h> 488cf6c252SPaul Traina #include <memory.h> 4904fb2745SSam Leffler #include <string.h> 508cf6c252SPaul Traina 51dc2c7305SBill Fenner #include <errno.h> 52dc2c7305SBill Fenner 538cf6c252SPaul Traina #include "pcap-int.h" 548cf6c252SPaul Traina 558cf6c252SPaul Traina #include "gencode.h" 568cf6c252SPaul Traina 578cf6c252SPaul Traina #ifdef HAVE_OS_PROTO_H 588cf6c252SPaul Traina #include "os-proto.h" 598cf6c252SPaul Traina #endif 608cf6c252SPaul Traina 618cf6c252SPaul Traina #ifdef BDEBUG 628cf6c252SPaul Traina extern int dflag; 638cf6c252SPaul Traina #endif 648cf6c252SPaul Traina 6504fb2745SSam Leffler #if defined(MSDOS) && !defined(__DJGPP__) 6604fb2745SSam Leffler extern int _w32_ffs (int mask); 6704fb2745SSam Leffler #define ffs _w32_ffs 6804fb2745SSam Leffler #endif 6904fb2745SSam Leffler 70a8e07101SRui Paulo #if defined(WIN32) && defined (_MSC_VER) 71a8e07101SRui Paulo int ffs(int mask); 72a8e07101SRui Paulo #endif 73a8e07101SRui Paulo 7404fb2745SSam Leffler /* 7504fb2745SSam Leffler * Represents a deleted instruction. 7604fb2745SSam Leffler */ 7704fb2745SSam Leffler #define NOP -1 7804fb2745SSam Leffler 7904fb2745SSam Leffler /* 8004fb2745SSam Leffler * Register numbers for use-def values. 8104fb2745SSam Leffler * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory 8204fb2745SSam Leffler * location. A_ATOM is the accumulator and X_ATOM is the index 8304fb2745SSam Leffler * register. 8404fb2745SSam Leffler */ 858cf6c252SPaul Traina #define A_ATOM BPF_MEMWORDS 868cf6c252SPaul Traina #define X_ATOM (BPF_MEMWORDS+1) 878cf6c252SPaul Traina 888cf6c252SPaul Traina /* 898cf6c252SPaul Traina * This define is used to represent *both* the accumulator and 908cf6c252SPaul Traina * x register in use-def computations. 918cf6c252SPaul Traina * Currently, the use-def code assumes only one definition per instruction. 928cf6c252SPaul Traina */ 938cf6c252SPaul Traina #define AX_ATOM N_ATOMS 948cf6c252SPaul Traina 958cf6c252SPaul Traina /* 968cf6c252SPaul Traina * A flag to indicate that further optimization is needed. 978cf6c252SPaul Traina * Iterative passes are continued until a given pass yields no 988cf6c252SPaul Traina * branch movement. 998cf6c252SPaul Traina */ 1008cf6c252SPaul Traina static int done; 1018cf6c252SPaul Traina 1028cf6c252SPaul Traina /* 1038cf6c252SPaul Traina * A block is marked if only if its mark equals the current mark. 1048cf6c252SPaul Traina * Rather than traverse the code array, marking each item, 'cur_mark' is 1058cf6c252SPaul Traina * incremented. This automatically makes each element unmarked. 1068cf6c252SPaul Traina */ 1078cf6c252SPaul Traina static int cur_mark; 1088cf6c252SPaul Traina #define isMarked(p) ((p)->mark == cur_mark) 1098cf6c252SPaul Traina #define unMarkAll() cur_mark += 1 1108cf6c252SPaul Traina #define Mark(p) ((p)->mark = cur_mark) 1118cf6c252SPaul Traina 1128cf6c252SPaul Traina static void opt_init(struct block *); 1138cf6c252SPaul Traina static void opt_cleanup(void); 1148cf6c252SPaul Traina 1158cf6c252SPaul Traina static void make_marks(struct block *); 1168cf6c252SPaul Traina static void mark_code(struct block *); 1178cf6c252SPaul Traina 1188cf6c252SPaul Traina static void intern_blocks(struct block *); 1198cf6c252SPaul Traina 1208cf6c252SPaul Traina static int eq_slist(struct slist *, struct slist *); 1218cf6c252SPaul Traina 1228cf6c252SPaul Traina static void find_levels_r(struct block *); 1238cf6c252SPaul Traina 1248cf6c252SPaul Traina static void find_levels(struct block *); 1258cf6c252SPaul Traina static void find_dom(struct block *); 1268cf6c252SPaul Traina static void propedom(struct edge *); 1278cf6c252SPaul Traina static void find_edom(struct block *); 1288cf6c252SPaul Traina static void find_closure(struct block *); 1298cf6c252SPaul Traina static int atomuse(struct stmt *); 1308cf6c252SPaul Traina static int atomdef(struct stmt *); 1318cf6c252SPaul Traina static void compute_local_ud(struct block *); 1328cf6c252SPaul Traina static void find_ud(struct block *); 1338cf6c252SPaul Traina static void init_val(void); 1348cf6c252SPaul Traina static int F(int, int, int); 1358cf6c252SPaul Traina static inline void vstore(struct stmt *, int *, int, int); 1368cf6c252SPaul Traina static void opt_blk(struct block *, int); 1378cf6c252SPaul Traina static int use_conflict(struct block *, struct block *); 1388cf6c252SPaul Traina static void opt_j(struct edge *); 1398cf6c252SPaul Traina static void or_pullup(struct block *); 1408cf6c252SPaul Traina static void and_pullup(struct block *); 1418cf6c252SPaul Traina static void opt_blks(struct block *, int); 1428cf6c252SPaul Traina static inline void link_inedge(struct edge *, struct block *); 1438cf6c252SPaul Traina static void find_inedges(struct block *); 1448cf6c252SPaul Traina static void opt_root(struct block **); 1458cf6c252SPaul Traina static void opt_loop(struct block *, int); 1468cf6c252SPaul Traina static void fold_op(struct stmt *, int, int); 1478cf6c252SPaul Traina static inline struct slist *this_op(struct slist *); 1488cf6c252SPaul Traina static void opt_not(struct block *); 1498cf6c252SPaul Traina static void opt_peep(struct block *); 1508cf6c252SPaul Traina static void opt_stmt(struct stmt *, int[], int); 1518cf6c252SPaul Traina static void deadstmt(struct stmt *, struct stmt *[]); 1528cf6c252SPaul Traina static void opt_deadstores(struct block *); 1538cf6c252SPaul Traina static struct block *fold_edge(struct block *, struct edge *); 1548cf6c252SPaul Traina static inline int eq_blk(struct block *, struct block *); 1558cf6c252SPaul Traina static int slength(struct slist *); 1568cf6c252SPaul Traina static int count_blocks(struct block *); 1578cf6c252SPaul Traina static void number_blks_r(struct block *); 1588cf6c252SPaul Traina static int count_stmts(struct block *); 1598cf6c252SPaul Traina static int convert_code_r(struct block *); 1608cf6c252SPaul Traina #ifdef BDEBUG 1618cf6c252SPaul Traina static void opt_dump(struct block *); 1628cf6c252SPaul Traina #endif 1638cf6c252SPaul Traina 1648cf6c252SPaul Traina static int n_blocks; 1658cf6c252SPaul Traina struct block **blocks; 1668cf6c252SPaul Traina static int n_edges; 1678cf6c252SPaul Traina struct edge **edges; 1688cf6c252SPaul Traina 1698cf6c252SPaul Traina /* 1708cf6c252SPaul Traina * A bit vector set representation of the dominators. 1718cf6c252SPaul Traina * We round up the set size to the next power of two. 1728cf6c252SPaul Traina */ 1738cf6c252SPaul Traina static int nodewords; 1748cf6c252SPaul Traina static int edgewords; 1758cf6c252SPaul Traina struct block **levels; 1768cf6c252SPaul Traina bpf_u_int32 *space; 1778cf6c252SPaul Traina #define BITS_PER_WORD (8*sizeof(bpf_u_int32)) 1788cf6c252SPaul Traina /* 1798cf6c252SPaul Traina * True if a is in uset {p} 1808cf6c252SPaul Traina */ 1818cf6c252SPaul Traina #define SET_MEMBER(p, a) \ 1828cf6c252SPaul Traina ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD))) 1838cf6c252SPaul Traina 1848cf6c252SPaul Traina /* 1858cf6c252SPaul Traina * Add 'a' to uset p. 1868cf6c252SPaul Traina */ 1878cf6c252SPaul Traina #define SET_INSERT(p, a) \ 1888cf6c252SPaul Traina (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD)) 1898cf6c252SPaul Traina 1908cf6c252SPaul Traina /* 1918cf6c252SPaul Traina * Delete 'a' from uset p. 1928cf6c252SPaul Traina */ 1938cf6c252SPaul Traina #define SET_DELETE(p, a) \ 1948cf6c252SPaul Traina (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD)) 1958cf6c252SPaul Traina 1968cf6c252SPaul Traina /* 1978cf6c252SPaul Traina * a := a intersect b 1988cf6c252SPaul Traina */ 1998cf6c252SPaul Traina #define SET_INTERSECT(a, b, n)\ 2008cf6c252SPaul Traina {\ 2018cf6c252SPaul Traina register bpf_u_int32 *_x = a, *_y = b;\ 2028cf6c252SPaul Traina register int _n = n;\ 2038cf6c252SPaul Traina while (--_n >= 0) *_x++ &= *_y++;\ 2048cf6c252SPaul Traina } 2058cf6c252SPaul Traina 2068cf6c252SPaul Traina /* 2078cf6c252SPaul Traina * a := a - b 2088cf6c252SPaul Traina */ 2098cf6c252SPaul Traina #define SET_SUBTRACT(a, b, n)\ 2108cf6c252SPaul Traina {\ 2118cf6c252SPaul Traina register bpf_u_int32 *_x = a, *_y = b;\ 2128cf6c252SPaul Traina register int _n = n;\ 2138cf6c252SPaul Traina while (--_n >= 0) *_x++ &=~ *_y++;\ 2148cf6c252SPaul Traina } 2158cf6c252SPaul Traina 2168cf6c252SPaul Traina /* 2178cf6c252SPaul Traina * a := a union b 2188cf6c252SPaul Traina */ 2198cf6c252SPaul Traina #define SET_UNION(a, b, n)\ 2208cf6c252SPaul Traina {\ 2218cf6c252SPaul Traina register bpf_u_int32 *_x = a, *_y = b;\ 2228cf6c252SPaul Traina register int _n = n;\ 2238cf6c252SPaul Traina while (--_n >= 0) *_x++ |= *_y++;\ 2248cf6c252SPaul Traina } 2258cf6c252SPaul Traina 2268cf6c252SPaul Traina static uset all_dom_sets; 2278cf6c252SPaul Traina static uset all_closure_sets; 2288cf6c252SPaul Traina static uset all_edge_sets; 2298cf6c252SPaul Traina 2308cf6c252SPaul Traina #ifndef MAX 2318cf6c252SPaul Traina #define MAX(a,b) ((a)>(b)?(a):(b)) 2328cf6c252SPaul Traina #endif 2338cf6c252SPaul Traina 2348cf6c252SPaul Traina static void 2358cf6c252SPaul Traina find_levels_r(b) 2368cf6c252SPaul Traina struct block *b; 2378cf6c252SPaul Traina { 2388cf6c252SPaul Traina int level; 2398cf6c252SPaul Traina 2408cf6c252SPaul Traina if (isMarked(b)) 2418cf6c252SPaul Traina return; 2428cf6c252SPaul Traina 2438cf6c252SPaul Traina Mark(b); 2448cf6c252SPaul Traina b->link = 0; 2458cf6c252SPaul Traina 2468cf6c252SPaul Traina if (JT(b)) { 2478cf6c252SPaul Traina find_levels_r(JT(b)); 2488cf6c252SPaul Traina find_levels_r(JF(b)); 2498cf6c252SPaul Traina level = MAX(JT(b)->level, JF(b)->level) + 1; 2508cf6c252SPaul Traina } else 2518cf6c252SPaul Traina level = 0; 2528cf6c252SPaul Traina b->level = level; 2538cf6c252SPaul Traina b->link = levels[level]; 2548cf6c252SPaul Traina levels[level] = b; 2558cf6c252SPaul Traina } 2568cf6c252SPaul Traina 2578cf6c252SPaul Traina /* 2588cf6c252SPaul Traina * Level graph. The levels go from 0 at the leaves to 2598cf6c252SPaul Traina * N_LEVELS at the root. The levels[] array points to the 2608cf6c252SPaul Traina * first node of the level list, whose elements are linked 2618cf6c252SPaul Traina * with the 'link' field of the struct block. 2628cf6c252SPaul Traina */ 2638cf6c252SPaul Traina static void 2648cf6c252SPaul Traina find_levels(root) 2658cf6c252SPaul Traina struct block *root; 2668cf6c252SPaul Traina { 2678cf6c252SPaul Traina memset((char *)levels, 0, n_blocks * sizeof(*levels)); 2688cf6c252SPaul Traina unMarkAll(); 2698cf6c252SPaul Traina find_levels_r(root); 2708cf6c252SPaul Traina } 2718cf6c252SPaul Traina 2728cf6c252SPaul Traina /* 2738cf6c252SPaul Traina * Find dominator relationships. 2748cf6c252SPaul Traina * Assumes graph has been leveled. 2758cf6c252SPaul Traina */ 2768cf6c252SPaul Traina static void 2778cf6c252SPaul Traina find_dom(root) 2788cf6c252SPaul Traina struct block *root; 2798cf6c252SPaul Traina { 2808cf6c252SPaul Traina int i; 2818cf6c252SPaul Traina struct block *b; 2828cf6c252SPaul Traina bpf_u_int32 *x; 2838cf6c252SPaul Traina 2848cf6c252SPaul Traina /* 2858cf6c252SPaul Traina * Initialize sets to contain all nodes. 2868cf6c252SPaul Traina */ 2878cf6c252SPaul Traina x = all_dom_sets; 2888cf6c252SPaul Traina i = n_blocks * nodewords; 2898cf6c252SPaul Traina while (--i >= 0) 2908cf6c252SPaul Traina *x++ = ~0; 2918cf6c252SPaul Traina /* Root starts off empty. */ 2928cf6c252SPaul Traina for (i = nodewords; --i >= 0;) 2938cf6c252SPaul Traina root->dom[i] = 0; 2948cf6c252SPaul Traina 2958cf6c252SPaul Traina /* root->level is the highest level no found. */ 2968cf6c252SPaul Traina for (i = root->level; i >= 0; --i) { 2978cf6c252SPaul Traina for (b = levels[i]; b; b = b->link) { 2988cf6c252SPaul Traina SET_INSERT(b->dom, b->id); 2998cf6c252SPaul Traina if (JT(b) == 0) 3008cf6c252SPaul Traina continue; 3018cf6c252SPaul Traina SET_INTERSECT(JT(b)->dom, b->dom, nodewords); 3028cf6c252SPaul Traina SET_INTERSECT(JF(b)->dom, b->dom, nodewords); 3038cf6c252SPaul Traina } 3048cf6c252SPaul Traina } 3058cf6c252SPaul Traina } 3068cf6c252SPaul Traina 3078cf6c252SPaul Traina static void 3088cf6c252SPaul Traina propedom(ep) 3098cf6c252SPaul Traina struct edge *ep; 3108cf6c252SPaul Traina { 3118cf6c252SPaul Traina SET_INSERT(ep->edom, ep->id); 3128cf6c252SPaul Traina if (ep->succ) { 3138cf6c252SPaul Traina SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords); 3148cf6c252SPaul Traina SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords); 3158cf6c252SPaul Traina } 3168cf6c252SPaul Traina } 3178cf6c252SPaul Traina 3188cf6c252SPaul Traina /* 3198cf6c252SPaul Traina * Compute edge dominators. 3208cf6c252SPaul Traina * Assumes graph has been leveled and predecessors established. 3218cf6c252SPaul Traina */ 3228cf6c252SPaul Traina static void 3238cf6c252SPaul Traina find_edom(root) 3248cf6c252SPaul Traina struct block *root; 3258cf6c252SPaul Traina { 3268cf6c252SPaul Traina int i; 3278cf6c252SPaul Traina uset x; 3288cf6c252SPaul Traina struct block *b; 3298cf6c252SPaul Traina 3308cf6c252SPaul Traina x = all_edge_sets; 3318cf6c252SPaul Traina for (i = n_edges * edgewords; --i >= 0; ) 3328cf6c252SPaul Traina x[i] = ~0; 3338cf6c252SPaul Traina 3348cf6c252SPaul Traina /* root->level is the highest level no found. */ 3358cf6c252SPaul Traina memset(root->et.edom, 0, edgewords * sizeof(*(uset)0)); 3368cf6c252SPaul Traina memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0)); 3378cf6c252SPaul Traina for (i = root->level; i >= 0; --i) { 3388cf6c252SPaul Traina for (b = levels[i]; b != 0; b = b->link) { 3398cf6c252SPaul Traina propedom(&b->et); 3408cf6c252SPaul Traina propedom(&b->ef); 3418cf6c252SPaul Traina } 3428cf6c252SPaul Traina } 3438cf6c252SPaul Traina } 3448cf6c252SPaul Traina 3458cf6c252SPaul Traina /* 3468cf6c252SPaul Traina * Find the backwards transitive closure of the flow graph. These sets 3478cf6c252SPaul Traina * are backwards in the sense that we find the set of nodes that reach 3488cf6c252SPaul Traina * a given node, not the set of nodes that can be reached by a node. 3498cf6c252SPaul Traina * 3508cf6c252SPaul Traina * Assumes graph has been leveled. 3518cf6c252SPaul Traina */ 3528cf6c252SPaul Traina static void 3538cf6c252SPaul Traina find_closure(root) 3548cf6c252SPaul Traina struct block *root; 3558cf6c252SPaul Traina { 3568cf6c252SPaul Traina int i; 3578cf6c252SPaul Traina struct block *b; 3588cf6c252SPaul Traina 3598cf6c252SPaul Traina /* 3608cf6c252SPaul Traina * Initialize sets to contain no nodes. 3618cf6c252SPaul Traina */ 3628cf6c252SPaul Traina memset((char *)all_closure_sets, 0, 3638cf6c252SPaul Traina n_blocks * nodewords * sizeof(*all_closure_sets)); 3648cf6c252SPaul Traina 3658cf6c252SPaul Traina /* root->level is the highest level no found. */ 3668cf6c252SPaul Traina for (i = root->level; i >= 0; --i) { 3678cf6c252SPaul Traina for (b = levels[i]; b; b = b->link) { 3688cf6c252SPaul Traina SET_INSERT(b->closure, b->id); 3698cf6c252SPaul Traina if (JT(b) == 0) 3708cf6c252SPaul Traina continue; 3718cf6c252SPaul Traina SET_UNION(JT(b)->closure, b->closure, nodewords); 3728cf6c252SPaul Traina SET_UNION(JF(b)->closure, b->closure, nodewords); 3738cf6c252SPaul Traina } 3748cf6c252SPaul Traina } 3758cf6c252SPaul Traina } 3768cf6c252SPaul Traina 3778cf6c252SPaul Traina /* 3788cf6c252SPaul Traina * Return the register number that is used by s. If A and X are both 3798cf6c252SPaul Traina * used, return AX_ATOM. If no register is used, return -1. 3808cf6c252SPaul Traina * 3818cf6c252SPaul Traina * The implementation should probably change to an array access. 3828cf6c252SPaul Traina */ 3838cf6c252SPaul Traina static int 3848cf6c252SPaul Traina atomuse(s) 3858cf6c252SPaul Traina struct stmt *s; 3868cf6c252SPaul Traina { 3878cf6c252SPaul Traina register int c = s->code; 3888cf6c252SPaul Traina 3898cf6c252SPaul Traina if (c == NOP) 3908cf6c252SPaul Traina return -1; 3918cf6c252SPaul Traina 3928cf6c252SPaul Traina switch (BPF_CLASS(c)) { 3938cf6c252SPaul Traina 3948cf6c252SPaul Traina case BPF_RET: 3958cf6c252SPaul Traina return (BPF_RVAL(c) == BPF_A) ? A_ATOM : 3968cf6c252SPaul Traina (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1; 3978cf6c252SPaul Traina 3988cf6c252SPaul Traina case BPF_LD: 3998cf6c252SPaul Traina case BPF_LDX: 4008cf6c252SPaul Traina return (BPF_MODE(c) == BPF_IND) ? X_ATOM : 4018cf6c252SPaul Traina (BPF_MODE(c) == BPF_MEM) ? s->k : -1; 4028cf6c252SPaul Traina 4038cf6c252SPaul Traina case BPF_ST: 4048cf6c252SPaul Traina return A_ATOM; 4058cf6c252SPaul Traina 4068cf6c252SPaul Traina case BPF_STX: 4078cf6c252SPaul Traina return X_ATOM; 4088cf6c252SPaul Traina 4098cf6c252SPaul Traina case BPF_JMP: 4108cf6c252SPaul Traina case BPF_ALU: 4118cf6c252SPaul Traina if (BPF_SRC(c) == BPF_X) 4128cf6c252SPaul Traina return AX_ATOM; 4138cf6c252SPaul Traina return A_ATOM; 4148cf6c252SPaul Traina 4158cf6c252SPaul Traina case BPF_MISC: 4168cf6c252SPaul Traina return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM; 4178cf6c252SPaul Traina } 4188cf6c252SPaul Traina abort(); 4198cf6c252SPaul Traina /* NOTREACHED */ 4208cf6c252SPaul Traina } 4218cf6c252SPaul Traina 4228cf6c252SPaul Traina /* 4238cf6c252SPaul Traina * Return the register number that is defined by 's'. We assume that 4248cf6c252SPaul Traina * a single stmt cannot define more than one register. If no register 4258cf6c252SPaul Traina * is defined, return -1. 4268cf6c252SPaul Traina * 4278cf6c252SPaul Traina * The implementation should probably change to an array access. 4288cf6c252SPaul Traina */ 4298cf6c252SPaul Traina static int 4308cf6c252SPaul Traina atomdef(s) 4318cf6c252SPaul Traina struct stmt *s; 4328cf6c252SPaul Traina { 4338cf6c252SPaul Traina if (s->code == NOP) 4348cf6c252SPaul Traina return -1; 4358cf6c252SPaul Traina 4368cf6c252SPaul Traina switch (BPF_CLASS(s->code)) { 4378cf6c252SPaul Traina 4388cf6c252SPaul Traina case BPF_LD: 4398cf6c252SPaul Traina case BPF_ALU: 4408cf6c252SPaul Traina return A_ATOM; 4418cf6c252SPaul Traina 4428cf6c252SPaul Traina case BPF_LDX: 4438cf6c252SPaul Traina return X_ATOM; 4448cf6c252SPaul Traina 4458cf6c252SPaul Traina case BPF_ST: 4468cf6c252SPaul Traina case BPF_STX: 4478cf6c252SPaul Traina return s->k; 4488cf6c252SPaul Traina 4498cf6c252SPaul Traina case BPF_MISC: 4508cf6c252SPaul Traina return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM; 4518cf6c252SPaul Traina } 4528cf6c252SPaul Traina return -1; 4538cf6c252SPaul Traina } 4548cf6c252SPaul Traina 45504fb2745SSam Leffler /* 45604fb2745SSam Leffler * Compute the sets of registers used, defined, and killed by 'b'. 45704fb2745SSam Leffler * 45804fb2745SSam Leffler * "Used" means that a statement in 'b' uses the register before any 45904fb2745SSam Leffler * statement in 'b' defines it, i.e. it uses the value left in 46004fb2745SSam Leffler * that register by a predecessor block of this block. 46104fb2745SSam Leffler * "Defined" means that a statement in 'b' defines it. 46204fb2745SSam Leffler * "Killed" means that a statement in 'b' defines it before any 46304fb2745SSam Leffler * statement in 'b' uses it, i.e. it kills the value left in that 46404fb2745SSam Leffler * register by a predecessor block of this block. 46504fb2745SSam Leffler */ 4668cf6c252SPaul Traina static void 4678cf6c252SPaul Traina compute_local_ud(b) 4688cf6c252SPaul Traina struct block *b; 4698cf6c252SPaul Traina { 4708cf6c252SPaul Traina struct slist *s; 4718cf6c252SPaul Traina atomset def = 0, use = 0, kill = 0; 4728cf6c252SPaul Traina int atom; 4738cf6c252SPaul Traina 4748cf6c252SPaul Traina for (s = b->stmts; s; s = s->next) { 4758cf6c252SPaul Traina if (s->s.code == NOP) 4768cf6c252SPaul Traina continue; 4778cf6c252SPaul Traina atom = atomuse(&s->s); 4788cf6c252SPaul Traina if (atom >= 0) { 4798cf6c252SPaul Traina if (atom == AX_ATOM) { 4808cf6c252SPaul Traina if (!ATOMELEM(def, X_ATOM)) 4818cf6c252SPaul Traina use |= ATOMMASK(X_ATOM); 4828cf6c252SPaul Traina if (!ATOMELEM(def, A_ATOM)) 4838cf6c252SPaul Traina use |= ATOMMASK(A_ATOM); 4848cf6c252SPaul Traina } 4858cf6c252SPaul Traina else if (atom < N_ATOMS) { 4868cf6c252SPaul Traina if (!ATOMELEM(def, atom)) 4878cf6c252SPaul Traina use |= ATOMMASK(atom); 4888cf6c252SPaul Traina } 4898cf6c252SPaul Traina else 4908cf6c252SPaul Traina abort(); 4918cf6c252SPaul Traina } 4928cf6c252SPaul Traina atom = atomdef(&s->s); 4938cf6c252SPaul Traina if (atom >= 0) { 4948cf6c252SPaul Traina if (!ATOMELEM(use, atom)) 4958cf6c252SPaul Traina kill |= ATOMMASK(atom); 4968cf6c252SPaul Traina def |= ATOMMASK(atom); 4978cf6c252SPaul Traina } 4988cf6c252SPaul Traina } 49904fb2745SSam Leffler if (BPF_CLASS(b->s.code) == BPF_JMP) { 50004fb2745SSam Leffler /* 50104fb2745SSam Leffler * XXX - what about RET? 50204fb2745SSam Leffler */ 50304fb2745SSam Leffler atom = atomuse(&b->s); 50404fb2745SSam Leffler if (atom >= 0) { 50504fb2745SSam Leffler if (atom == AX_ATOM) { 50604fb2745SSam Leffler if (!ATOMELEM(def, X_ATOM)) 50704fb2745SSam Leffler use |= ATOMMASK(X_ATOM); 50804fb2745SSam Leffler if (!ATOMELEM(def, A_ATOM)) 5098cf6c252SPaul Traina use |= ATOMMASK(A_ATOM); 51004fb2745SSam Leffler } 51104fb2745SSam Leffler else if (atom < N_ATOMS) { 51204fb2745SSam Leffler if (!ATOMELEM(def, atom)) 51304fb2745SSam Leffler use |= ATOMMASK(atom); 51404fb2745SSam Leffler } 51504fb2745SSam Leffler else 51604fb2745SSam Leffler abort(); 51704fb2745SSam Leffler } 51804fb2745SSam Leffler } 5198cf6c252SPaul Traina 5208cf6c252SPaul Traina b->def = def; 5218cf6c252SPaul Traina b->kill = kill; 5228cf6c252SPaul Traina b->in_use = use; 5238cf6c252SPaul Traina } 5248cf6c252SPaul Traina 5258cf6c252SPaul Traina /* 5268cf6c252SPaul Traina * Assume graph is already leveled. 5278cf6c252SPaul Traina */ 5288cf6c252SPaul Traina static void 5298cf6c252SPaul Traina find_ud(root) 5308cf6c252SPaul Traina struct block *root; 5318cf6c252SPaul Traina { 5328cf6c252SPaul Traina int i, maxlevel; 5338cf6c252SPaul Traina struct block *p; 5348cf6c252SPaul Traina 5358cf6c252SPaul Traina /* 5368cf6c252SPaul Traina * root->level is the highest level no found; 5378cf6c252SPaul Traina * count down from there. 5388cf6c252SPaul Traina */ 5398cf6c252SPaul Traina maxlevel = root->level; 5408cf6c252SPaul Traina for (i = maxlevel; i >= 0; --i) 5418cf6c252SPaul Traina for (p = levels[i]; p; p = p->link) { 5428cf6c252SPaul Traina compute_local_ud(p); 5438cf6c252SPaul Traina p->out_use = 0; 5448cf6c252SPaul Traina } 5458cf6c252SPaul Traina 5468cf6c252SPaul Traina for (i = 1; i <= maxlevel; ++i) { 5478cf6c252SPaul Traina for (p = levels[i]; p; p = p->link) { 5488cf6c252SPaul Traina p->out_use |= JT(p)->in_use | JF(p)->in_use; 5498cf6c252SPaul Traina p->in_use |= p->out_use &~ p->kill; 5508cf6c252SPaul Traina } 5518cf6c252SPaul Traina } 5528cf6c252SPaul Traina } 5538cf6c252SPaul Traina 5548cf6c252SPaul Traina /* 5558cf6c252SPaul Traina * These data structures are used in a Cocke and Shwarz style 5568cf6c252SPaul Traina * value numbering scheme. Since the flowgraph is acyclic, 5578cf6c252SPaul Traina * exit values can be propagated from a node's predecessors 5588cf6c252SPaul Traina * provided it is uniquely defined. 5598cf6c252SPaul Traina */ 5608cf6c252SPaul Traina struct valnode { 5618cf6c252SPaul Traina int code; 5628cf6c252SPaul Traina int v0, v1; 5638cf6c252SPaul Traina int val; 5648cf6c252SPaul Traina struct valnode *next; 5658cf6c252SPaul Traina }; 5668cf6c252SPaul Traina 5678cf6c252SPaul Traina #define MODULUS 213 5688cf6c252SPaul Traina static struct valnode *hashtbl[MODULUS]; 5698cf6c252SPaul Traina static int curval; 5708cf6c252SPaul Traina static int maxval; 5718cf6c252SPaul Traina 5728cf6c252SPaul Traina /* Integer constants mapped with the load immediate opcode. */ 5738cf6c252SPaul Traina #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L) 5748cf6c252SPaul Traina 5758cf6c252SPaul Traina struct vmapinfo { 5768cf6c252SPaul Traina int is_const; 5778cf6c252SPaul Traina bpf_int32 const_val; 5788cf6c252SPaul Traina }; 5798cf6c252SPaul Traina 5808cf6c252SPaul Traina struct vmapinfo *vmap; 5818cf6c252SPaul Traina struct valnode *vnode_base; 5828cf6c252SPaul Traina struct valnode *next_vnode; 5838cf6c252SPaul Traina 5848cf6c252SPaul Traina static void 5858cf6c252SPaul Traina init_val() 5868cf6c252SPaul Traina { 5878cf6c252SPaul Traina curval = 0; 5888cf6c252SPaul Traina next_vnode = vnode_base; 5898cf6c252SPaul Traina memset((char *)vmap, 0, maxval * sizeof(*vmap)); 5908cf6c252SPaul Traina memset((char *)hashtbl, 0, sizeof hashtbl); 5918cf6c252SPaul Traina } 5928cf6c252SPaul Traina 5938cf6c252SPaul Traina /* Because we really don't have an IR, this stuff is a little messy. */ 5948cf6c252SPaul Traina static int 5958cf6c252SPaul Traina F(code, v0, v1) 5968cf6c252SPaul Traina int code; 5978cf6c252SPaul Traina int v0, v1; 5988cf6c252SPaul Traina { 5998cf6c252SPaul Traina u_int hash; 6008cf6c252SPaul Traina int val; 6018cf6c252SPaul Traina struct valnode *p; 6028cf6c252SPaul Traina 6038cf6c252SPaul Traina hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8); 6048cf6c252SPaul Traina hash %= MODULUS; 6058cf6c252SPaul Traina 6068cf6c252SPaul Traina for (p = hashtbl[hash]; p; p = p->next) 6078cf6c252SPaul Traina if (p->code == code && p->v0 == v0 && p->v1 == v1) 6088cf6c252SPaul Traina return p->val; 6098cf6c252SPaul Traina 6108cf6c252SPaul Traina val = ++curval; 6118cf6c252SPaul Traina if (BPF_MODE(code) == BPF_IMM && 6128cf6c252SPaul Traina (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) { 6138cf6c252SPaul Traina vmap[val].const_val = v0; 6148cf6c252SPaul Traina vmap[val].is_const = 1; 6158cf6c252SPaul Traina } 6168cf6c252SPaul Traina p = next_vnode++; 6178cf6c252SPaul Traina p->val = val; 6188cf6c252SPaul Traina p->code = code; 6198cf6c252SPaul Traina p->v0 = v0; 6208cf6c252SPaul Traina p->v1 = v1; 6218cf6c252SPaul Traina p->next = hashtbl[hash]; 6228cf6c252SPaul Traina hashtbl[hash] = p; 6238cf6c252SPaul Traina 6248cf6c252SPaul Traina return val; 6258cf6c252SPaul Traina } 6268cf6c252SPaul Traina 6278cf6c252SPaul Traina static inline void 6288cf6c252SPaul Traina vstore(s, valp, newval, alter) 6298cf6c252SPaul Traina struct stmt *s; 6308cf6c252SPaul Traina int *valp; 6318cf6c252SPaul Traina int newval; 6328cf6c252SPaul Traina int alter; 6338cf6c252SPaul Traina { 6348cf6c252SPaul Traina if (alter && *valp == newval) 6358cf6c252SPaul Traina s->code = NOP; 6368cf6c252SPaul Traina else 6378cf6c252SPaul Traina *valp = newval; 6388cf6c252SPaul Traina } 6398cf6c252SPaul Traina 6408cf6c252SPaul Traina static void 6418cf6c252SPaul Traina fold_op(s, v0, v1) 6428cf6c252SPaul Traina struct stmt *s; 6438cf6c252SPaul Traina int v0, v1; 6448cf6c252SPaul Traina { 645ef96d74fSMax Laier bpf_u_int32 a, b; 6468cf6c252SPaul Traina 6478cf6c252SPaul Traina a = vmap[v0].const_val; 6488cf6c252SPaul Traina b = vmap[v1].const_val; 6498cf6c252SPaul Traina 6508cf6c252SPaul Traina switch (BPF_OP(s->code)) { 6518cf6c252SPaul Traina case BPF_ADD: 6528cf6c252SPaul Traina a += b; 6538cf6c252SPaul Traina break; 6548cf6c252SPaul Traina 6558cf6c252SPaul Traina case BPF_SUB: 6568cf6c252SPaul Traina a -= b; 6578cf6c252SPaul Traina break; 6588cf6c252SPaul Traina 6598cf6c252SPaul Traina case BPF_MUL: 6608cf6c252SPaul Traina a *= b; 6618cf6c252SPaul Traina break; 6628cf6c252SPaul Traina 6638cf6c252SPaul Traina case BPF_DIV: 6648cf6c252SPaul Traina if (b == 0) 6658cf6c252SPaul Traina bpf_error("division by zero"); 6668cf6c252SPaul Traina a /= b; 6678cf6c252SPaul Traina break; 6688cf6c252SPaul Traina 6698cf6c252SPaul Traina case BPF_AND: 6708cf6c252SPaul Traina a &= b; 6718cf6c252SPaul Traina break; 6728cf6c252SPaul Traina 6738cf6c252SPaul Traina case BPF_OR: 6748cf6c252SPaul Traina a |= b; 6758cf6c252SPaul Traina break; 6768cf6c252SPaul Traina 6778cf6c252SPaul Traina case BPF_LSH: 6788cf6c252SPaul Traina a <<= b; 6798cf6c252SPaul Traina break; 6808cf6c252SPaul Traina 6818cf6c252SPaul Traina case BPF_RSH: 6828cf6c252SPaul Traina a >>= b; 6838cf6c252SPaul Traina break; 6848cf6c252SPaul Traina 6858cf6c252SPaul Traina case BPF_NEG: 6868cf6c252SPaul Traina a = -a; 6878cf6c252SPaul Traina break; 6888cf6c252SPaul Traina 6898cf6c252SPaul Traina default: 6908cf6c252SPaul Traina abort(); 6918cf6c252SPaul Traina } 6928cf6c252SPaul Traina s->k = a; 6938cf6c252SPaul Traina s->code = BPF_LD|BPF_IMM; 6948cf6c252SPaul Traina done = 0; 6958cf6c252SPaul Traina } 6968cf6c252SPaul Traina 6978cf6c252SPaul Traina static inline struct slist * 6988cf6c252SPaul Traina this_op(s) 6998cf6c252SPaul Traina struct slist *s; 7008cf6c252SPaul Traina { 7018cf6c252SPaul Traina while (s != 0 && s->s.code == NOP) 7028cf6c252SPaul Traina s = s->next; 7038cf6c252SPaul Traina return s; 7048cf6c252SPaul Traina } 7058cf6c252SPaul Traina 7068cf6c252SPaul Traina static void 7078cf6c252SPaul Traina opt_not(b) 7088cf6c252SPaul Traina struct block *b; 7098cf6c252SPaul Traina { 7108cf6c252SPaul Traina struct block *tmp = JT(b); 7118cf6c252SPaul Traina 7128cf6c252SPaul Traina JT(b) = JF(b); 7138cf6c252SPaul Traina JF(b) = tmp; 7148cf6c252SPaul Traina } 7158cf6c252SPaul Traina 7168cf6c252SPaul Traina static void 7178cf6c252SPaul Traina opt_peep(b) 7188cf6c252SPaul Traina struct block *b; 7198cf6c252SPaul Traina { 7208cf6c252SPaul Traina struct slist *s; 7218cf6c252SPaul Traina struct slist *next, *last; 7228cf6c252SPaul Traina int val; 7238cf6c252SPaul Traina 7248cf6c252SPaul Traina s = b->stmts; 7258cf6c252SPaul Traina if (s == 0) 7268cf6c252SPaul Traina return; 7278cf6c252SPaul Traina 7288cf6c252SPaul Traina last = s; 72904fb2745SSam Leffler for (/*empty*/; /*empty*/; s = next) { 73004fb2745SSam Leffler /* 73104fb2745SSam Leffler * Skip over nops. 73204fb2745SSam Leffler */ 7338cf6c252SPaul Traina s = this_op(s); 7348cf6c252SPaul Traina if (s == 0) 73504fb2745SSam Leffler break; /* nothing left in the block */ 73604fb2745SSam Leffler 73704fb2745SSam Leffler /* 73804fb2745SSam Leffler * Find the next real instruction after that one 73904fb2745SSam Leffler * (skipping nops). 74004fb2745SSam Leffler */ 7418cf6c252SPaul Traina next = this_op(s->next); 7428cf6c252SPaul Traina if (next == 0) 74304fb2745SSam Leffler break; /* no next instruction */ 7448cf6c252SPaul Traina last = next; 7458cf6c252SPaul Traina 7468cf6c252SPaul Traina /* 7478cf6c252SPaul Traina * st M[k] --> st M[k] 7488cf6c252SPaul Traina * ldx M[k] tax 7498cf6c252SPaul Traina */ 7508cf6c252SPaul Traina if (s->s.code == BPF_ST && 7518cf6c252SPaul Traina next->s.code == (BPF_LDX|BPF_MEM) && 7528cf6c252SPaul Traina s->s.k == next->s.k) { 7538cf6c252SPaul Traina done = 0; 7548cf6c252SPaul Traina next->s.code = BPF_MISC|BPF_TAX; 7558cf6c252SPaul Traina } 7568cf6c252SPaul Traina /* 7578cf6c252SPaul Traina * ld #k --> ldx #k 7588cf6c252SPaul Traina * tax txa 7598cf6c252SPaul Traina */ 7608cf6c252SPaul Traina if (s->s.code == (BPF_LD|BPF_IMM) && 7618cf6c252SPaul Traina next->s.code == (BPF_MISC|BPF_TAX)) { 7628cf6c252SPaul Traina s->s.code = BPF_LDX|BPF_IMM; 7638cf6c252SPaul Traina next->s.code = BPF_MISC|BPF_TXA; 7648cf6c252SPaul Traina done = 0; 7658cf6c252SPaul Traina } 7668cf6c252SPaul Traina /* 7678cf6c252SPaul Traina * This is an ugly special case, but it happens 7688cf6c252SPaul Traina * when you say tcp[k] or udp[k] where k is a constant. 7698cf6c252SPaul Traina */ 7708cf6c252SPaul Traina if (s->s.code == (BPF_LD|BPF_IMM)) { 7718cf6c252SPaul Traina struct slist *add, *tax, *ild; 7728cf6c252SPaul Traina 7738cf6c252SPaul Traina /* 7748cf6c252SPaul Traina * Check that X isn't used on exit from this 7758cf6c252SPaul Traina * block (which the optimizer might cause). 7768cf6c252SPaul Traina * We know the code generator won't generate 7778cf6c252SPaul Traina * any local dependencies. 7788cf6c252SPaul Traina */ 7798cf6c252SPaul Traina if (ATOMELEM(b->out_use, X_ATOM)) 78004fb2745SSam Leffler continue; 7818cf6c252SPaul Traina 78204fb2745SSam Leffler /* 78304fb2745SSam Leffler * Check that the instruction following the ldi 78404fb2745SSam Leffler * is an addx, or it's an ldxms with an addx 78504fb2745SSam Leffler * following it (with 0 or more nops between the 78604fb2745SSam Leffler * ldxms and addx). 78704fb2745SSam Leffler */ 7888cf6c252SPaul Traina if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B)) 7898cf6c252SPaul Traina add = next; 7908cf6c252SPaul Traina else 7918cf6c252SPaul Traina add = this_op(next->next); 7928cf6c252SPaul Traina if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X)) 79304fb2745SSam Leffler continue; 7948cf6c252SPaul Traina 79504fb2745SSam Leffler /* 79604fb2745SSam Leffler * Check that a tax follows that (with 0 or more 79704fb2745SSam Leffler * nops between them). 79804fb2745SSam Leffler */ 7998cf6c252SPaul Traina tax = this_op(add->next); 8008cf6c252SPaul Traina if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX)) 80104fb2745SSam Leffler continue; 8028cf6c252SPaul Traina 80304fb2745SSam Leffler /* 80404fb2745SSam Leffler * Check that an ild follows that (with 0 or more 80504fb2745SSam Leffler * nops between them). 80604fb2745SSam Leffler */ 8078cf6c252SPaul Traina ild = this_op(tax->next); 8088cf6c252SPaul Traina if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD || 8098cf6c252SPaul Traina BPF_MODE(ild->s.code) != BPF_IND) 81004fb2745SSam Leffler continue; 8118cf6c252SPaul Traina /* 8128cf6c252SPaul Traina * We want to turn this sequence: 8138cf6c252SPaul Traina * 8148cf6c252SPaul Traina * (004) ldi #0x2 {s} 8158cf6c252SPaul Traina * (005) ldxms [14] {next} -- optional 8168cf6c252SPaul Traina * (006) addx {add} 8178cf6c252SPaul Traina * (007) tax {tax} 8188cf6c252SPaul Traina * (008) ild [x+0] {ild} 8198cf6c252SPaul Traina * 8208cf6c252SPaul Traina * into this sequence: 8218cf6c252SPaul Traina * 8228cf6c252SPaul Traina * (004) nop 8238cf6c252SPaul Traina * (005) ldxms [14] 8248cf6c252SPaul Traina * (006) nop 8258cf6c252SPaul Traina * (007) nop 8268cf6c252SPaul Traina * (008) ild [x+2] 8278cf6c252SPaul Traina * 82804fb2745SSam Leffler * XXX We need to check that X is not 82904fb2745SSam Leffler * subsequently used, because we want to change 83004fb2745SSam Leffler * what'll be in it after this sequence. 83104fb2745SSam Leffler * 83204fb2745SSam Leffler * We know we can eliminate the accumulator 83304fb2745SSam Leffler * modifications earlier in the sequence since 83404fb2745SSam Leffler * it is defined by the last stmt of this sequence 83504fb2745SSam Leffler * (i.e., the last statement of the sequence loads 83604fb2745SSam Leffler * a value into the accumulator, so we can eliminate 83704fb2745SSam Leffler * earlier operations on the accumulator). 8388cf6c252SPaul Traina */ 8398cf6c252SPaul Traina ild->s.k += s->s.k; 8408cf6c252SPaul Traina s->s.code = NOP; 8418cf6c252SPaul Traina add->s.code = NOP; 8428cf6c252SPaul Traina tax->s.code = NOP; 8438cf6c252SPaul Traina done = 0; 8448cf6c252SPaul Traina } 8458cf6c252SPaul Traina } 8468cf6c252SPaul Traina /* 84704fb2745SSam Leffler * If the comparison at the end of a block is an equality 84804fb2745SSam Leffler * comparison against a constant, and nobody uses the value 84904fb2745SSam Leffler * we leave in the A register at the end of a block, and 85004fb2745SSam Leffler * the operation preceding the comparison is an arithmetic 85104fb2745SSam Leffler * operation, we can sometime optimize it away. 8528cf6c252SPaul Traina */ 85304fb2745SSam Leffler if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) && 8548cf6c252SPaul Traina !ATOMELEM(b->out_use, A_ATOM)) { 85504fb2745SSam Leffler /* 85604fb2745SSam Leffler * We can optimize away certain subtractions of the 85704fb2745SSam Leffler * X register. 85804fb2745SSam Leffler */ 85904fb2745SSam Leffler if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) { 8608cf6c252SPaul Traina val = b->val[X_ATOM]; 8618cf6c252SPaul Traina if (vmap[val].is_const) { 862feb4ecdbSBruce M Simpson /* 86304fb2745SSam Leffler * If we have a subtract to do a comparison, 86404fb2745SSam Leffler * and the X register is a known constant, 86504fb2745SSam Leffler * we can merge this value into the 86604fb2745SSam Leffler * comparison: 86704fb2745SSam Leffler * 868feb4ecdbSBruce M Simpson * sub x -> nop 869feb4ecdbSBruce M Simpson * jeq #y jeq #(x+y) 870feb4ecdbSBruce M Simpson */ 8718cf6c252SPaul Traina b->s.k += vmap[val].const_val; 8728cf6c252SPaul Traina last->s.code = NOP; 8738cf6c252SPaul Traina done = 0; 8748cf6c252SPaul Traina } else if (b->s.k == 0) { 8758cf6c252SPaul Traina /* 87604fb2745SSam Leffler * If the X register isn't a constant, 87704fb2745SSam Leffler * and the comparison in the test is 87804fb2745SSam Leffler * against 0, we can compare with the 87904fb2745SSam Leffler * X register, instead: 88004fb2745SSam Leffler * 88104fb2745SSam Leffler * sub x -> nop 88204fb2745SSam Leffler * jeq #0 jeq x 8838cf6c252SPaul Traina */ 8848cf6c252SPaul Traina last->s.code = NOP; 88504fb2745SSam Leffler b->s.code = BPF_JMP|BPF_JEQ|BPF_X; 8868cf6c252SPaul Traina done = 0; 8878cf6c252SPaul Traina } 8888cf6c252SPaul Traina } 8898cf6c252SPaul Traina /* 89004fb2745SSam Leffler * Likewise, a constant subtract can be simplified: 89104fb2745SSam Leffler * 89204fb2745SSam Leffler * sub #x -> nop 89304fb2745SSam Leffler * jeq #y -> jeq #(x+y) 8948cf6c252SPaul Traina */ 89504fb2745SSam Leffler else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) { 8968cf6c252SPaul Traina last->s.code = NOP; 897feb4ecdbSBruce M Simpson b->s.k += last->s.k; 8988cf6c252SPaul Traina done = 0; 8998cf6c252SPaul Traina } 9008cf6c252SPaul Traina /* 90104fb2745SSam Leffler * And, similarly, a constant AND can be simplified 90204fb2745SSam Leffler * if we're testing against 0, i.e.: 90304fb2745SSam Leffler * 9048cf6c252SPaul Traina * and #k nop 9058cf6c252SPaul Traina * jeq #0 -> jset #k 9068cf6c252SPaul Traina */ 90704fb2745SSam Leffler else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) && 90804fb2745SSam Leffler b->s.k == 0) { 9098cf6c252SPaul Traina b->s.k = last->s.k; 9108cf6c252SPaul Traina b->s.code = BPF_JMP|BPF_K|BPF_JSET; 9118cf6c252SPaul Traina last->s.code = NOP; 9128cf6c252SPaul Traina done = 0; 9138cf6c252SPaul Traina opt_not(b); 9148cf6c252SPaul Traina } 91504fb2745SSam Leffler } 9168cf6c252SPaul Traina /* 917feb4ecdbSBruce M Simpson * jset #0 -> never 918feb4ecdbSBruce M Simpson * jset #ffffffff -> always 919feb4ecdbSBruce M Simpson */ 920feb4ecdbSBruce M Simpson if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) { 921feb4ecdbSBruce M Simpson if (b->s.k == 0) 922feb4ecdbSBruce M Simpson JT(b) = JF(b); 923feb4ecdbSBruce M Simpson if (b->s.k == 0xffffffff) 924feb4ecdbSBruce M Simpson JF(b) = JT(b); 925feb4ecdbSBruce M Simpson } 926feb4ecdbSBruce M Simpson /* 927a8e07101SRui Paulo * If we're comparing against the index register, and the index 928a8e07101SRui Paulo * register is a known constant, we can just compare against that 929a8e07101SRui Paulo * constant. 930a8e07101SRui Paulo */ 931a8e07101SRui Paulo val = b->val[X_ATOM]; 932a8e07101SRui Paulo if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) { 933a8e07101SRui Paulo bpf_int32 v = vmap[val].const_val; 934a8e07101SRui Paulo b->s.code &= ~BPF_X; 935a8e07101SRui Paulo b->s.k = v; 936a8e07101SRui Paulo } 937a8e07101SRui Paulo /* 9388cf6c252SPaul Traina * If the accumulator is a known constant, we can compute the 9398cf6c252SPaul Traina * comparison result. 9408cf6c252SPaul Traina */ 9418cf6c252SPaul Traina val = b->val[A_ATOM]; 9428cf6c252SPaul Traina if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) { 9438cf6c252SPaul Traina bpf_int32 v = vmap[val].const_val; 9448cf6c252SPaul Traina switch (BPF_OP(b->s.code)) { 9458cf6c252SPaul Traina 9468cf6c252SPaul Traina case BPF_JEQ: 9478cf6c252SPaul Traina v = v == b->s.k; 9488cf6c252SPaul Traina break; 9498cf6c252SPaul Traina 9508cf6c252SPaul Traina case BPF_JGT: 9518cf6c252SPaul Traina v = (unsigned)v > b->s.k; 9528cf6c252SPaul Traina break; 9538cf6c252SPaul Traina 9548cf6c252SPaul Traina case BPF_JGE: 9558cf6c252SPaul Traina v = (unsigned)v >= b->s.k; 9568cf6c252SPaul Traina break; 9578cf6c252SPaul Traina 9588cf6c252SPaul Traina case BPF_JSET: 9598cf6c252SPaul Traina v &= b->s.k; 9608cf6c252SPaul Traina break; 9618cf6c252SPaul Traina 9628cf6c252SPaul Traina default: 9638cf6c252SPaul Traina abort(); 9648cf6c252SPaul Traina } 9658cf6c252SPaul Traina if (JF(b) != JT(b)) 9668cf6c252SPaul Traina done = 0; 9678cf6c252SPaul Traina if (v) 9688cf6c252SPaul Traina JF(b) = JT(b); 9698cf6c252SPaul Traina else 9708cf6c252SPaul Traina JT(b) = JF(b); 9718cf6c252SPaul Traina } 9728cf6c252SPaul Traina } 9738cf6c252SPaul Traina 9748cf6c252SPaul Traina /* 9758cf6c252SPaul Traina * Compute the symbolic value of expression of 's', and update 9768cf6c252SPaul Traina * anything it defines in the value table 'val'. If 'alter' is true, 9778cf6c252SPaul Traina * do various optimizations. This code would be cleaner if symbolic 9788cf6c252SPaul Traina * evaluation and code transformations weren't folded together. 9798cf6c252SPaul Traina */ 9808cf6c252SPaul Traina static void 9818cf6c252SPaul Traina opt_stmt(s, val, alter) 9828cf6c252SPaul Traina struct stmt *s; 9838cf6c252SPaul Traina int val[]; 9848cf6c252SPaul Traina int alter; 9858cf6c252SPaul Traina { 9868cf6c252SPaul Traina int op; 9878cf6c252SPaul Traina int v; 9888cf6c252SPaul Traina 9898cf6c252SPaul Traina switch (s->code) { 9908cf6c252SPaul Traina 9918cf6c252SPaul Traina case BPF_LD|BPF_ABS|BPF_W: 9928cf6c252SPaul Traina case BPF_LD|BPF_ABS|BPF_H: 9938cf6c252SPaul Traina case BPF_LD|BPF_ABS|BPF_B: 9948cf6c252SPaul Traina v = F(s->code, s->k, 0L); 9958cf6c252SPaul Traina vstore(s, &val[A_ATOM], v, alter); 9968cf6c252SPaul Traina break; 9978cf6c252SPaul Traina 9988cf6c252SPaul Traina case BPF_LD|BPF_IND|BPF_W: 9998cf6c252SPaul Traina case BPF_LD|BPF_IND|BPF_H: 10008cf6c252SPaul Traina case BPF_LD|BPF_IND|BPF_B: 10018cf6c252SPaul Traina v = val[X_ATOM]; 10028cf6c252SPaul Traina if (alter && vmap[v].is_const) { 10038cf6c252SPaul Traina s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code); 10048cf6c252SPaul Traina s->k += vmap[v].const_val; 10058cf6c252SPaul Traina v = F(s->code, s->k, 0L); 10068cf6c252SPaul Traina done = 0; 10078cf6c252SPaul Traina } 10088cf6c252SPaul Traina else 10098cf6c252SPaul Traina v = F(s->code, s->k, v); 10108cf6c252SPaul Traina vstore(s, &val[A_ATOM], v, alter); 10118cf6c252SPaul Traina break; 10128cf6c252SPaul Traina 10138cf6c252SPaul Traina case BPF_LD|BPF_LEN: 10148cf6c252SPaul Traina v = F(s->code, 0L, 0L); 10158cf6c252SPaul Traina vstore(s, &val[A_ATOM], v, alter); 10168cf6c252SPaul Traina break; 10178cf6c252SPaul Traina 10188cf6c252SPaul Traina case BPF_LD|BPF_IMM: 10198cf6c252SPaul Traina v = K(s->k); 10208cf6c252SPaul Traina vstore(s, &val[A_ATOM], v, alter); 10218cf6c252SPaul Traina break; 10228cf6c252SPaul Traina 10238cf6c252SPaul Traina case BPF_LDX|BPF_IMM: 10248cf6c252SPaul Traina v = K(s->k); 10258cf6c252SPaul Traina vstore(s, &val[X_ATOM], v, alter); 10268cf6c252SPaul Traina break; 10278cf6c252SPaul Traina 10288cf6c252SPaul Traina case BPF_LDX|BPF_MSH|BPF_B: 10298cf6c252SPaul Traina v = F(s->code, s->k, 0L); 10308cf6c252SPaul Traina vstore(s, &val[X_ATOM], v, alter); 10318cf6c252SPaul Traina break; 10328cf6c252SPaul Traina 10338cf6c252SPaul Traina case BPF_ALU|BPF_NEG: 10348cf6c252SPaul Traina if (alter && vmap[val[A_ATOM]].is_const) { 10358cf6c252SPaul Traina s->code = BPF_LD|BPF_IMM; 10368cf6c252SPaul Traina s->k = -vmap[val[A_ATOM]].const_val; 10378cf6c252SPaul Traina val[A_ATOM] = K(s->k); 10388cf6c252SPaul Traina } 10398cf6c252SPaul Traina else 10408cf6c252SPaul Traina val[A_ATOM] = F(s->code, val[A_ATOM], 0L); 10418cf6c252SPaul Traina break; 10428cf6c252SPaul Traina 10438cf6c252SPaul Traina case BPF_ALU|BPF_ADD|BPF_K: 10448cf6c252SPaul Traina case BPF_ALU|BPF_SUB|BPF_K: 10458cf6c252SPaul Traina case BPF_ALU|BPF_MUL|BPF_K: 10468cf6c252SPaul Traina case BPF_ALU|BPF_DIV|BPF_K: 10478cf6c252SPaul Traina case BPF_ALU|BPF_AND|BPF_K: 10488cf6c252SPaul Traina case BPF_ALU|BPF_OR|BPF_K: 10498cf6c252SPaul Traina case BPF_ALU|BPF_LSH|BPF_K: 10508cf6c252SPaul Traina case BPF_ALU|BPF_RSH|BPF_K: 10518cf6c252SPaul Traina op = BPF_OP(s->code); 10528cf6c252SPaul Traina if (alter) { 10538cf6c252SPaul Traina if (s->k == 0) { 10540a94d38fSBill Fenner /* don't optimize away "sub #0" 10550a94d38fSBill Fenner * as it may be needed later to 10560a94d38fSBill Fenner * fixup the generated math code */ 10570a94d38fSBill Fenner if (op == BPF_ADD || 10588cf6c252SPaul Traina op == BPF_LSH || op == BPF_RSH || 10598cf6c252SPaul Traina op == BPF_OR) { 10608cf6c252SPaul Traina s->code = NOP; 10618cf6c252SPaul Traina break; 10628cf6c252SPaul Traina } 10638cf6c252SPaul Traina if (op == BPF_MUL || op == BPF_AND) { 10648cf6c252SPaul Traina s->code = BPF_LD|BPF_IMM; 10658cf6c252SPaul Traina val[A_ATOM] = K(s->k); 10668cf6c252SPaul Traina break; 10678cf6c252SPaul Traina } 10688cf6c252SPaul Traina } 10698cf6c252SPaul Traina if (vmap[val[A_ATOM]].is_const) { 10708cf6c252SPaul Traina fold_op(s, val[A_ATOM], K(s->k)); 10718cf6c252SPaul Traina val[A_ATOM] = K(s->k); 10728cf6c252SPaul Traina break; 10738cf6c252SPaul Traina } 10748cf6c252SPaul Traina } 10758cf6c252SPaul Traina val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k)); 10768cf6c252SPaul Traina break; 10778cf6c252SPaul Traina 10788cf6c252SPaul Traina case BPF_ALU|BPF_ADD|BPF_X: 10798cf6c252SPaul Traina case BPF_ALU|BPF_SUB|BPF_X: 10808cf6c252SPaul Traina case BPF_ALU|BPF_MUL|BPF_X: 10818cf6c252SPaul Traina case BPF_ALU|BPF_DIV|BPF_X: 10828cf6c252SPaul Traina case BPF_ALU|BPF_AND|BPF_X: 10838cf6c252SPaul Traina case BPF_ALU|BPF_OR|BPF_X: 10848cf6c252SPaul Traina case BPF_ALU|BPF_LSH|BPF_X: 10858cf6c252SPaul Traina case BPF_ALU|BPF_RSH|BPF_X: 10868cf6c252SPaul Traina op = BPF_OP(s->code); 10878cf6c252SPaul Traina if (alter && vmap[val[X_ATOM]].is_const) { 10888cf6c252SPaul Traina if (vmap[val[A_ATOM]].is_const) { 10898cf6c252SPaul Traina fold_op(s, val[A_ATOM], val[X_ATOM]); 10908cf6c252SPaul Traina val[A_ATOM] = K(s->k); 10918cf6c252SPaul Traina } 10928cf6c252SPaul Traina else { 10938cf6c252SPaul Traina s->code = BPF_ALU|BPF_K|op; 10948cf6c252SPaul Traina s->k = vmap[val[X_ATOM]].const_val; 10958cf6c252SPaul Traina done = 0; 10968cf6c252SPaul Traina val[A_ATOM] = 10978cf6c252SPaul Traina F(s->code, val[A_ATOM], K(s->k)); 10988cf6c252SPaul Traina } 10998cf6c252SPaul Traina break; 11008cf6c252SPaul Traina } 11018cf6c252SPaul Traina /* 11028cf6c252SPaul Traina * Check if we're doing something to an accumulator 11038cf6c252SPaul Traina * that is 0, and simplify. This may not seem like 11048cf6c252SPaul Traina * much of a simplification but it could open up further 11058cf6c252SPaul Traina * optimizations. 1106feb4ecdbSBruce M Simpson * XXX We could also check for mul by 1, etc. 11078cf6c252SPaul Traina */ 11088cf6c252SPaul Traina if (alter && vmap[val[A_ATOM]].is_const 11098cf6c252SPaul Traina && vmap[val[A_ATOM]].const_val == 0) { 1110feb4ecdbSBruce M Simpson if (op == BPF_ADD || op == BPF_OR) { 11118cf6c252SPaul Traina s->code = BPF_MISC|BPF_TXA; 11128cf6c252SPaul Traina vstore(s, &val[A_ATOM], val[X_ATOM], alter); 11138cf6c252SPaul Traina break; 11148cf6c252SPaul Traina } 11158cf6c252SPaul Traina else if (op == BPF_MUL || op == BPF_DIV || 1116feb4ecdbSBruce M Simpson op == BPF_AND || op == BPF_LSH || op == BPF_RSH) { 11178cf6c252SPaul Traina s->code = BPF_LD|BPF_IMM; 11188cf6c252SPaul Traina s->k = 0; 11198cf6c252SPaul Traina vstore(s, &val[A_ATOM], K(s->k), alter); 11208cf6c252SPaul Traina break; 11218cf6c252SPaul Traina } 11228cf6c252SPaul Traina else if (op == BPF_NEG) { 11238cf6c252SPaul Traina s->code = NOP; 11248cf6c252SPaul Traina break; 11258cf6c252SPaul Traina } 11268cf6c252SPaul Traina } 11278cf6c252SPaul Traina val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]); 11288cf6c252SPaul Traina break; 11298cf6c252SPaul Traina 11308cf6c252SPaul Traina case BPF_MISC|BPF_TXA: 11318cf6c252SPaul Traina vstore(s, &val[A_ATOM], val[X_ATOM], alter); 11328cf6c252SPaul Traina break; 11338cf6c252SPaul Traina 11348cf6c252SPaul Traina case BPF_LD|BPF_MEM: 11358cf6c252SPaul Traina v = val[s->k]; 11368cf6c252SPaul Traina if (alter && vmap[v].is_const) { 11378cf6c252SPaul Traina s->code = BPF_LD|BPF_IMM; 11388cf6c252SPaul Traina s->k = vmap[v].const_val; 11398cf6c252SPaul Traina done = 0; 11408cf6c252SPaul Traina } 11418cf6c252SPaul Traina vstore(s, &val[A_ATOM], v, alter); 11428cf6c252SPaul Traina break; 11438cf6c252SPaul Traina 11448cf6c252SPaul Traina case BPF_MISC|BPF_TAX: 11458cf6c252SPaul Traina vstore(s, &val[X_ATOM], val[A_ATOM], alter); 11468cf6c252SPaul Traina break; 11478cf6c252SPaul Traina 11488cf6c252SPaul Traina case BPF_LDX|BPF_MEM: 11498cf6c252SPaul Traina v = val[s->k]; 11508cf6c252SPaul Traina if (alter && vmap[v].is_const) { 11518cf6c252SPaul Traina s->code = BPF_LDX|BPF_IMM; 11528cf6c252SPaul Traina s->k = vmap[v].const_val; 11538cf6c252SPaul Traina done = 0; 11548cf6c252SPaul Traina } 11558cf6c252SPaul Traina vstore(s, &val[X_ATOM], v, alter); 11568cf6c252SPaul Traina break; 11578cf6c252SPaul Traina 11588cf6c252SPaul Traina case BPF_ST: 11598cf6c252SPaul Traina vstore(s, &val[s->k], val[A_ATOM], alter); 11608cf6c252SPaul Traina break; 11618cf6c252SPaul Traina 11628cf6c252SPaul Traina case BPF_STX: 11638cf6c252SPaul Traina vstore(s, &val[s->k], val[X_ATOM], alter); 11648cf6c252SPaul Traina break; 11658cf6c252SPaul Traina } 11668cf6c252SPaul Traina } 11678cf6c252SPaul Traina 11688cf6c252SPaul Traina static void 11698cf6c252SPaul Traina deadstmt(s, last) 11708cf6c252SPaul Traina register struct stmt *s; 11718cf6c252SPaul Traina register struct stmt *last[]; 11728cf6c252SPaul Traina { 11738cf6c252SPaul Traina register int atom; 11748cf6c252SPaul Traina 11758cf6c252SPaul Traina atom = atomuse(s); 11768cf6c252SPaul Traina if (atom >= 0) { 11778cf6c252SPaul Traina if (atom == AX_ATOM) { 11788cf6c252SPaul Traina last[X_ATOM] = 0; 11798cf6c252SPaul Traina last[A_ATOM] = 0; 11808cf6c252SPaul Traina } 11818cf6c252SPaul Traina else 11828cf6c252SPaul Traina last[atom] = 0; 11838cf6c252SPaul Traina } 11848cf6c252SPaul Traina atom = atomdef(s); 11858cf6c252SPaul Traina if (atom >= 0) { 11868cf6c252SPaul Traina if (last[atom]) { 11878cf6c252SPaul Traina done = 0; 11888cf6c252SPaul Traina last[atom]->code = NOP; 11898cf6c252SPaul Traina } 11908cf6c252SPaul Traina last[atom] = s; 11918cf6c252SPaul Traina } 11928cf6c252SPaul Traina } 11938cf6c252SPaul Traina 11948cf6c252SPaul Traina static void 11958cf6c252SPaul Traina opt_deadstores(b) 11968cf6c252SPaul Traina register struct block *b; 11978cf6c252SPaul Traina { 11988cf6c252SPaul Traina register struct slist *s; 11998cf6c252SPaul Traina register int atom; 12008cf6c252SPaul Traina struct stmt *last[N_ATOMS]; 12018cf6c252SPaul Traina 12028cf6c252SPaul Traina memset((char *)last, 0, sizeof last); 12038cf6c252SPaul Traina 12048cf6c252SPaul Traina for (s = b->stmts; s != 0; s = s->next) 12058cf6c252SPaul Traina deadstmt(&s->s, last); 12068cf6c252SPaul Traina deadstmt(&b->s, last); 12078cf6c252SPaul Traina 12088cf6c252SPaul Traina for (atom = 0; atom < N_ATOMS; ++atom) 12098cf6c252SPaul Traina if (last[atom] && !ATOMELEM(b->out_use, atom)) { 12108cf6c252SPaul Traina last[atom]->code = NOP; 12118cf6c252SPaul Traina done = 0; 12128cf6c252SPaul Traina } 12138cf6c252SPaul Traina } 12148cf6c252SPaul Traina 12158cf6c252SPaul Traina static void 12168cf6c252SPaul Traina opt_blk(b, do_stmts) 12178cf6c252SPaul Traina struct block *b; 12188cf6c252SPaul Traina int do_stmts; 12198cf6c252SPaul Traina { 12208cf6c252SPaul Traina struct slist *s; 12218cf6c252SPaul Traina struct edge *p; 12228cf6c252SPaul Traina int i; 122304fb2745SSam Leffler bpf_int32 aval, xval; 12248cf6c252SPaul Traina 12258751327cSBill Fenner #if 0 12268751327cSBill Fenner for (s = b->stmts; s && s->next; s = s->next) 12278751327cSBill Fenner if (BPF_CLASS(s->s.code) == BPF_JMP) { 12288751327cSBill Fenner do_stmts = 0; 12298751327cSBill Fenner break; 12308751327cSBill Fenner } 12318751327cSBill Fenner #endif 12328751327cSBill Fenner 12338cf6c252SPaul Traina /* 12348cf6c252SPaul Traina * Initialize the atom values. 12358cf6c252SPaul Traina */ 12368cf6c252SPaul Traina p = b->in_edges; 123704fb2745SSam Leffler if (p == 0) { 123804fb2745SSam Leffler /* 123904fb2745SSam Leffler * We have no predecessors, so everything is undefined 124004fb2745SSam Leffler * upon entry to this block. 124104fb2745SSam Leffler */ 12428cf6c252SPaul Traina memset((char *)b->val, 0, sizeof(b->val)); 124304fb2745SSam Leffler } else { 124404fb2745SSam Leffler /* 124504fb2745SSam Leffler * Inherit values from our predecessors. 124604fb2745SSam Leffler * 124704fb2745SSam Leffler * First, get the values from the predecessor along the 124804fb2745SSam Leffler * first edge leading to this node. 124904fb2745SSam Leffler */ 12508cf6c252SPaul Traina memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val)); 125104fb2745SSam Leffler /* 125204fb2745SSam Leffler * Now look at all the other nodes leading to this node. 125304fb2745SSam Leffler * If, for the predecessor along that edge, a register 125404fb2745SSam Leffler * has a different value from the one we have (i.e., 125504fb2745SSam Leffler * control paths are merging, and the merging paths 125604fb2745SSam Leffler * assign different values to that register), give the 125704fb2745SSam Leffler * register the undefined value of 0. 125804fb2745SSam Leffler */ 12598cf6c252SPaul Traina while ((p = p->next) != NULL) { 12608cf6c252SPaul Traina for (i = 0; i < N_ATOMS; ++i) 12618cf6c252SPaul Traina if (b->val[i] != p->pred->val[i]) 12628cf6c252SPaul Traina b->val[i] = 0; 12638cf6c252SPaul Traina } 12648cf6c252SPaul Traina } 12658cf6c252SPaul Traina aval = b->val[A_ATOM]; 126604fb2745SSam Leffler xval = b->val[X_ATOM]; 12678cf6c252SPaul Traina for (s = b->stmts; s; s = s->next) 12688cf6c252SPaul Traina opt_stmt(&s->s, b->val, do_stmts); 12698cf6c252SPaul Traina 12708cf6c252SPaul Traina /* 12718cf6c252SPaul Traina * This is a special case: if we don't use anything from this 127204fb2745SSam Leffler * block, and we load the accumulator or index register with a 127304fb2745SSam Leffler * value that is already there, or if this block is a return, 12748cf6c252SPaul Traina * eliminate all the statements. 127504fb2745SSam Leffler * 127604fb2745SSam Leffler * XXX - what if it does a store? 127704fb2745SSam Leffler * 127804fb2745SSam Leffler * XXX - why does it matter whether we use anything from this 127904fb2745SSam Leffler * block? If the accumulator or index register doesn't change 128004fb2745SSam Leffler * its value, isn't that OK even if we use that value? 128104fb2745SSam Leffler * 128204fb2745SSam Leffler * XXX - if we load the accumulator with a different value, 128304fb2745SSam Leffler * and the block ends with a conditional branch, we obviously 128404fb2745SSam Leffler * can't eliminate it, as the branch depends on that value. 128504fb2745SSam Leffler * For the index register, the conditional branch only depends 128604fb2745SSam Leffler * on the index register value if the test is against the index 128704fb2745SSam Leffler * register value rather than a constant; if nothing uses the 128804fb2745SSam Leffler * value we put into the index register, and we're not testing 128904fb2745SSam Leffler * against the index register's value, and there aren't any 129004fb2745SSam Leffler * other problems that would keep us from eliminating this 129104fb2745SSam Leffler * block, can we eliminate it? 12928cf6c252SPaul Traina */ 12938cf6c252SPaul Traina if (do_stmts && 129404fb2745SSam Leffler ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval && 129504fb2745SSam Leffler xval != 0 && b->val[X_ATOM] == xval) || 12968cf6c252SPaul Traina BPF_CLASS(b->s.code) == BPF_RET)) { 12978cf6c252SPaul Traina if (b->stmts != 0) { 12988cf6c252SPaul Traina b->stmts = 0; 12998cf6c252SPaul Traina done = 0; 13008cf6c252SPaul Traina } 13018cf6c252SPaul Traina } else { 13028cf6c252SPaul Traina opt_peep(b); 13038cf6c252SPaul Traina opt_deadstores(b); 13048cf6c252SPaul Traina } 13058cf6c252SPaul Traina /* 13068cf6c252SPaul Traina * Set up values for branch optimizer. 13078cf6c252SPaul Traina */ 13088cf6c252SPaul Traina if (BPF_SRC(b->s.code) == BPF_K) 13098cf6c252SPaul Traina b->oval = K(b->s.k); 13108cf6c252SPaul Traina else 13118cf6c252SPaul Traina b->oval = b->val[X_ATOM]; 13128cf6c252SPaul Traina b->et.code = b->s.code; 13138cf6c252SPaul Traina b->ef.code = -b->s.code; 13148cf6c252SPaul Traina } 13158cf6c252SPaul Traina 13168cf6c252SPaul Traina /* 13178cf6c252SPaul Traina * Return true if any register that is used on exit from 'succ', has 13188cf6c252SPaul Traina * an exit value that is different from the corresponding exit value 13198cf6c252SPaul Traina * from 'b'. 13208cf6c252SPaul Traina */ 13218cf6c252SPaul Traina static int 13228cf6c252SPaul Traina use_conflict(b, succ) 13238cf6c252SPaul Traina struct block *b, *succ; 13248cf6c252SPaul Traina { 13258cf6c252SPaul Traina int atom; 13268cf6c252SPaul Traina atomset use = succ->out_use; 13278cf6c252SPaul Traina 13288cf6c252SPaul Traina if (use == 0) 13298cf6c252SPaul Traina return 0; 13308cf6c252SPaul Traina 13318cf6c252SPaul Traina for (atom = 0; atom < N_ATOMS; ++atom) 13328cf6c252SPaul Traina if (ATOMELEM(use, atom)) 13338cf6c252SPaul Traina if (b->val[atom] != succ->val[atom]) 13348cf6c252SPaul Traina return 1; 13358cf6c252SPaul Traina return 0; 13368cf6c252SPaul Traina } 13378cf6c252SPaul Traina 13388cf6c252SPaul Traina static struct block * 13398cf6c252SPaul Traina fold_edge(child, ep) 13408cf6c252SPaul Traina struct block *child; 13418cf6c252SPaul Traina struct edge *ep; 13428cf6c252SPaul Traina { 13438cf6c252SPaul Traina int sense; 13448cf6c252SPaul Traina int aval0, aval1, oval0, oval1; 13458cf6c252SPaul Traina int code = ep->code; 13468cf6c252SPaul Traina 13478cf6c252SPaul Traina if (code < 0) { 13488cf6c252SPaul Traina code = -code; 13498cf6c252SPaul Traina sense = 0; 13508cf6c252SPaul Traina } else 13518cf6c252SPaul Traina sense = 1; 13528cf6c252SPaul Traina 13538cf6c252SPaul Traina if (child->s.code != code) 13548cf6c252SPaul Traina return 0; 13558cf6c252SPaul Traina 13568cf6c252SPaul Traina aval0 = child->val[A_ATOM]; 13578cf6c252SPaul Traina oval0 = child->oval; 13588cf6c252SPaul Traina aval1 = ep->pred->val[A_ATOM]; 13598cf6c252SPaul Traina oval1 = ep->pred->oval; 13608cf6c252SPaul Traina 13618cf6c252SPaul Traina if (aval0 != aval1) 13628cf6c252SPaul Traina return 0; 13638cf6c252SPaul Traina 13648cf6c252SPaul Traina if (oval0 == oval1) 13658cf6c252SPaul Traina /* 136604fb2745SSam Leffler * The operands of the branch instructions are 136704fb2745SSam Leffler * identical, so the result is true if a true 136804fb2745SSam Leffler * branch was taken to get here, otherwise false. 13698cf6c252SPaul Traina */ 13708cf6c252SPaul Traina return sense ? JT(child) : JF(child); 13718cf6c252SPaul Traina 13728cf6c252SPaul Traina if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K)) 13738cf6c252SPaul Traina /* 13748cf6c252SPaul Traina * At this point, we only know the comparison if we 13758cf6c252SPaul Traina * came down the true branch, and it was an equality 137604fb2745SSam Leffler * comparison with a constant. 137704fb2745SSam Leffler * 137804fb2745SSam Leffler * I.e., if we came down the true branch, and the branch 137904fb2745SSam Leffler * was an equality comparison with a constant, we know the 138004fb2745SSam Leffler * accumulator contains that constant. If we came down 138104fb2745SSam Leffler * the false branch, or the comparison wasn't with a 138204fb2745SSam Leffler * constant, we don't know what was in the accumulator. 138304fb2745SSam Leffler * 138404fb2745SSam Leffler * We rely on the fact that distinct constants have distinct 138504fb2745SSam Leffler * value numbers. 13868cf6c252SPaul Traina */ 13878cf6c252SPaul Traina return JF(child); 13888cf6c252SPaul Traina 13898cf6c252SPaul Traina return 0; 13908cf6c252SPaul Traina } 13918cf6c252SPaul Traina 13928cf6c252SPaul Traina static void 13938cf6c252SPaul Traina opt_j(ep) 13948cf6c252SPaul Traina struct edge *ep; 13958cf6c252SPaul Traina { 13968cf6c252SPaul Traina register int i, k; 13978cf6c252SPaul Traina register struct block *target; 13988cf6c252SPaul Traina 13998cf6c252SPaul Traina if (JT(ep->succ) == 0) 14008cf6c252SPaul Traina return; 14018cf6c252SPaul Traina 14028cf6c252SPaul Traina if (JT(ep->succ) == JF(ep->succ)) { 14038cf6c252SPaul Traina /* 14048cf6c252SPaul Traina * Common branch targets can be eliminated, provided 14058cf6c252SPaul Traina * there is no data dependency. 14068cf6c252SPaul Traina */ 14078cf6c252SPaul Traina if (!use_conflict(ep->pred, ep->succ->et.succ)) { 14088cf6c252SPaul Traina done = 0; 14098cf6c252SPaul Traina ep->succ = JT(ep->succ); 14108cf6c252SPaul Traina } 14118cf6c252SPaul Traina } 14128cf6c252SPaul Traina /* 14138cf6c252SPaul Traina * For each edge dominator that matches the successor of this 14148cf6c252SPaul Traina * edge, promote the edge successor to the its grandchild. 14158cf6c252SPaul Traina * 14168cf6c252SPaul Traina * XXX We violate the set abstraction here in favor a reasonably 14178cf6c252SPaul Traina * efficient loop. 14188cf6c252SPaul Traina */ 14198cf6c252SPaul Traina top: 14208cf6c252SPaul Traina for (i = 0; i < edgewords; ++i) { 14218cf6c252SPaul Traina register bpf_u_int32 x = ep->edom[i]; 14228cf6c252SPaul Traina 14238cf6c252SPaul Traina while (x != 0) { 14248cf6c252SPaul Traina k = ffs(x) - 1; 14258cf6c252SPaul Traina x &=~ (1 << k); 14268cf6c252SPaul Traina k += i * BITS_PER_WORD; 14278cf6c252SPaul Traina 14288cf6c252SPaul Traina target = fold_edge(ep->succ, edges[k]); 14298cf6c252SPaul Traina /* 14308cf6c252SPaul Traina * Check that there is no data dependency between 14318cf6c252SPaul Traina * nodes that will be violated if we move the edge. 14328cf6c252SPaul Traina */ 14338cf6c252SPaul Traina if (target != 0 && !use_conflict(ep->pred, target)) { 14348cf6c252SPaul Traina done = 0; 14358cf6c252SPaul Traina ep->succ = target; 14368cf6c252SPaul Traina if (JT(target) != 0) 14378cf6c252SPaul Traina /* 14388cf6c252SPaul Traina * Start over unless we hit a leaf. 14398cf6c252SPaul Traina */ 14408cf6c252SPaul Traina goto top; 14418cf6c252SPaul Traina return; 14428cf6c252SPaul Traina } 14438cf6c252SPaul Traina } 14448cf6c252SPaul Traina } 14458cf6c252SPaul Traina } 14468cf6c252SPaul Traina 14478cf6c252SPaul Traina 14488cf6c252SPaul Traina static void 14498cf6c252SPaul Traina or_pullup(b) 14508cf6c252SPaul Traina struct block *b; 14518cf6c252SPaul Traina { 14528cf6c252SPaul Traina int val, at_top; 14538cf6c252SPaul Traina struct block *pull; 14548cf6c252SPaul Traina struct block **diffp, **samep; 14558cf6c252SPaul Traina struct edge *ep; 14568cf6c252SPaul Traina 14578cf6c252SPaul Traina ep = b->in_edges; 14588cf6c252SPaul Traina if (ep == 0) 14598cf6c252SPaul Traina return; 14608cf6c252SPaul Traina 14618cf6c252SPaul Traina /* 14628cf6c252SPaul Traina * Make sure each predecessor loads the same value. 14638cf6c252SPaul Traina * XXX why? 14648cf6c252SPaul Traina */ 14658cf6c252SPaul Traina val = ep->pred->val[A_ATOM]; 14668cf6c252SPaul Traina for (ep = ep->next; ep != 0; ep = ep->next) 14678cf6c252SPaul Traina if (val != ep->pred->val[A_ATOM]) 14688cf6c252SPaul Traina return; 14698cf6c252SPaul Traina 14708cf6c252SPaul Traina if (JT(b->in_edges->pred) == b) 14718cf6c252SPaul Traina diffp = &JT(b->in_edges->pred); 14728cf6c252SPaul Traina else 14738cf6c252SPaul Traina diffp = &JF(b->in_edges->pred); 14748cf6c252SPaul Traina 14758cf6c252SPaul Traina at_top = 1; 14768cf6c252SPaul Traina while (1) { 14778cf6c252SPaul Traina if (*diffp == 0) 14788cf6c252SPaul Traina return; 14798cf6c252SPaul Traina 14808cf6c252SPaul Traina if (JT(*diffp) != JT(b)) 14818cf6c252SPaul Traina return; 14828cf6c252SPaul Traina 14838cf6c252SPaul Traina if (!SET_MEMBER((*diffp)->dom, b->id)) 14848cf6c252SPaul Traina return; 14858cf6c252SPaul Traina 14868cf6c252SPaul Traina if ((*diffp)->val[A_ATOM] != val) 14878cf6c252SPaul Traina break; 14888cf6c252SPaul Traina 14898cf6c252SPaul Traina diffp = &JF(*diffp); 14908cf6c252SPaul Traina at_top = 0; 14918cf6c252SPaul Traina } 14928cf6c252SPaul Traina samep = &JF(*diffp); 14938cf6c252SPaul Traina while (1) { 14948cf6c252SPaul Traina if (*samep == 0) 14958cf6c252SPaul Traina return; 14968cf6c252SPaul Traina 14978cf6c252SPaul Traina if (JT(*samep) != JT(b)) 14988cf6c252SPaul Traina return; 14998cf6c252SPaul Traina 15008cf6c252SPaul Traina if (!SET_MEMBER((*samep)->dom, b->id)) 15018cf6c252SPaul Traina return; 15028cf6c252SPaul Traina 15038cf6c252SPaul Traina if ((*samep)->val[A_ATOM] == val) 15048cf6c252SPaul Traina break; 15058cf6c252SPaul Traina 15068cf6c252SPaul Traina /* XXX Need to check that there are no data dependencies 15078cf6c252SPaul Traina between dp0 and dp1. Currently, the code generator 15088cf6c252SPaul Traina will not produce such dependencies. */ 15098cf6c252SPaul Traina samep = &JF(*samep); 15108cf6c252SPaul Traina } 15118cf6c252SPaul Traina #ifdef notdef 15128cf6c252SPaul Traina /* XXX This doesn't cover everything. */ 15138cf6c252SPaul Traina for (i = 0; i < N_ATOMS; ++i) 15148cf6c252SPaul Traina if ((*samep)->val[i] != pred->val[i]) 15158cf6c252SPaul Traina return; 15168cf6c252SPaul Traina #endif 15178cf6c252SPaul Traina /* Pull up the node. */ 15188cf6c252SPaul Traina pull = *samep; 15198cf6c252SPaul Traina *samep = JF(pull); 15208cf6c252SPaul Traina JF(pull) = *diffp; 15218cf6c252SPaul Traina 15228cf6c252SPaul Traina /* 15238cf6c252SPaul Traina * At the top of the chain, each predecessor needs to point at the 15248cf6c252SPaul Traina * pulled up node. Inside the chain, there is only one predecessor 15258cf6c252SPaul Traina * to worry about. 15268cf6c252SPaul Traina */ 15278cf6c252SPaul Traina if (at_top) { 15288cf6c252SPaul Traina for (ep = b->in_edges; ep != 0; ep = ep->next) { 15298cf6c252SPaul Traina if (JT(ep->pred) == b) 15308cf6c252SPaul Traina JT(ep->pred) = pull; 15318cf6c252SPaul Traina else 15328cf6c252SPaul Traina JF(ep->pred) = pull; 15338cf6c252SPaul Traina } 15348cf6c252SPaul Traina } 15358cf6c252SPaul Traina else 15368cf6c252SPaul Traina *diffp = pull; 15378cf6c252SPaul Traina 15388cf6c252SPaul Traina done = 0; 15398cf6c252SPaul Traina } 15408cf6c252SPaul Traina 15418cf6c252SPaul Traina static void 15428cf6c252SPaul Traina and_pullup(b) 15438cf6c252SPaul Traina struct block *b; 15448cf6c252SPaul Traina { 15458cf6c252SPaul Traina int val, at_top; 15468cf6c252SPaul Traina struct block *pull; 15478cf6c252SPaul Traina struct block **diffp, **samep; 15488cf6c252SPaul Traina struct edge *ep; 15498cf6c252SPaul Traina 15508cf6c252SPaul Traina ep = b->in_edges; 15518cf6c252SPaul Traina if (ep == 0) 15528cf6c252SPaul Traina return; 15538cf6c252SPaul Traina 15548cf6c252SPaul Traina /* 15558cf6c252SPaul Traina * Make sure each predecessor loads the same value. 15568cf6c252SPaul Traina */ 15578cf6c252SPaul Traina val = ep->pred->val[A_ATOM]; 15588cf6c252SPaul Traina for (ep = ep->next; ep != 0; ep = ep->next) 15598cf6c252SPaul Traina if (val != ep->pred->val[A_ATOM]) 15608cf6c252SPaul Traina return; 15618cf6c252SPaul Traina 15628cf6c252SPaul Traina if (JT(b->in_edges->pred) == b) 15638cf6c252SPaul Traina diffp = &JT(b->in_edges->pred); 15648cf6c252SPaul Traina else 15658cf6c252SPaul Traina diffp = &JF(b->in_edges->pred); 15668cf6c252SPaul Traina 15678cf6c252SPaul Traina at_top = 1; 15688cf6c252SPaul Traina while (1) { 15698cf6c252SPaul Traina if (*diffp == 0) 15708cf6c252SPaul Traina return; 15718cf6c252SPaul Traina 15728cf6c252SPaul Traina if (JF(*diffp) != JF(b)) 15738cf6c252SPaul Traina return; 15748cf6c252SPaul Traina 15758cf6c252SPaul Traina if (!SET_MEMBER((*diffp)->dom, b->id)) 15768cf6c252SPaul Traina return; 15778cf6c252SPaul Traina 15788cf6c252SPaul Traina if ((*diffp)->val[A_ATOM] != val) 15798cf6c252SPaul Traina break; 15808cf6c252SPaul Traina 15818cf6c252SPaul Traina diffp = &JT(*diffp); 15828cf6c252SPaul Traina at_top = 0; 15838cf6c252SPaul Traina } 15848cf6c252SPaul Traina samep = &JT(*diffp); 15858cf6c252SPaul Traina while (1) { 15868cf6c252SPaul Traina if (*samep == 0) 15878cf6c252SPaul Traina return; 15888cf6c252SPaul Traina 15898cf6c252SPaul Traina if (JF(*samep) != JF(b)) 15908cf6c252SPaul Traina return; 15918cf6c252SPaul Traina 15928cf6c252SPaul Traina if (!SET_MEMBER((*samep)->dom, b->id)) 15938cf6c252SPaul Traina return; 15948cf6c252SPaul Traina 15958cf6c252SPaul Traina if ((*samep)->val[A_ATOM] == val) 15968cf6c252SPaul Traina break; 15978cf6c252SPaul Traina 15988cf6c252SPaul Traina /* XXX Need to check that there are no data dependencies 15998cf6c252SPaul Traina between diffp and samep. Currently, the code generator 16008cf6c252SPaul Traina will not produce such dependencies. */ 16018cf6c252SPaul Traina samep = &JT(*samep); 16028cf6c252SPaul Traina } 16038cf6c252SPaul Traina #ifdef notdef 16048cf6c252SPaul Traina /* XXX This doesn't cover everything. */ 16058cf6c252SPaul Traina for (i = 0; i < N_ATOMS; ++i) 16068cf6c252SPaul Traina if ((*samep)->val[i] != pred->val[i]) 16078cf6c252SPaul Traina return; 16088cf6c252SPaul Traina #endif 16098cf6c252SPaul Traina /* Pull up the node. */ 16108cf6c252SPaul Traina pull = *samep; 16118cf6c252SPaul Traina *samep = JT(pull); 16128cf6c252SPaul Traina JT(pull) = *diffp; 16138cf6c252SPaul Traina 16148cf6c252SPaul Traina /* 16158cf6c252SPaul Traina * At the top of the chain, each predecessor needs to point at the 16168cf6c252SPaul Traina * pulled up node. Inside the chain, there is only one predecessor 16178cf6c252SPaul Traina * to worry about. 16188cf6c252SPaul Traina */ 16198cf6c252SPaul Traina if (at_top) { 16208cf6c252SPaul Traina for (ep = b->in_edges; ep != 0; ep = ep->next) { 16218cf6c252SPaul Traina if (JT(ep->pred) == b) 16228cf6c252SPaul Traina JT(ep->pred) = pull; 16238cf6c252SPaul Traina else 16248cf6c252SPaul Traina JF(ep->pred) = pull; 16258cf6c252SPaul Traina } 16268cf6c252SPaul Traina } 16278cf6c252SPaul Traina else 16288cf6c252SPaul Traina *diffp = pull; 16298cf6c252SPaul Traina 16308cf6c252SPaul Traina done = 0; 16318cf6c252SPaul Traina } 16328cf6c252SPaul Traina 16338cf6c252SPaul Traina static void 16348cf6c252SPaul Traina opt_blks(root, do_stmts) 16358cf6c252SPaul Traina struct block *root; 16368cf6c252SPaul Traina int do_stmts; 16378cf6c252SPaul Traina { 16388cf6c252SPaul Traina int i, maxlevel; 16398cf6c252SPaul Traina struct block *p; 16408cf6c252SPaul Traina 16418cf6c252SPaul Traina init_val(); 16428cf6c252SPaul Traina maxlevel = root->level; 1643dc2c7305SBill Fenner 1644dc2c7305SBill Fenner find_inedges(root); 16458cf6c252SPaul Traina for (i = maxlevel; i >= 0; --i) 16468cf6c252SPaul Traina for (p = levels[i]; p; p = p->link) 16478cf6c252SPaul Traina opt_blk(p, do_stmts); 16488cf6c252SPaul Traina 16498cf6c252SPaul Traina if (do_stmts) 16508cf6c252SPaul Traina /* 16518cf6c252SPaul Traina * No point trying to move branches; it can't possibly 16528cf6c252SPaul Traina * make a difference at this point. 16538cf6c252SPaul Traina */ 16548cf6c252SPaul Traina return; 16558cf6c252SPaul Traina 16568cf6c252SPaul Traina for (i = 1; i <= maxlevel; ++i) { 16578cf6c252SPaul Traina for (p = levels[i]; p; p = p->link) { 16588cf6c252SPaul Traina opt_j(&p->et); 16598cf6c252SPaul Traina opt_j(&p->ef); 16608cf6c252SPaul Traina } 16618cf6c252SPaul Traina } 1662dc2c7305SBill Fenner 1663dc2c7305SBill Fenner find_inedges(root); 16648cf6c252SPaul Traina for (i = 1; i <= maxlevel; ++i) { 16658cf6c252SPaul Traina for (p = levels[i]; p; p = p->link) { 16668cf6c252SPaul Traina or_pullup(p); 16678cf6c252SPaul Traina and_pullup(p); 16688cf6c252SPaul Traina } 16698cf6c252SPaul Traina } 16708cf6c252SPaul Traina } 16718cf6c252SPaul Traina 16728cf6c252SPaul Traina static inline void 16738cf6c252SPaul Traina link_inedge(parent, child) 16748cf6c252SPaul Traina struct edge *parent; 16758cf6c252SPaul Traina struct block *child; 16768cf6c252SPaul Traina { 16778cf6c252SPaul Traina parent->next = child->in_edges; 16788cf6c252SPaul Traina child->in_edges = parent; 16798cf6c252SPaul Traina } 16808cf6c252SPaul Traina 16818cf6c252SPaul Traina static void 16828cf6c252SPaul Traina find_inedges(root) 16838cf6c252SPaul Traina struct block *root; 16848cf6c252SPaul Traina { 16858cf6c252SPaul Traina int i; 16868cf6c252SPaul Traina struct block *b; 16878cf6c252SPaul Traina 16888cf6c252SPaul Traina for (i = 0; i < n_blocks; ++i) 16898cf6c252SPaul Traina blocks[i]->in_edges = 0; 16908cf6c252SPaul Traina 16918cf6c252SPaul Traina /* 16928cf6c252SPaul Traina * Traverse the graph, adding each edge to the predecessor 16938cf6c252SPaul Traina * list of its successors. Skip the leaves (i.e. level 0). 16948cf6c252SPaul Traina */ 16958cf6c252SPaul Traina for (i = root->level; i > 0; --i) { 16968cf6c252SPaul Traina for (b = levels[i]; b != 0; b = b->link) { 16978cf6c252SPaul Traina link_inedge(&b->et, JT(b)); 16988cf6c252SPaul Traina link_inedge(&b->ef, JF(b)); 16998cf6c252SPaul Traina } 17008cf6c252SPaul Traina } 17018cf6c252SPaul Traina } 17028cf6c252SPaul Traina 17038cf6c252SPaul Traina static void 17048cf6c252SPaul Traina opt_root(b) 17058cf6c252SPaul Traina struct block **b; 17068cf6c252SPaul Traina { 17078cf6c252SPaul Traina struct slist *tmp, *s; 17088cf6c252SPaul Traina 17098cf6c252SPaul Traina s = (*b)->stmts; 17108cf6c252SPaul Traina (*b)->stmts = 0; 17118cf6c252SPaul Traina while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b)) 17128cf6c252SPaul Traina *b = JT(*b); 17138cf6c252SPaul Traina 17148cf6c252SPaul Traina tmp = (*b)->stmts; 17158cf6c252SPaul Traina if (tmp != 0) 17168cf6c252SPaul Traina sappend(s, tmp); 17178cf6c252SPaul Traina (*b)->stmts = s; 17188cf6c252SPaul Traina 17198cf6c252SPaul Traina /* 17208cf6c252SPaul Traina * If the root node is a return, then there is no 17218cf6c252SPaul Traina * point executing any statements (since the bpf machine 17228cf6c252SPaul Traina * has no side effects). 17238cf6c252SPaul Traina */ 17248cf6c252SPaul Traina if (BPF_CLASS((*b)->s.code) == BPF_RET) 17258cf6c252SPaul Traina (*b)->stmts = 0; 17268cf6c252SPaul Traina } 17278cf6c252SPaul Traina 17288cf6c252SPaul Traina static void 17298cf6c252SPaul Traina opt_loop(root, do_stmts) 17308cf6c252SPaul Traina struct block *root; 17318cf6c252SPaul Traina int do_stmts; 17328cf6c252SPaul Traina { 17338cf6c252SPaul Traina 17348cf6c252SPaul Traina #ifdef BDEBUG 17350a94d38fSBill Fenner if (dflag > 1) { 17360a94d38fSBill Fenner printf("opt_loop(root, %d) begin\n", do_stmts); 17378cf6c252SPaul Traina opt_dump(root); 17380a94d38fSBill Fenner } 17398cf6c252SPaul Traina #endif 17408cf6c252SPaul Traina do { 17418cf6c252SPaul Traina done = 1; 17428cf6c252SPaul Traina find_levels(root); 17438cf6c252SPaul Traina find_dom(root); 17448cf6c252SPaul Traina find_closure(root); 17458cf6c252SPaul Traina find_ud(root); 17468cf6c252SPaul Traina find_edom(root); 17478cf6c252SPaul Traina opt_blks(root, do_stmts); 17488cf6c252SPaul Traina #ifdef BDEBUG 17490a94d38fSBill Fenner if (dflag > 1) { 17500a94d38fSBill Fenner printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done); 17518cf6c252SPaul Traina opt_dump(root); 17520a94d38fSBill Fenner } 17538cf6c252SPaul Traina #endif 17548cf6c252SPaul Traina } while (!done); 17558cf6c252SPaul Traina } 17568cf6c252SPaul Traina 17578cf6c252SPaul Traina /* 17588cf6c252SPaul Traina * Optimize the filter code in its dag representation. 17598cf6c252SPaul Traina */ 17608cf6c252SPaul Traina void 17618cf6c252SPaul Traina bpf_optimize(rootp) 17628cf6c252SPaul Traina struct block **rootp; 17638cf6c252SPaul Traina { 17648cf6c252SPaul Traina struct block *root; 17658cf6c252SPaul Traina 17668cf6c252SPaul Traina root = *rootp; 17678cf6c252SPaul Traina 17688cf6c252SPaul Traina opt_init(root); 17698cf6c252SPaul Traina opt_loop(root, 0); 17708cf6c252SPaul Traina opt_loop(root, 1); 17718cf6c252SPaul Traina intern_blocks(root); 17720a94d38fSBill Fenner #ifdef BDEBUG 17730a94d38fSBill Fenner if (dflag > 1) { 17740a94d38fSBill Fenner printf("after intern_blocks()\n"); 17750a94d38fSBill Fenner opt_dump(root); 17760a94d38fSBill Fenner } 17770a94d38fSBill Fenner #endif 17788cf6c252SPaul Traina opt_root(rootp); 17790a94d38fSBill Fenner #ifdef BDEBUG 17800a94d38fSBill Fenner if (dflag > 1) { 17810a94d38fSBill Fenner printf("after opt_root()\n"); 17820a94d38fSBill Fenner opt_dump(root); 17830a94d38fSBill Fenner } 17840a94d38fSBill Fenner #endif 17858cf6c252SPaul Traina opt_cleanup(); 17868cf6c252SPaul Traina } 17878cf6c252SPaul Traina 17888cf6c252SPaul Traina static void 17898cf6c252SPaul Traina make_marks(p) 17908cf6c252SPaul Traina struct block *p; 17918cf6c252SPaul Traina { 17928cf6c252SPaul Traina if (!isMarked(p)) { 17938cf6c252SPaul Traina Mark(p); 17948cf6c252SPaul Traina if (BPF_CLASS(p->s.code) != BPF_RET) { 17958cf6c252SPaul Traina make_marks(JT(p)); 17968cf6c252SPaul Traina make_marks(JF(p)); 17978cf6c252SPaul Traina } 17988cf6c252SPaul Traina } 17998cf6c252SPaul Traina } 18008cf6c252SPaul Traina 18018cf6c252SPaul Traina /* 18028cf6c252SPaul Traina * Mark code array such that isMarked(i) is true 18038cf6c252SPaul Traina * only for nodes that are alive. 18048cf6c252SPaul Traina */ 18058cf6c252SPaul Traina static void 18068cf6c252SPaul Traina mark_code(p) 18078cf6c252SPaul Traina struct block *p; 18088cf6c252SPaul Traina { 18098cf6c252SPaul Traina cur_mark += 1; 18108cf6c252SPaul Traina make_marks(p); 18118cf6c252SPaul Traina } 18128cf6c252SPaul Traina 18138cf6c252SPaul Traina /* 18148cf6c252SPaul Traina * True iff the two stmt lists load the same value from the packet into 18158cf6c252SPaul Traina * the accumulator. 18168cf6c252SPaul Traina */ 18178cf6c252SPaul Traina static int 18188cf6c252SPaul Traina eq_slist(x, y) 18198cf6c252SPaul Traina struct slist *x, *y; 18208cf6c252SPaul Traina { 18218cf6c252SPaul Traina while (1) { 18228cf6c252SPaul Traina while (x && x->s.code == NOP) 18238cf6c252SPaul Traina x = x->next; 18248cf6c252SPaul Traina while (y && y->s.code == NOP) 18258cf6c252SPaul Traina y = y->next; 18268cf6c252SPaul Traina if (x == 0) 18278cf6c252SPaul Traina return y == 0; 18288cf6c252SPaul Traina if (y == 0) 18298cf6c252SPaul Traina return x == 0; 18308cf6c252SPaul Traina if (x->s.code != y->s.code || x->s.k != y->s.k) 18318cf6c252SPaul Traina return 0; 18328cf6c252SPaul Traina x = x->next; 18338cf6c252SPaul Traina y = y->next; 18348cf6c252SPaul Traina } 18358cf6c252SPaul Traina } 18368cf6c252SPaul Traina 18378cf6c252SPaul Traina static inline int 18388cf6c252SPaul Traina eq_blk(b0, b1) 18398cf6c252SPaul Traina struct block *b0, *b1; 18408cf6c252SPaul Traina { 18418cf6c252SPaul Traina if (b0->s.code == b1->s.code && 18428cf6c252SPaul Traina b0->s.k == b1->s.k && 18438cf6c252SPaul Traina b0->et.succ == b1->et.succ && 18448cf6c252SPaul Traina b0->ef.succ == b1->ef.succ) 18458cf6c252SPaul Traina return eq_slist(b0->stmts, b1->stmts); 18468cf6c252SPaul Traina return 0; 18478cf6c252SPaul Traina } 18488cf6c252SPaul Traina 18498cf6c252SPaul Traina static void 18508cf6c252SPaul Traina intern_blocks(root) 18518cf6c252SPaul Traina struct block *root; 18528cf6c252SPaul Traina { 18538cf6c252SPaul Traina struct block *p; 18548cf6c252SPaul Traina int i, j; 1855ef96d74fSMax Laier int done1; /* don't shadow global */ 18568cf6c252SPaul Traina top: 1857ef96d74fSMax Laier done1 = 1; 18588cf6c252SPaul Traina for (i = 0; i < n_blocks; ++i) 18598cf6c252SPaul Traina blocks[i]->link = 0; 18608cf6c252SPaul Traina 18618cf6c252SPaul Traina mark_code(root); 18628cf6c252SPaul Traina 18638cf6c252SPaul Traina for (i = n_blocks - 1; --i >= 0; ) { 18648cf6c252SPaul Traina if (!isMarked(blocks[i])) 18658cf6c252SPaul Traina continue; 18668cf6c252SPaul Traina for (j = i + 1; j < n_blocks; ++j) { 18678cf6c252SPaul Traina if (!isMarked(blocks[j])) 18688cf6c252SPaul Traina continue; 18698cf6c252SPaul Traina if (eq_blk(blocks[i], blocks[j])) { 18708cf6c252SPaul Traina blocks[i]->link = blocks[j]->link ? 18718cf6c252SPaul Traina blocks[j]->link : blocks[j]; 18728cf6c252SPaul Traina break; 18738cf6c252SPaul Traina } 18748cf6c252SPaul Traina } 18758cf6c252SPaul Traina } 18768cf6c252SPaul Traina for (i = 0; i < n_blocks; ++i) { 18778cf6c252SPaul Traina p = blocks[i]; 18788cf6c252SPaul Traina if (JT(p) == 0) 18798cf6c252SPaul Traina continue; 18808cf6c252SPaul Traina if (JT(p)->link) { 1881ef96d74fSMax Laier done1 = 0; 18828cf6c252SPaul Traina JT(p) = JT(p)->link; 18838cf6c252SPaul Traina } 18848cf6c252SPaul Traina if (JF(p)->link) { 1885ef96d74fSMax Laier done1 = 0; 18868cf6c252SPaul Traina JF(p) = JF(p)->link; 18878cf6c252SPaul Traina } 18888cf6c252SPaul Traina } 1889ef96d74fSMax Laier if (!done1) 18908cf6c252SPaul Traina goto top; 18918cf6c252SPaul Traina } 18928cf6c252SPaul Traina 18938cf6c252SPaul Traina static void 18948cf6c252SPaul Traina opt_cleanup() 18958cf6c252SPaul Traina { 18968cf6c252SPaul Traina free((void *)vnode_base); 18978cf6c252SPaul Traina free((void *)vmap); 18988cf6c252SPaul Traina free((void *)edges); 18998cf6c252SPaul Traina free((void *)space); 19008cf6c252SPaul Traina free((void *)levels); 19018cf6c252SPaul Traina free((void *)blocks); 19028cf6c252SPaul Traina } 19038cf6c252SPaul Traina 19048cf6c252SPaul Traina /* 19058cf6c252SPaul Traina * Return the number of stmts in 's'. 19068cf6c252SPaul Traina */ 19078cf6c252SPaul Traina static int 19088cf6c252SPaul Traina slength(s) 19098cf6c252SPaul Traina struct slist *s; 19108cf6c252SPaul Traina { 19118cf6c252SPaul Traina int n = 0; 19128cf6c252SPaul Traina 19138cf6c252SPaul Traina for (; s; s = s->next) 19148cf6c252SPaul Traina if (s->s.code != NOP) 19158cf6c252SPaul Traina ++n; 19168cf6c252SPaul Traina return n; 19178cf6c252SPaul Traina } 19188cf6c252SPaul Traina 19198cf6c252SPaul Traina /* 19208cf6c252SPaul Traina * Return the number of nodes reachable by 'p'. 19218cf6c252SPaul Traina * All nodes should be initially unmarked. 19228cf6c252SPaul Traina */ 19238cf6c252SPaul Traina static int 19248cf6c252SPaul Traina count_blocks(p) 19258cf6c252SPaul Traina struct block *p; 19268cf6c252SPaul Traina { 19278cf6c252SPaul Traina if (p == 0 || isMarked(p)) 19288cf6c252SPaul Traina return 0; 19298cf6c252SPaul Traina Mark(p); 19308cf6c252SPaul Traina return count_blocks(JT(p)) + count_blocks(JF(p)) + 1; 19318cf6c252SPaul Traina } 19328cf6c252SPaul Traina 19338cf6c252SPaul Traina /* 19348cf6c252SPaul Traina * Do a depth first search on the flow graph, numbering the 19358cf6c252SPaul Traina * the basic blocks, and entering them into the 'blocks' array.` 19368cf6c252SPaul Traina */ 19378cf6c252SPaul Traina static void 19388cf6c252SPaul Traina number_blks_r(p) 19398cf6c252SPaul Traina struct block *p; 19408cf6c252SPaul Traina { 19418cf6c252SPaul Traina int n; 19428cf6c252SPaul Traina 19438cf6c252SPaul Traina if (p == 0 || isMarked(p)) 19448cf6c252SPaul Traina return; 19458cf6c252SPaul Traina 19468cf6c252SPaul Traina Mark(p); 19478cf6c252SPaul Traina n = n_blocks++; 19488cf6c252SPaul Traina p->id = n; 19498cf6c252SPaul Traina blocks[n] = p; 19508cf6c252SPaul Traina 19518cf6c252SPaul Traina number_blks_r(JT(p)); 19528cf6c252SPaul Traina number_blks_r(JF(p)); 19538cf6c252SPaul Traina } 19548cf6c252SPaul Traina 19558cf6c252SPaul Traina /* 19568cf6c252SPaul Traina * Return the number of stmts in the flowgraph reachable by 'p'. 19578cf6c252SPaul Traina * The nodes should be unmarked before calling. 1958dc2c7305SBill Fenner * 1959dc2c7305SBill Fenner * Note that "stmts" means "instructions", and that this includes 1960dc2c7305SBill Fenner * 1961dc2c7305SBill Fenner * side-effect statements in 'p' (slength(p->stmts)); 1962dc2c7305SBill Fenner * 1963dc2c7305SBill Fenner * statements in the true branch from 'p' (count_stmts(JT(p))); 1964dc2c7305SBill Fenner * 1965dc2c7305SBill Fenner * statements in the false branch from 'p' (count_stmts(JF(p))); 1966dc2c7305SBill Fenner * 1967dc2c7305SBill Fenner * the conditional jump itself (1); 1968dc2c7305SBill Fenner * 1969dc2c7305SBill Fenner * an extra long jump if the true branch requires it (p->longjt); 1970dc2c7305SBill Fenner * 1971dc2c7305SBill Fenner * an extra long jump if the false branch requires it (p->longjf). 19728cf6c252SPaul Traina */ 19738cf6c252SPaul Traina static int 19748cf6c252SPaul Traina count_stmts(p) 19758cf6c252SPaul Traina struct block *p; 19768cf6c252SPaul Traina { 19778cf6c252SPaul Traina int n; 19788cf6c252SPaul Traina 19798cf6c252SPaul Traina if (p == 0 || isMarked(p)) 19808cf6c252SPaul Traina return 0; 19818cf6c252SPaul Traina Mark(p); 19828cf6c252SPaul Traina n = count_stmts(JT(p)) + count_stmts(JF(p)); 1983dc2c7305SBill Fenner return slength(p->stmts) + n + 1 + p->longjt + p->longjf; 19848cf6c252SPaul Traina } 19858cf6c252SPaul Traina 19868cf6c252SPaul Traina /* 19878cf6c252SPaul Traina * Allocate memory. All allocation is done before optimization 19888cf6c252SPaul Traina * is begun. A linear bound on the size of all data structures is computed 19898cf6c252SPaul Traina * from the total number of blocks and/or statements. 19908cf6c252SPaul Traina */ 19918cf6c252SPaul Traina static void 19928cf6c252SPaul Traina opt_init(root) 19938cf6c252SPaul Traina struct block *root; 19948cf6c252SPaul Traina { 19958cf6c252SPaul Traina bpf_u_int32 *p; 19968cf6c252SPaul Traina int i, n, max_stmts; 19978cf6c252SPaul Traina 19988cf6c252SPaul Traina /* 19998cf6c252SPaul Traina * First, count the blocks, so we can malloc an array to map 20008cf6c252SPaul Traina * block number to block. Then, put the blocks into the array. 20018cf6c252SPaul Traina */ 20028cf6c252SPaul Traina unMarkAll(); 20038cf6c252SPaul Traina n = count_blocks(root); 2004ef96d74fSMax Laier blocks = (struct block **)calloc(n, sizeof(*blocks)); 2005feb4ecdbSBruce M Simpson if (blocks == NULL) 2006feb4ecdbSBruce M Simpson bpf_error("malloc"); 20078cf6c252SPaul Traina unMarkAll(); 20088cf6c252SPaul Traina n_blocks = 0; 20098cf6c252SPaul Traina number_blks_r(root); 20108cf6c252SPaul Traina 20118cf6c252SPaul Traina n_edges = 2 * n_blocks; 2012ef96d74fSMax Laier edges = (struct edge **)calloc(n_edges, sizeof(*edges)); 2013feb4ecdbSBruce M Simpson if (edges == NULL) 2014feb4ecdbSBruce M Simpson bpf_error("malloc"); 20158cf6c252SPaul Traina 20168cf6c252SPaul Traina /* 20178cf6c252SPaul Traina * The number of levels is bounded by the number of nodes. 20188cf6c252SPaul Traina */ 2019ef96d74fSMax Laier levels = (struct block **)calloc(n_blocks, sizeof(*levels)); 2020feb4ecdbSBruce M Simpson if (levels == NULL) 2021feb4ecdbSBruce M Simpson bpf_error("malloc"); 20228cf6c252SPaul Traina 20238cf6c252SPaul Traina edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1; 20248cf6c252SPaul Traina nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1; 20258cf6c252SPaul Traina 20268cf6c252SPaul Traina /* XXX */ 20278cf6c252SPaul Traina space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space) 20288cf6c252SPaul Traina + n_edges * edgewords * sizeof(*space)); 2029feb4ecdbSBruce M Simpson if (space == NULL) 2030feb4ecdbSBruce M Simpson bpf_error("malloc"); 20318cf6c252SPaul Traina p = space; 20328cf6c252SPaul Traina all_dom_sets = p; 20338cf6c252SPaul Traina for (i = 0; i < n; ++i) { 20348cf6c252SPaul Traina blocks[i]->dom = p; 20358cf6c252SPaul Traina p += nodewords; 20368cf6c252SPaul Traina } 20378cf6c252SPaul Traina all_closure_sets = p; 20388cf6c252SPaul Traina for (i = 0; i < n; ++i) { 20398cf6c252SPaul Traina blocks[i]->closure = p; 20408cf6c252SPaul Traina p += nodewords; 20418cf6c252SPaul Traina } 20428cf6c252SPaul Traina all_edge_sets = p; 20438cf6c252SPaul Traina for (i = 0; i < n; ++i) { 20448cf6c252SPaul Traina register struct block *b = blocks[i]; 20458cf6c252SPaul Traina 20468cf6c252SPaul Traina b->et.edom = p; 20478cf6c252SPaul Traina p += edgewords; 20488cf6c252SPaul Traina b->ef.edom = p; 20498cf6c252SPaul Traina p += edgewords; 20508cf6c252SPaul Traina b->et.id = i; 20518cf6c252SPaul Traina edges[i] = &b->et; 20528cf6c252SPaul Traina b->ef.id = n_blocks + i; 20538cf6c252SPaul Traina edges[n_blocks + i] = &b->ef; 20548cf6c252SPaul Traina b->et.pred = b; 20558cf6c252SPaul Traina b->ef.pred = b; 20568cf6c252SPaul Traina } 20578cf6c252SPaul Traina max_stmts = 0; 20588cf6c252SPaul Traina for (i = 0; i < n; ++i) 20598cf6c252SPaul Traina max_stmts += slength(blocks[i]->stmts) + 1; 20608cf6c252SPaul Traina /* 20618cf6c252SPaul Traina * We allocate at most 3 value numbers per statement, 20628cf6c252SPaul Traina * so this is an upper bound on the number of valnodes 20638cf6c252SPaul Traina * we'll need. 20648cf6c252SPaul Traina */ 20658cf6c252SPaul Traina maxval = 3 * max_stmts; 2066ef96d74fSMax Laier vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap)); 2067ef96d74fSMax Laier vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base)); 2068feb4ecdbSBruce M Simpson if (vmap == NULL || vnode_base == NULL) 2069feb4ecdbSBruce M Simpson bpf_error("malloc"); 20708cf6c252SPaul Traina } 20718cf6c252SPaul Traina 20728cf6c252SPaul Traina /* 20738cf6c252SPaul Traina * Some pointers used to convert the basic block form of the code, 20748cf6c252SPaul Traina * into the array form that BPF requires. 'fstart' will point to 20758cf6c252SPaul Traina * the malloc'd array while 'ftail' is used during the recursive traversal. 20768cf6c252SPaul Traina */ 20778cf6c252SPaul Traina static struct bpf_insn *fstart; 20788cf6c252SPaul Traina static struct bpf_insn *ftail; 20798cf6c252SPaul Traina 20808cf6c252SPaul Traina #ifdef BDEBUG 20818cf6c252SPaul Traina int bids[1000]; 20828cf6c252SPaul Traina #endif 20838cf6c252SPaul Traina 20848cf6c252SPaul Traina /* 20858cf6c252SPaul Traina * Returns true if successful. Returns false if a branch has 20868cf6c252SPaul Traina * an offset that is too large. If so, we have marked that 20878cf6c252SPaul Traina * branch so that on a subsequent iteration, it will be treated 20888cf6c252SPaul Traina * properly. 20898cf6c252SPaul Traina */ 20908cf6c252SPaul Traina static int 20918cf6c252SPaul Traina convert_code_r(p) 20928cf6c252SPaul Traina struct block *p; 20938cf6c252SPaul Traina { 20948cf6c252SPaul Traina struct bpf_insn *dst; 20958cf6c252SPaul Traina struct slist *src; 20968cf6c252SPaul Traina int slen; 20978cf6c252SPaul Traina u_int off; 20988cf6c252SPaul Traina int extrajmps; /* number of extra jumps inserted */ 20998751327cSBill Fenner struct slist **offset = NULL; 21008cf6c252SPaul Traina 21018cf6c252SPaul Traina if (p == 0 || isMarked(p)) 21028cf6c252SPaul Traina return (1); 21038cf6c252SPaul Traina Mark(p); 21048cf6c252SPaul Traina 21058cf6c252SPaul Traina if (convert_code_r(JF(p)) == 0) 21068cf6c252SPaul Traina return (0); 21078cf6c252SPaul Traina if (convert_code_r(JT(p)) == 0) 21088cf6c252SPaul Traina return (0); 21098cf6c252SPaul Traina 21108cf6c252SPaul Traina slen = slength(p->stmts); 21118cf6c252SPaul Traina dst = ftail -= (slen + 1 + p->longjt + p->longjf); 21128cf6c252SPaul Traina /* inflate length by any extra jumps */ 21138cf6c252SPaul Traina 21148cf6c252SPaul Traina p->offset = dst - fstart; 21158cf6c252SPaul Traina 21168751327cSBill Fenner /* generate offset[] for convenience */ 21178751327cSBill Fenner if (slen) { 2118feb4ecdbSBruce M Simpson offset = (struct slist **)calloc(slen, sizeof(struct slist *)); 21198751327cSBill Fenner if (!offset) { 21208751327cSBill Fenner bpf_error("not enough core"); 21218751327cSBill Fenner /*NOTREACHED*/ 21228751327cSBill Fenner } 21238751327cSBill Fenner } 21248751327cSBill Fenner src = p->stmts; 21258751327cSBill Fenner for (off = 0; off < slen && src; off++) { 21268751327cSBill Fenner #if 0 21278751327cSBill Fenner printf("off=%d src=%x\n", off, src); 21288751327cSBill Fenner #endif 21298751327cSBill Fenner offset[off] = src; 21308751327cSBill Fenner src = src->next; 21318751327cSBill Fenner } 21328751327cSBill Fenner 21338751327cSBill Fenner off = 0; 21348cf6c252SPaul Traina for (src = p->stmts; src; src = src->next) { 21358cf6c252SPaul Traina if (src->s.code == NOP) 21368cf6c252SPaul Traina continue; 21378cf6c252SPaul Traina dst->code = (u_short)src->s.code; 21388cf6c252SPaul Traina dst->k = src->s.k; 21398751327cSBill Fenner 21408751327cSBill Fenner /* fill block-local relative jump */ 2141dc2c7305SBill Fenner if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) { 21428751327cSBill Fenner #if 0 21438751327cSBill Fenner if (src->s.jt || src->s.jf) { 21448751327cSBill Fenner bpf_error("illegal jmp destination"); 21458751327cSBill Fenner /*NOTREACHED*/ 21468cf6c252SPaul Traina } 21478751327cSBill Fenner #endif 21488751327cSBill Fenner goto filled; 21498751327cSBill Fenner } 21508751327cSBill Fenner if (off == slen - 2) /*???*/ 21518751327cSBill Fenner goto filled; 21528751327cSBill Fenner 21538751327cSBill Fenner { 21548751327cSBill Fenner int i; 21558751327cSBill Fenner int jt, jf; 2156ef96d74fSMax Laier const char *ljerr = "%s for block-local relative jump: off=%d"; 21578751327cSBill Fenner 21588751327cSBill Fenner #if 0 21598751327cSBill Fenner printf("code=%x off=%d %x %x\n", src->s.code, 21608751327cSBill Fenner off, src->s.jt, src->s.jf); 21618751327cSBill Fenner #endif 21628751327cSBill Fenner 21638751327cSBill Fenner if (!src->s.jt || !src->s.jf) { 21648751327cSBill Fenner bpf_error(ljerr, "no jmp destination", off); 21658751327cSBill Fenner /*NOTREACHED*/ 21668751327cSBill Fenner } 21678751327cSBill Fenner 21688751327cSBill Fenner jt = jf = 0; 21698751327cSBill Fenner for (i = 0; i < slen; i++) { 21708751327cSBill Fenner if (offset[i] == src->s.jt) { 21718751327cSBill Fenner if (jt) { 21728751327cSBill Fenner bpf_error(ljerr, "multiple matches", off); 21738751327cSBill Fenner /*NOTREACHED*/ 21748751327cSBill Fenner } 21758751327cSBill Fenner 21768751327cSBill Fenner dst->jt = i - off - 1; 21778751327cSBill Fenner jt++; 21788751327cSBill Fenner } 21798751327cSBill Fenner if (offset[i] == src->s.jf) { 21808751327cSBill Fenner if (jf) { 21818751327cSBill Fenner bpf_error(ljerr, "multiple matches", off); 21828751327cSBill Fenner /*NOTREACHED*/ 21838751327cSBill Fenner } 21848751327cSBill Fenner dst->jf = i - off - 1; 21858751327cSBill Fenner jf++; 21868751327cSBill Fenner } 21878751327cSBill Fenner } 21888751327cSBill Fenner if (!jt || !jf) { 21898751327cSBill Fenner bpf_error(ljerr, "no destination found", off); 21908751327cSBill Fenner /*NOTREACHED*/ 21918751327cSBill Fenner } 21928751327cSBill Fenner } 21938751327cSBill Fenner filled: 21948751327cSBill Fenner ++dst; 21958751327cSBill Fenner ++off; 21968751327cSBill Fenner } 21978751327cSBill Fenner if (offset) 21988751327cSBill Fenner free(offset); 21998751327cSBill Fenner 22008cf6c252SPaul Traina #ifdef BDEBUG 22018cf6c252SPaul Traina bids[dst - fstart] = p->id + 1; 22028cf6c252SPaul Traina #endif 22038cf6c252SPaul Traina dst->code = (u_short)p->s.code; 22048cf6c252SPaul Traina dst->k = p->s.k; 22058cf6c252SPaul Traina if (JT(p)) { 22068cf6c252SPaul Traina extrajmps = 0; 22078cf6c252SPaul Traina off = JT(p)->offset - (p->offset + slen) - 1; 22088cf6c252SPaul Traina if (off >= 256) { 22098cf6c252SPaul Traina /* offset too large for branch, must add a jump */ 22108cf6c252SPaul Traina if (p->longjt == 0) { 22118cf6c252SPaul Traina /* mark this instruction and retry */ 22128cf6c252SPaul Traina p->longjt++; 22138cf6c252SPaul Traina return(0); 22148cf6c252SPaul Traina } 22158cf6c252SPaul Traina /* branch if T to following jump */ 22168cf6c252SPaul Traina dst->jt = extrajmps; 22178cf6c252SPaul Traina extrajmps++; 22188cf6c252SPaul Traina dst[extrajmps].code = BPF_JMP|BPF_JA; 22198cf6c252SPaul Traina dst[extrajmps].k = off - extrajmps; 22208cf6c252SPaul Traina } 22218cf6c252SPaul Traina else 22228cf6c252SPaul Traina dst->jt = off; 22238cf6c252SPaul Traina off = JF(p)->offset - (p->offset + slen) - 1; 22248cf6c252SPaul Traina if (off >= 256) { 22258cf6c252SPaul Traina /* offset too large for branch, must add a jump */ 22268cf6c252SPaul Traina if (p->longjf == 0) { 22278cf6c252SPaul Traina /* mark this instruction and retry */ 22288cf6c252SPaul Traina p->longjf++; 22298cf6c252SPaul Traina return(0); 22308cf6c252SPaul Traina } 22318cf6c252SPaul Traina /* branch if F to following jump */ 22328cf6c252SPaul Traina /* if two jumps are inserted, F goes to second one */ 22338cf6c252SPaul Traina dst->jf = extrajmps; 22348cf6c252SPaul Traina extrajmps++; 22358cf6c252SPaul Traina dst[extrajmps].code = BPF_JMP|BPF_JA; 22368cf6c252SPaul Traina dst[extrajmps].k = off - extrajmps; 22378cf6c252SPaul Traina } 22388cf6c252SPaul Traina else 22398cf6c252SPaul Traina dst->jf = off; 22408cf6c252SPaul Traina } 22418cf6c252SPaul Traina return (1); 22428cf6c252SPaul Traina } 22438cf6c252SPaul Traina 22448cf6c252SPaul Traina 22458cf6c252SPaul Traina /* 22468cf6c252SPaul Traina * Convert flowgraph intermediate representation to the 22478cf6c252SPaul Traina * BPF array representation. Set *lenp to the number of instructions. 2248ef96d74fSMax Laier * 2249ef96d74fSMax Laier * This routine does *NOT* leak the memory pointed to by fp. It *must 2250ef96d74fSMax Laier * not* do free(fp) before returning fp; doing so would make no sense, 2251ef96d74fSMax Laier * as the BPF array pointed to by the return value of icode_to_fcode() 2252ef96d74fSMax Laier * must be valid - it's being returned for use in a bpf_program structure. 2253ef96d74fSMax Laier * 2254ef96d74fSMax Laier * If it appears that icode_to_fcode() is leaking, the problem is that 2255ef96d74fSMax Laier * the program using pcap_compile() is failing to free the memory in 2256ef96d74fSMax Laier * the BPF program when it's done - the leak is in the program, not in 2257ef96d74fSMax Laier * the routine that happens to be allocating the memory. (By analogy, if 2258ef96d74fSMax Laier * a program calls fopen() without ever calling fclose() on the FILE *, 2259ef96d74fSMax Laier * it will leak the FILE structure; the leak is not in fopen(), it's in 2260ef96d74fSMax Laier * the program.) Change the program to use pcap_freecode() when it's 2261ef96d74fSMax Laier * done with the filter program. See the pcap man page. 22628cf6c252SPaul Traina */ 22638cf6c252SPaul Traina struct bpf_insn * 22648cf6c252SPaul Traina icode_to_fcode(root, lenp) 22658cf6c252SPaul Traina struct block *root; 22668cf6c252SPaul Traina int *lenp; 22678cf6c252SPaul Traina { 22688cf6c252SPaul Traina int n; 22698cf6c252SPaul Traina struct bpf_insn *fp; 22708cf6c252SPaul Traina 22718cf6c252SPaul Traina /* 22720a94d38fSBill Fenner * Loop doing convert_code_r() until no branches remain 22738cf6c252SPaul Traina * with too-large offsets. 22748cf6c252SPaul Traina */ 22758cf6c252SPaul Traina while (1) { 22768cf6c252SPaul Traina unMarkAll(); 22778cf6c252SPaul Traina n = *lenp = count_stmts(root); 22788cf6c252SPaul Traina 22798cf6c252SPaul Traina fp = (struct bpf_insn *)malloc(sizeof(*fp) * n); 2280feb4ecdbSBruce M Simpson if (fp == NULL) 2281feb4ecdbSBruce M Simpson bpf_error("malloc"); 22828cf6c252SPaul Traina memset((char *)fp, 0, sizeof(*fp) * n); 22838cf6c252SPaul Traina fstart = fp; 22848cf6c252SPaul Traina ftail = fp + n; 22858cf6c252SPaul Traina 22868cf6c252SPaul Traina unMarkAll(); 22878cf6c252SPaul Traina if (convert_code_r(root)) 22888cf6c252SPaul Traina break; 22898cf6c252SPaul Traina free(fp); 22908cf6c252SPaul Traina } 22918cf6c252SPaul Traina 22928cf6c252SPaul Traina return fp; 22938cf6c252SPaul Traina } 22948cf6c252SPaul Traina 2295dc2c7305SBill Fenner /* 2296dc2c7305SBill Fenner * Make a copy of a BPF program and put it in the "fcode" member of 2297dc2c7305SBill Fenner * a "pcap_t". 2298dc2c7305SBill Fenner * 2299dc2c7305SBill Fenner * If we fail to allocate memory for the copy, fill in the "errbuf" 2300dc2c7305SBill Fenner * member of the "pcap_t" with an error message, and return -1; 2301dc2c7305SBill Fenner * otherwise, return 0. 2302dc2c7305SBill Fenner */ 2303dc2c7305SBill Fenner int 2304dc2c7305SBill Fenner install_bpf_program(pcap_t *p, struct bpf_program *fp) 2305dc2c7305SBill Fenner { 2306dc2c7305SBill Fenner size_t prog_size; 2307dc2c7305SBill Fenner 2308dc2c7305SBill Fenner /* 2309a8e07101SRui Paulo * Validate the program. 2310a8e07101SRui Paulo */ 2311a8e07101SRui Paulo if (!bpf_validate(fp->bf_insns, fp->bf_len)) { 2312a8e07101SRui Paulo snprintf(p->errbuf, sizeof(p->errbuf), 2313a8e07101SRui Paulo "BPF program is not valid"); 2314a8e07101SRui Paulo return (-1); 2315a8e07101SRui Paulo } 2316a8e07101SRui Paulo 2317a8e07101SRui Paulo /* 2318dc2c7305SBill Fenner * Free up any already installed program. 2319dc2c7305SBill Fenner */ 2320dc2c7305SBill Fenner pcap_freecode(&p->fcode); 2321dc2c7305SBill Fenner 2322dc2c7305SBill Fenner prog_size = sizeof(*fp->bf_insns) * fp->bf_len; 2323dc2c7305SBill Fenner p->fcode.bf_len = fp->bf_len; 2324dc2c7305SBill Fenner p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size); 2325dc2c7305SBill Fenner if (p->fcode.bf_insns == NULL) { 2326dc2c7305SBill Fenner snprintf(p->errbuf, sizeof(p->errbuf), 2327dc2c7305SBill Fenner "malloc: %s", pcap_strerror(errno)); 2328dc2c7305SBill Fenner return (-1); 2329dc2c7305SBill Fenner } 2330dc2c7305SBill Fenner memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size); 2331dc2c7305SBill Fenner return (0); 2332dc2c7305SBill Fenner } 2333dc2c7305SBill Fenner 23348cf6c252SPaul Traina #ifdef BDEBUG 23358cf6c252SPaul Traina static void 23368cf6c252SPaul Traina opt_dump(root) 23378cf6c252SPaul Traina struct block *root; 23388cf6c252SPaul Traina { 23398cf6c252SPaul Traina struct bpf_program f; 23408cf6c252SPaul Traina 23418cf6c252SPaul Traina memset(bids, 0, sizeof bids); 23428cf6c252SPaul Traina f.bf_insns = icode_to_fcode(root, &f.bf_len); 23438cf6c252SPaul Traina bpf_dump(&f, 1); 23448cf6c252SPaul Traina putchar('\n'); 23458cf6c252SPaul Traina free((char *)f.bf_insns); 23468cf6c252SPaul Traina } 23478cf6c252SPaul Traina #endif 2348