1 /*
2  *	BIRD -- The Border Gateway Protocol
3  *
4  *	(c) 2000 Martin Mares <mj@ucw.cz>
5  *
6  *	Can be freely distributed and used under the terms of the GNU GPL.
7  */
8 
9 #ifndef _BIRD_BGP_H_
10 #define _BIRD_BGP_H_
11 
12 #include <stdint.h>
13 #include "nest/route.h"
14 #include "nest/bfd.h"
15 #include "lib/hash.h"
16 
/* Forward declarations: this header only refers to these types through pointers. */
struct linpool;
struct eattr;
19 
/*
 * Configuration of a BGP protocol instance. The generic protocol
 * configuration is embedded as the first member (c); struct bgp_proto
 * points back to this structure through its cf member.
 */
struct bgp_config {
  struct proto_config c;
  u32 local_as, remote_as;		/* Local and neighbor AS numbers */
  ip_addr remote_ip;			/* Neighbor address */
  ip_addr source_addr;			/* Source address to use */
  struct iface *iface;			/* Interface for link-local addresses */
  u16 remote_port; 			/* Neighbor destination port */
  int multihop;				/* Number of hops if multihop */
  int ttl_security;			/* Enable TTL security [RFC5082] */
  int next_hop_self;			/* Always set next hop to local IP address */
  int next_hop_keep;			/* Do not touch next hop attribute */
  int missing_lladdr;			/* What we will do when we don't know link-local addr, see MLL_* */
  int gw_mode;				/* How we compute route gateway from next_hop attr, see GW_* */
  int compare_path_lengths;		/* Use path lengths when selecting best route */
  int med_metric;			/* Compare MULTI_EXIT_DISC even between routes from different ASes */
  int igp_metric;			/* Use IGP metrics when selecting best route */
  int prefer_older;			/* Prefer older routes according to RFC 5004 */
  int deterministic_med;		/* Use more complicated algo to have strict RFC 4271 MED comparison */
  u32 default_local_pref;		/* Default value for LOCAL_PREF attribute */
  u32 default_med;			/* Default value for MULTI_EXIT_DISC attribute */
  int capabilities;			/* Enable capability handshake [RFC3392] */
  int enable_refresh;			/* Enable local support for route refresh [RFC2918] */
  int enable_as4;			/* Enable local support for 4B AS numbers [RFC4893] */
  int enable_extended_messages;		/* Enable local support for extended messages [draft] */
  u32 rr_cluster_id;			/* Route reflector cluster ID, if different from local ID */
  int rr_client;			/* Whether neighbor is RR client of me */
  int rs_client;			/* Whether neighbor is RS client of me */
  int advertise_ipv4;			/* Whether we should add IPv4 capability advertisement to OPEN message */
  int passive;				/* Do not initiate outgoing connection */
  int interpret_communities;		/* Hardwired handling of well-known communities */
  int secondary;			/* Accept also non-best routes (i.e. RA_ACCEPTED) */
  int add_path;				/* Use ADD-PATH extension [RFC7911]; presumably one of ADD_PATH_* */
  int allow_local_as;			/* Allow that number of local ASNs in incoming AS_PATHs */
  int allow_local_pref;			/* Allow LOCAL_PREF in EBGP sessions */
  int gr_mode;				/* Graceful restart mode (BGP_GR_*) */
  int llgr_mode;			/* Long-lived graceful restart mode (BGP_LLGR_*) */
  int setkey;				/* Set MD5 password to system SA/SP database */
  unsigned gr_time;			/* Graceful restart timeout */
  unsigned llgr_time;			/* Long-lived graceful restart timeout */
  unsigned connect_delay_time;		/* Minimum delay between connect attempts */
  unsigned connect_retry_time;		/* Timeout for connect attempts */
  unsigned hold_time, initial_hold_time;	/* Configured hold times; the per-session value lives in bgp_conn.hold_time */
  unsigned keepalive_time;		/* Configured keepalive time; the per-session value lives in bgp_conn.keepalive_time */
  unsigned error_amnesia_time;		/* Errors are forgotten after */
  unsigned error_delay_time_min;	/* Time to wait after an error is detected */
  unsigned error_delay_time_max;	/* Presumably the upper bound paired with error_delay_time_min -- TODO confirm */
  unsigned disable_after_error;		/* Disable the protocol when error is detected */
  u32 disable_after_cease;		/* Disable it when cease is received, bitfield */

  char *password;			/* Password used for MD5 authentication */
  struct rtable_config *igp_table;	/* Table used for recursive next hop lookups */
  int check_link;			/* Use iface link state for liveness detection */
  int bfd;				/* Use BFD for liveness detection */
};
74 
/* Values of bgp_config.missing_lladdr */
#define MLL_SELF 1
#define MLL_DROP 2
#define MLL_IGNORE 3

/* Values of bgp_config.gw_mode */
#define GW_DIRECT 1
#define GW_RECURSIVE 2

/* ADD-PATH directions; ADD_PATH_FULL equals ADD_PATH_RX | ADD_PATH_TX */
#define ADD_PATH_RX 1
#define ADD_PATH_TX 2
#define ADD_PATH_FULL 3

/* Values of bgp_config.gr_mode */
#define BGP_GR_ABLE 1
#define BGP_GR_AWARE 2

/* For peer_gr_flags */
#define BGP_GRF_RESTART 0x80

/* For peer_gr_aflags */
#define BGP_GRF_FORWARDING 0x80

/* Values of bgp_config.llgr_mode */
#define BGP_LLGR_ABLE 1
#define BGP_LLGR_AWARE 2

/* Presumably for peer_llgr_aflags, by analogy with BGP_GRF_FORWARDING -- TODO confirm */
#define BGP_LLGRF_FORWARDING 0x80

/* Values of bgp_proto.gr_active */
#define BGP_GRS_NONE		0	/* No GR  */
#define BGP_GRS_ACTIVE		1	/* Graceful restart per RFC 4724 */
#define BGP_GRS_LLGR_1		2	/* Long-lived GR phase 1 (restart time) */
#define BGP_GRS_LLGR_2		3	/* Long-lived GR phase 2 (stale time) */

/* NOTE(review): looks like a value of bgp_config.bfd -- confirm against the config parser */
#define BGP_BFD_GRACEFUL	2	/* BFD down triggers graceful restart */
106 
107 
/*
 * State of a single BGP connection. struct bgp_proto embeds two of these
 * (outgoing_conn and incoming_conn) and points to the established one.
 */
struct bgp_conn {
  struct bgp_proto *bgp;		/* Owning protocol instance */
  struct birdsock *sk;			/* Socket of this connection */
  uint state;				/* State of connection state machine */
  struct timer *connect_retry_timer;
  struct timer *hold_timer;
  struct timer *keepalive_timer;
  struct event *tx_ev;
  int packets_to_send;			/* Bitmap of packet types to be sent */
  int notify_code, notify_subcode, notify_size;	/* Presumably the pending NOTIFICATION to send -- TODO confirm */
  byte *notify_data;			/* Data accompanying the notification; notify_size is presumably its length */
  u32 advertised_as;			/* Temporary value for AS number received */
  int start_state;			/* protocol start_state snapshot when connection established */
  u8 peer_refresh_support;		/* Peer supports route refresh [RFC2918] */
  u8 peer_as4_support;			/* Peer supports 4B AS numbers [RFC4893] */
  u8 peer_add_path;			/* Peer supports ADD-PATH [RFC7911] */
  u8 peer_enhanced_refresh_support;	/* Peer supports enhanced refresh [RFC7313] */
  /* Graceful restart / long-lived graceful restart information about the peer */
  u8 peer_gr_aware;
  u8 peer_gr_able;
  u16 peer_gr_time;
  u8 peer_gr_flags;			/* See BGP_GRF_RESTART */
  u8 peer_gr_aflags;			/* See BGP_GRF_FORWARDING */
  u8 peer_llgr_aware;
  u8 peer_llgr_able;
  uint peer_llgr_time;
  u8 peer_llgr_aflags;			/* Presumably BGP_LLGRF_FORWARDING -- TODO confirm */
  u8 peer_ext_messages_support;		/* Peer supports extended message length [draft] */
  unsigned hold_time, keepalive_time;	/* Times calculated from my and neighbor's requirements */
};
137 
/*
 * Run-time state of a BGP protocol instance. The generic protocol
 * structure is embedded as the first member (p), and cf points to the
 * BGP-specific configuration.
 */
struct bgp_proto {
  struct proto p;
  struct bgp_config *cf;		/* Shortcut to BGP configuration */
  u32 local_as, remote_as;
  int start_state;			/* Substate that partitions PS_START, see BSS_* */
  u8 is_internal;			/* Internal BGP connection (local_as == remote_as) */
  u8 as4_session;			/* Session uses 4B AS numbers in AS_PATH (both sides support it) */
  u8 add_path_rx;			/* Session expects receive of ADD-PATH extended NLRI */
  u8 add_path_tx;			/* Session expects transmit of ADD-PATH extended NLRI */
  u8 ext_messages;			/* Session allows to use extended messages (both sides support it) */
  u32 local_id;				/* BGP identifier of this router */
  u32 remote_id;			/* BGP identifier of the neighbor */
  u32 rr_cluster_id;			/* Route reflector cluster ID */
  int rr_client;			/* Whether neighbor is RR client of me */
  int rs_client;			/* Whether neighbor is RS client of me */
  u8 gr_ready;				/* Neighbor could do graceful restart */
  u8 gr_active;				/* Neighbor is doing graceful restart (BGP_GRS_*) */
  u8 feed_state;			/* Feed state (TX) for EoR, RR packets, see BFS_* */
  u8 load_state;			/* Load state (RX) for EoR, RR packets, see BFS_* */
  uint stale_time;			/* Long-lived stale time for LLGR */
  struct bgp_conn *conn;		/* Connection we have established */
  struct bgp_conn outgoing_conn;	/* Outgoing connection we're working with */
  struct bgp_conn incoming_conn;	/* Incoming connection we have neither accepted nor rejected yet */
  struct object_lock *lock;		/* Lock for neighbor connection */
  struct neighbor *neigh;		/* Neighbor entry corresponding to remote ip, NULL if multihop */
  struct bfd_request *bfd_req;		/* BFD request, if BFD is used */
  ip_addr source_addr;			/* Local address used as an advertised next hop */
  rtable *igp_table;			/* Table used for recursive next hop lookups */
  struct event *event;			/* Event for respawning and shutting process */
  struct timer *startup_timer;		/* Timer used to delay protocol startup due to previous errors (startup_delay) */
  struct timer *gr_timer;		/* Timer waiting for reestablishment after graceful restart */
  struct bgp_bucket **bucket_hash;	/* Hash table of attribute buckets */
  uint hash_size, hash_count, hash_limit;	/* Bookkeeping for bucket_hash (exact meaning not visible here) */
  HASH(struct bgp_prefix) prefix_hash;	/* Prefixes to be sent */
  slab *prefix_slab;			/* Slab holding prefix nodes */
  list bucket_queue;			/* Queue of buckets to send */
  struct bgp_bucket *withdraw_bucket;	/* Withdrawn routes */
  unsigned startup_delay;		/* Time to delay protocol startup by due to errors */
  bird_clock_t last_proto_error;	/* Time of last error that leads to protocol stop */
  u8 last_error_class; 			/* Error class of last error, see BE_* */
  u32 last_error_code;			/* Error code of last error. BGP protocol errors
					   are encoded as (bgp_err_code << 16 | bgp_err_subcode) */
#ifdef IPV6
  byte *mp_reach_start, *mp_unreach_start; /* Multiprotocol BGP attribute notes */
  unsigned mp_reach_len, mp_unreach_len;
  ip_addr local_link;			/* Link-level version of source_addr */
#endif
};
186 
/*
 * One prefix queued for transmission; stored in bgp_proto.prefix_hash and
 * linked into the prefix list of a bucket.
 */
struct bgp_prefix {
  struct {
    ip_addr prefix;
    int pxlen;				/* Prefix length */
  } n;
  u32 path_id;
  struct bgp_prefix *next;		/* Presumably the hash chain link used by prefix_hash -- TODO confirm */
  node bucket_node;			/* Node in per-bucket list */
};
196 
/*
 * A bucket groups prefixes under one set of extended attributes. Buckets
 * live in bgp_proto.bucket_hash and are queued in bgp_proto.bucket_queue.
 */
struct bgp_bucket {
  node send_node;			/* Node in send queue */
  struct bgp_bucket *hash_next, *hash_prev;	/* Node in bucket hash table */
  unsigned hash;			/* Hash over extended attributes */
  list prefixes;			/* Prefixes in this bucket */
  ea_list eattrs[0];			/* Per-bucket extended attributes (zero-length trailing array) */
};
204 
/* Protocol constants and message / buffer size limits */
#define BGP_PORT		179
#define BGP_VERSION		4
#define BGP_HEADER_LENGTH	19
#define BGP_MAX_MESSAGE_LENGTH	4096	/* Limit when extended messages are not in use */
#define BGP_MAX_EXT_MSG_LENGTH	65535	/* Limit when extended messages are in use */
#define BGP_RX_BUFFER_SIZE	4096
#define BGP_TX_BUFFER_SIZE	4096
#define BGP_RX_BUFFER_EXT_SIZE	65535
#define BGP_TX_BUFFER_EXT_SIZE	65535
214 
bgp_max_packet_length(struct bgp_proto * p)215 static inline uint bgp_max_packet_length(struct bgp_proto *p)
216 { return p->ext_messages ? BGP_MAX_EXT_MSG_LENGTH : BGP_MAX_MESSAGE_LENGTH; }
217 
/* Linpool used by the BGP code; it is defined outside this header. */
extern struct linpool *bgp_linpool;


/*
 * Protocol and connection state handling. No source-file marker precedes
 * this group (unlike the attrs.c and packets.c groups further down), so the
 * defining file is presumably bgp.c -- TODO confirm.
 */
void bgp_start_timer(struct timer *t, int value);
void bgp_check_config(struct bgp_config *c);
void bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len);
void bgp_close_conn(struct bgp_conn *c);
void bgp_update_startup_delay(struct bgp_proto *p);
/* Connection state machine transitions; the targets presumably match the BS_* states */
void bgp_conn_enter_openconfirm_state(struct bgp_conn *conn);
void bgp_conn_enter_established_state(struct bgp_conn *conn);
void bgp_conn_enter_close_state(struct bgp_conn *conn);
void bgp_conn_enter_idle_state(struct bgp_conn *conn);
void bgp_handle_graceful_restart(struct bgp_proto *p);
void bgp_graceful_restart_done(struct bgp_proto *p);
void bgp_refresh_begin(struct bgp_proto *p);
void bgp_refresh_end(struct bgp_proto *p);
/* NOTE(review): class/code presumably end up in last_error_class/last_error_code (BE_* classes) -- confirm */
void bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code);
void bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len);

/* Lookup of route sources by ADD-PATH path_id (find vs. get semantics are not visible here) */
struct rte_source *bgp_find_source(struct bgp_proto *p, u32 path_id);
struct rte_source *bgp_get_source(struct bgp_proto *p, u32 path_id);
239 
240 
241 
/*
 * Trace logging helpers.
 *
 * Both macros require a pointer named `p` in scope at the call site whose
 * `p` member provides `debug` and `name` (e.g. a struct bgp_proto *).
 * A message is logged when any bit of @flags is set in p->p.debug, or
 * unconditionally when the file is compiled with LOCAL_DEBUG.
 *
 * @flags is parenthesized in the expansion so that a compound argument
 * such as (A | B) is masked as a whole; without the parentheses it would
 * expand to (debug & A) | B, which is always true for a non-zero B.
 *
 * BGP_TRACE_RL is the rate-limited variant and passes @rl on to log_rl().
 */
#ifdef LOCAL_DEBUG
#define BGP_FORCE_DEBUG 1
#else
#define BGP_FORCE_DEBUG 0
#endif
#define BGP_TRACE(flags, msg, args...) do { if ((p->p.debug & (flags)) || BGP_FORCE_DEBUG) \
	log(L_TRACE "%s: " msg, p->p.name , ## args ); } while(0)

#define BGP_TRACE_RL(rl, flags, msg, args...) do { if ((p->p.debug & (flags)) || BGP_FORCE_DEBUG) \
	log_rl(rl, L_TRACE "%s: " msg, p->p.name , ## args ); } while(0)
252 
253 
254 /* attrs.c */
255 
256 /* Hack: although BA_NEXT_HOP attribute has type EAF_TYPE_IP_ADDRESS, in IPv6
257  * we store two addesses in it - a global address and a link local address.
258  */
259 #ifdef IPV6
260 #define NEXT_HOP_LENGTH (2*sizeof(ip_addr))
set_next_hop(byte * b,ip_addr addr)261 static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; ((ip_addr *) b)[1] = IPA_NONE; }
262 #else
263 #define NEXT_HOP_LENGTH sizeof(ip_addr)
set_next_hop(byte * b,ip_addr addr)264 static inline void set_next_hop(byte *b, ip_addr addr) { ((ip_addr *) b)[0] = addr; }
265 #endif
266 
/* Attaching attributes to an ea_list, allocating from @pool */
void bgp_attach_attr(struct ea_list **to, struct linpool *pool, unsigned attr, uintptr_t val);
/* Returns a pointer the caller writes the attribute data through (see bgp_attach_attr_ip), presumably @len bytes */
byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned attr, unsigned len);
struct rta *bgp_decode_attrs(struct bgp_conn *conn, byte *a, uint len, struct linpool *pool, int mandatory);
int bgp_get_attr(struct eattr *e, byte *buf, int buflen);
/* Route comparison and selection hooks */
int bgp_rte_better(struct rte *, struct rte *);
int bgp_rte_mergable(rte *pri, rte *sec);
int bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best);
struct rte *bgp_rte_modify_stale(struct rte *r, struct linpool *pool);
void bgp_rt_notify(struct proto *P, rtable *tbl UNUSED, net *n, rte *new, rte *old UNUSED, ea_list *attrs);
int bgp_import_control(struct proto *, struct rte **, struct ea_list **, struct linpool *);
/* Management of the bucket table (bgp_proto.bucket_hash) and prefix table (bgp_proto.prefix_hash) */
void bgp_init_bucket_table(struct bgp_proto *);
void bgp_free_bucket_table(struct bgp_proto *p);
void bgp_free_bucket(struct bgp_proto *p, struct bgp_bucket *buck);
void bgp_init_prefix_table(struct bgp_proto *p, u32 order);
void bgp_free_prefix_table(struct bgp_proto *p);
void bgp_free_prefix(struct bgp_proto *p, struct bgp_prefix *bp);
/* NOTE(review): @remains looks like the space left in the buffer at @w -- confirm in attrs.c */
uint bgp_encode_attrs(struct bgp_proto *p, byte *w, ea_list *attrs, int remains);
void bgp_get_route_info(struct rte *, byte *buf, struct ea_list *attrs);
285 
/*
 * Attach attribute @attr carrying the IP address @a to the list @to.
 * Requests sizeof(ip_addr) bytes from bgp_attach_attr_wa() and stores @a
 * through the returned pointer.
 */
static inline void
bgp_attach_attr_ip(struct ea_list **to, struct linpool *pool, unsigned attr, ip_addr a)
{
  ip_addr *slot = (ip_addr *) bgp_attach_attr_wa(to, pool, attr, sizeof(ip_addr));

  *slot = a;
}
288 
/* packets.c */

void bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new);
/* NOTE(review): @type is presumably one of PKT_*, recorded in bgp_conn.packets_to_send -- confirm */
void bgp_schedule_packet(struct bgp_conn *conn, int type);
/* Takes a void pointer; judging by the parameter name it is a struct bgp_conn * -- TODO confirm */
void bgp_kick_tx(void *vconn);
/* Socket-level transmit and receive entry points */
void bgp_tx(struct birdsock *sk);
int bgp_rx(struct birdsock *sk, uint size);
/* Textual description of a BGP error code / subcode pair */
const char * bgp_error_dsc(unsigned code, unsigned subcode);
void bgp_log_error(struct bgp_proto *p, u8 class, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len);
298 
/* Packet types */

#define PKT_OPEN		0x01
#define PKT_UPDATE		0x02
#define PKT_NOTIFICATION	0x03
#define PKT_KEEPALIVE		0x04
#define PKT_ROUTE_REFRESH	0x05	/* [RFC2918] */
#define PKT_BEGIN_REFRESH	0x1e	/* Dummy type for BoRR packet [RFC7313] */
#define PKT_SCHEDULE_CLOSE	0x1f	/* Used internally to schedule socket close */

/* Attributes */

/* Attribute flag bits */
#define BAF_OPTIONAL		0x80
#define BAF_TRANSITIVE		0x40
#define BAF_PARTIAL		0x20
#define BAF_EXT_LEN		0x10

/*
 * Attribute type codes. The two-letter tags on the right presumably give the
 * attribute category: WM = well-known mandatory, WD = well-known
 * discretionary, OT = optional transitive, ON = optional non-transitive.
 */
#define BA_ORIGIN		0x01	/* [RFC1771] */		/* WM */
#define BA_AS_PATH		0x02				/* WM */
#define BA_NEXT_HOP		0x03				/* WM */
#define BA_MULTI_EXIT_DISC	0x04				/* ON */
#define BA_LOCAL_PREF		0x05				/* WD */
#define BA_ATOMIC_AGGR		0x06				/* WD */
#define BA_AGGREGATOR		0x07				/* OT */
#define BA_COMMUNITY		0x08	/* [RFC1997] */		/* OT */
#define BA_ORIGINATOR_ID	0x09	/* [RFC1966] */		/* ON */
#define BA_CLUSTER_LIST		0x0a				/* ON */
/* We don't support these: */
#define BA_DPA			0x0b	/* ??? */
#define BA_ADVERTISER		0x0c	/* [RFC1863] */
#define BA_RCID_PATH		0x0d
/*
 * NOTE(review): the "don't support" heading above presumably ends here.
 * This header itself carries MP-BGP state (mp_reach_start / mp_unreach_start)
 * and a 4-byte AS option (enable_as4), so the codes below look supported --
 * confirm in attrs.c.
 */
#define BA_MP_REACH_NLRI	0x0e	/* [RFC2283] */
#define BA_MP_UNREACH_NLRI	0x0f
#define BA_EXT_COMMUNITY	0x10	/* [RFC4360] */
#define BA_AS4_PATH             0x11    /* [RFC4893] */
#define BA_AS4_AGGREGATOR       0x12
#define BA_LARGE_COMMUNITY	0x20	/* [RFC8092] */
336 
/* BGP connection states (values of bgp_conn.state) */

#define BS_IDLE			0
#define BS_CONNECT		1	/* Attempting to connect */
#define BS_ACTIVE		2	/* Waiting for connection retry & listening */
#define BS_OPENSENT		3
#define BS_OPENCONFIRM		4
#define BS_ESTABLISHED		5
#define BS_CLOSE		6	/* Used during transition to BS_IDLE */

#define BS_MAX			7	/* Number of states (one more than BS_CLOSE) */
348 
/* BGP start states
 *
 * Used in PS_START for fine-grained specification of the starting state.
 *
 * When the BGP protocol is started by the core, it goes to BSS_PREPARE. When
 * the BGP protocol has done what is necessary to start itself (like acquiring
 * the lock), it goes to BSS_CONNECT.  When some connection attempt has failed
 * because of an option or capability error, it goes to BSS_CONNECT_NOCAP.
 *
 * BSS_DELAY is not covered by the description above; see its one-line
 * comment below.
 */

#define BSS_PREPARE		0	/* Used before ordinary BGP started, i. e. waiting for lock */
#define BSS_DELAY		1	/* Startup delay due to previous errors */
#define BSS_CONNECT		2	/* Ordinary BGP connecting */
#define BSS_CONNECT_NOCAP	3	/* Legacy BGP connecting (without capabilities) */
363 
364 
/* BGP feed states (TX)
 *
 * RFC 4724 specifies that an initial feed should end with an End-of-RIB mark.
 *
 * RFC 7313 specifies that a route refresh should be demarcated by BoRR and
 * EoRR packets.
 *
 * These states (stored in p->feed_state) are used to keep track of these
 * requirements. When such a feed is started, BFS_LOADING / BFS_REFRESHING is
 * set. When it has ended, BFS_LOADED / BFS_REFRESHED is set to schedule the
 * End-of-RIB or EoRR packet. When the packet is sent, the state returns to
 * BFS_NONE.
 *
 * Note that when a non-demarcated feed (e.g. plain RFC 4271 initial load
 * without End-of-RIB or plain RFC 2918 route refresh without BoRR/EoRR
 * demarcation) is active, BFS_NONE is set.
 *
 * BFS_NONE, BFS_LOADING and BFS_REFRESHING are also used as load states (RX,
 * stored in p->load_state) with corresponding semantics (-, expecting
 * End-of-RIB, expecting EoRR).
 */

#define BFS_NONE		0	/* No feed or original non-demarcated feed */
#define BFS_LOADING		1	/* Initial feed active, End-of-RIB planned */
#define BFS_LOADED		2	/* Loading done, End-of-RIB marker scheduled */
#define BFS_REFRESHING		3	/* Route refresh (introduced by BoRR) active */
#define BFS_REFRESHED		4	/* Refresh done, EoRR packet scheduled */
389 
390 
/* Error classes (see bgp_proto.last_error_class) */

#define BE_NONE			0
#define BE_MISC			1	/* Miscellaneous error */
#define BE_SOCKET		2	/* Socket error */
#define BE_BGP_RX		3	/* BGP protocol error notification received */
#define BE_BGP_TX		4	/* BGP protocol error notification sent */
#define BE_AUTO_DOWN		5	/* Automatic shutdown */
#define BE_MAN_DOWN		6	/* Manual shutdown */

/* Misc error codes (presumably the error code values used with class BE_MISC) */

#define BEM_NEIGHBOR_LOST	1
#define BEM_INVALID_NEXT_HOP	2
#define BEM_INVALID_MD5		3	/* MD5 authentication kernel request failed (possibly not supported) */
#define BEM_NO_SOCKET		4
#define BEM_LINK_DOWN		5
#define BEM_BFD_DOWN		6
#define BEM_GRACEFUL_RESTART	7

/* Automatic shutdown error codes (presumably used with class BE_AUTO_DOWN) */

#define BEA_ROUTE_LIMIT_EXCEEDED 1
414 
/* Well-known communities */

#define BGP_COMM_NO_EXPORT		0xffffff01	/* Don't export outside local AS / confed. */
#define BGP_COMM_NO_ADVERTISE		0xffffff02	/* Don't export at all */
#define BGP_COMM_NO_EXPORT_SUBCONFED	0xffffff03	/* NO_EXPORT even in local confederation */

#define BGP_COMM_LLGR_STALE		0xffff0006	/* Route is stale according to LLGR */
#define BGP_COMM_NO_LLGR		0xffff0007	/* Do not treat the route according to LLGR */

/* Origins (presumably values of the BA_ORIGIN attribute) */

#define ORIGIN_IGP		0
#define ORIGIN_EGP		1
#define ORIGIN_INCOMPLETE	2

/* Address families */

#define BGP_AF_IPV4		1
#define BGP_AF_IPV6		2

/* BGP_AF is the address family of this build, chosen at compile time by IPV6 */
#ifdef IPV6
#define BGP_AF BGP_AF_IPV6
#else
#define BGP_AF BGP_AF_IPV4
#endif
440 
441 #endif
442