1 /*
2 * BIRD -- The Border Gateway Protocol
3 *
4 * (c) 2000 Martin Mares <mj@ucw.cz>
5 *
6 * Can be freely distributed and used under the terms of the GNU GPL.
7 */
8
9 /**
10 * DOC: Border Gateway Protocol
11 *
12 * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
13 * connection and most of the interface with BIRD core, |packets.c| handling
14 * both incoming and outgoing BGP packets and |attrs.c| containing functions for
15 * manipulation with BGP attribute lists.
16 *
17 * As opposed to the other existing routing daemons, BIRD has a sophisticated core
18 * architecture which is able to keep all the information needed by BGP in the
19 * primary routing table, therefore no complex data structures like a central
20 * BGP table are needed. This increases memory footprint of a BGP router with
21 * many connections, but not too much and, which is more important, it makes
22 * BGP much easier to implement.
23 *
24 * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
25 * structure to which are attached individual connections represented by &bgp_connection
26 * (usually, there exists only one connection, but during BGP session setup, there
27 * can be more of them). The connections are handled according to the BGP state machine
28 * defined in the RFC with all the timers and all the parameters configurable.
29 *
30 * In incoming direction, we listen on the connection's socket and each time we receive
31 * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
32 * passes complete packets to bgp_rx_packet() which distributes the packet according
33 * to its type.
34 *
35 * In outgoing direction, we gather all the routing updates and sort them to buckets
36 * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
37 * of &rta's and a &fib which helps us to find if we already have another route for
38 * the same destination queued for sending, so that we can replace it with the new one
39 * immediately instead of sending both updates). There also exists a special bucket holding
40 * all the route withdrawals which cannot be queued anywhere else as they don't have any
41 * attributes. If we have any packet to send (due to either new routes or the connection
 * tracking code wanting to send an Open, Keepalive or Notification message), we call
 * bgp_schedule_packet() which sets the corresponding bit in the @packets_to_send
44 * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
45 * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
46 * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
47 * type if we have more data of the same type to send.
48 *
49 * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
50 * of the attribute blocks and translating them to the language of BIRD's extended attributes
51 * and bgp_encode_attrs() which does the converse. Both functions are built around a
52 * @bgp_attr_table array describing all important characteristics of all known attributes.
53 * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
54 *
55 * BGP protocol implements graceful restart in both restarting (local restart)
56 * and receiving (neighbor restart) roles. The first is handled mostly by the
57 * graceful restart code in the nest, BGP protocol just handles capabilities,
58 * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
59 * The second is implemented by internal restart of the BGP state to %BS_IDLE
60 * and protocol state to %PS_START, but keeping the protocol up from the core
61 * point of view and therefore maintaining received routes. Routing table
62 * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
63 * stale routes after reestablishment of BGP session during graceful restart.
64 */
65
66 #undef LOCAL_DEBUG
67
68 #include "nest/bird.h"
69 #include "nest/iface.h"
70 #include "nest/protocol.h"
71 #include "nest/route.h"
72 #include "nest/cli.h"
73 #include "nest/locks.h"
74 #include "conf/conf.h"
75 #include "lib/socket.h"
76 #include "lib/resource.h"
77 #include "lib/string.h"
78
79 #include "bgp.h"
80
81
82 struct linpool *bgp_linpool; /* Global temporary pool */
83 static sock *bgp_listen_sk; /* Global listening socket */
84 static int bgp_counter; /* Number of protocol instances using the listening socket */
85
86 static void bgp_close(struct bgp_proto *p, int apply_md5);
87 static void bgp_connect(struct bgp_proto *p);
88 static void bgp_active(struct bgp_proto *p);
89 static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
90 static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
91
92
93 /**
94 * bgp_open - open a BGP instance
95 * @p: BGP instance
96 *
97 * This function allocates and configures shared BGP resources.
98 * Should be called as the last step during initialization
99 * (when lock is acquired and neighbor is ready).
100 * When error, state changed to PS_DOWN, -1 is returned and caller
101 * should return immediately.
102 */
103 static int
bgp_open(struct bgp_proto * p)104 bgp_open(struct bgp_proto *p)
105 {
106 struct config *cfg = p->cf->c.global;
107 int errcode;
108
109 if (!bgp_listen_sk)
110 bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
111
112 if (!bgp_listen_sk)
113 {
114 errcode = BEM_NO_SOCKET;
115 goto err;
116 }
117
118 if (!bgp_linpool)
119 bgp_linpool = lp_new(&root_pool, 4080);
120
121 bgp_counter++;
122
123 if (p->cf->password)
124 if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
125 p->cf->iface, p->cf->password, p->cf->setkey) < 0)
126 {
127 sk_log_error(bgp_listen_sk, p->p.name);
128 bgp_close(p, 0);
129 errcode = BEM_INVALID_MD5;
130 goto err;
131 }
132
133 return 0;
134
135 err:
136 p->p.disabled = 1;
137 bgp_store_error(p, NULL, BE_MISC, errcode);
138 proto_notify_state(&p->p, PS_DOWN);
139 return -1;
140 }
141
142 static void
bgp_startup(struct bgp_proto * p)143 bgp_startup(struct bgp_proto *p)
144 {
145 BGP_TRACE(D_EVENTS, "Started");
146 p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP;
147
148 if (!p->cf->passive)
149 bgp_active(p);
150 }
151
152 static void
bgp_startup_timeout(timer * t)153 bgp_startup_timeout(timer *t)
154 {
155 bgp_startup(t->data);
156 }
157
158
159 static void
bgp_initiate(struct bgp_proto * p)160 bgp_initiate(struct bgp_proto *p)
161 {
162 int rv = bgp_open(p);
163 if (rv < 0)
164 return;
165
166 if (p->cf->bfd)
167 bgp_update_bfd(p, p->cf->bfd);
168
169 if (p->startup_delay)
170 {
171 p->start_state = BSS_DELAY;
172 BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
173 bgp_start_timer(p->startup_timer, p->startup_delay);
174 }
175 else
176 bgp_startup(p);
177 }
178
179 /**
180 * bgp_close - close a BGP instance
181 * @p: BGP instance
182 * @apply_md5: 0 to disable unsetting MD5 auth
183 *
184 * This function frees and deconfigures shared BGP resources.
185 * @apply_md5 is set to 0 when bgp_close is called as a cleanup
186 * from failed bgp_open().
187 */
188 static void
bgp_close(struct bgp_proto * p,int apply_md5)189 bgp_close(struct bgp_proto *p, int apply_md5)
190 {
191 ASSERT(bgp_counter);
192 bgp_counter--;
193
194 if (p->cf->password && apply_md5)
195 if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
196 p->cf->iface, NULL, p->cf->setkey) < 0)
197 sk_log_error(bgp_listen_sk, p->p.name);
198
199 if (!bgp_counter)
200 {
201 rfree(bgp_listen_sk);
202 bgp_listen_sk = NULL;
203 rfree(bgp_linpool);
204 bgp_linpool = NULL;
205 }
206 }
207
208 /**
209 * bgp_start_timer - start a BGP timer
210 * @t: timer
211 * @value: time to fire (0 to disable the timer)
212 *
213 * This functions calls tm_start() on @t with time @value and the
214 * amount of randomization suggested by the BGP standard. Please use
215 * it for all BGP timers.
216 */
217 void
bgp_start_timer(timer * t,int value)218 bgp_start_timer(timer *t, int value)
219 {
220 if (value)
221 {
222 /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
223 t->randomize = value / 4;
224 tm_start(t, value - t->randomize);
225 }
226 else
227 tm_stop(t);
228 }
229
230 /**
231 * bgp_close_conn - close a BGP connection
232 * @conn: connection to close
233 *
234 * This function takes a connection described by the &bgp_conn structure,
235 * closes its socket and frees all resources associated with it.
236 */
237 void
bgp_close_conn(struct bgp_conn * conn)238 bgp_close_conn(struct bgp_conn *conn)
239 {
240 // struct bgp_proto *p = conn->bgp;
241
242 DBG("BGP: Closing connection\n");
243 conn->packets_to_send = 0;
244 rfree(conn->connect_retry_timer);
245 conn->connect_retry_timer = NULL;
246 rfree(conn->keepalive_timer);
247 conn->keepalive_timer = NULL;
248 rfree(conn->hold_timer);
249 conn->hold_timer = NULL;
250 rfree(conn->sk);
251 conn->sk = NULL;
252 rfree(conn->tx_ev);
253 conn->tx_ev = NULL;
254 }
255
256
257 /**
258 * bgp_update_startup_delay - update a startup delay
259 * @p: BGP instance
260 *
261 * This function updates a startup delay that is used to postpone next BGP connect.
262 * It also handles disable_after_error and might stop BGP instance when error
263 * happened and disable_after_error is on.
264 *
265 * It should be called when BGP protocol error happened.
266 */
267 void
bgp_update_startup_delay(struct bgp_proto * p)268 bgp_update_startup_delay(struct bgp_proto *p)
269 {
270 struct bgp_config *cf = p->cf;
271
272 DBG("BGP: Updating startup delay\n");
273
274 if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
275 p->startup_delay = 0;
276
277 p->last_proto_error = now;
278
279 if (cf->disable_after_error)
280 {
281 p->startup_delay = 0;
282 p->p.disabled = 1;
283 return;
284 }
285
286 if (!p->startup_delay)
287 p->startup_delay = cf->error_delay_time_min;
288 else
289 p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
290 }
291
292 static void
bgp_graceful_close_conn(struct bgp_conn * conn,uint subcode,byte * data,uint len)293 bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len)
294 {
295 switch (conn->state)
296 {
297 case BS_IDLE:
298 case BS_CLOSE:
299 return;
300 case BS_CONNECT:
301 case BS_ACTIVE:
302 bgp_conn_enter_idle_state(conn);
303 return;
304 case BS_OPENSENT:
305 case BS_OPENCONFIRM:
306 case BS_ESTABLISHED:
307 bgp_error(conn, 6, subcode, data, len);
308 return;
309 default:
310 bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
311 }
312 }
313
314 static void
bgp_down(struct bgp_proto * p)315 bgp_down(struct bgp_proto *p)
316 {
317 if (p->start_state > BSS_PREPARE)
318 bgp_close(p, 1);
319
320 BGP_TRACE(D_EVENTS, "Down");
321 proto_notify_state(&p->p, PS_DOWN);
322 }
323
324 static void
bgp_decision(void * vp)325 bgp_decision(void *vp)
326 {
327 struct bgp_proto *p = vp;
328
329 DBG("BGP: Decision start\n");
330 if ((p->p.proto_state == PS_START)
331 && (p->outgoing_conn.state == BS_IDLE)
332 && (p->incoming_conn.state != BS_OPENCONFIRM)
333 && (!p->cf->passive))
334 bgp_active(p);
335
336 if ((p->p.proto_state == PS_STOP)
337 && (p->outgoing_conn.state == BS_IDLE)
338 && (p->incoming_conn.state == BS_IDLE))
339 bgp_down(p);
340 }
341
342 void
bgp_stop(struct bgp_proto * p,uint subcode,byte * data,uint len)343 bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len)
344 {
345 proto_notify_state(&p->p, PS_STOP);
346 bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
347 bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
348 ev_schedule(p->event);
349 }
350
351 static inline void
bgp_conn_set_state(struct bgp_conn * conn,unsigned new_state)352 bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
353 {
354 if (conn->bgp->p.mrtdump & MD_STATES)
355 bgp_dump_state_change(conn, conn->state, new_state);
356
357 conn->state = new_state;
358 }
359
360 void
bgp_conn_enter_openconfirm_state(struct bgp_conn * conn)361 bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
362 {
363 /* Really, most of the work is done in bgp_rx_open(). */
364 bgp_conn_set_state(conn, BS_OPENCONFIRM);
365 }
366
367 void
bgp_conn_enter_established_state(struct bgp_conn * conn)368 bgp_conn_enter_established_state(struct bgp_conn *conn)
369 {
370 struct bgp_proto *p = conn->bgp;
371
372 BGP_TRACE(D_EVENTS, "BGP session established");
373 DBG("BGP: UP!!!\n");
374
375 /* For multi-hop BGP sessions */
376 if (ipa_zero(p->source_addr))
377 p->source_addr = conn->sk->saddr;
378
379 conn->sk->fast_rx = 0;
380
381 p->conn = conn;
382 p->last_error_class = 0;
383 p->last_error_code = 0;
384 p->feed_state = BFS_NONE;
385 p->load_state = BFS_NONE;
386 bgp_init_bucket_table(p);
387 bgp_init_prefix_table(p, 8);
388
389 int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);
390
391 if (p->p.gr_recovery && !peer_gr_ready)
392 proto_graceful_restart_unlock(&p->p);
393
394 if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
395 p->p.gr_wait = 1;
396
397 if (p->gr_active == BGP_GRS_ACTIVE)
398 tm_stop(p->gr_timer);
399
400 /* Check F-bit for regular graceful restart */
401 if ((p->gr_active == BGP_GRS_ACTIVE) &&
402 (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
403 bgp_graceful_restart_done(p);
404
405 /* Check F-bit for long-lived graceful restart */
406 if (((p->gr_active == BGP_GRS_LLGR_1) || (p->gr_active == BGP_GRS_LLGR_2)) &&
407 (!conn->peer_llgr_able || !(conn->peer_llgr_aflags & BGP_LLGRF_FORWARDING)))
408 bgp_graceful_restart_done(p);
409
410 /* GR capability implies that neighbor will send End-of-RIB */
411 if (conn->peer_gr_aware)
412 p->load_state = BFS_LOADING;
413
414 /* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */
415
416 bgp_conn_set_state(conn, BS_ESTABLISHED);
417 proto_notify_state(&p->p, PS_UP);
418 }
419
420 static void
bgp_conn_leave_established_state(struct bgp_proto * p)421 bgp_conn_leave_established_state(struct bgp_proto *p)
422 {
423 BGP_TRACE(D_EVENTS, "BGP session closed");
424 p->conn = NULL;
425
426 bgp_free_prefix_table(p);
427 bgp_free_bucket_table(p);
428
429 if (p->p.proto_state == PS_UP)
430 bgp_stop(p, 0, NULL, 0);
431 }
432
433 void
bgp_conn_enter_close_state(struct bgp_conn * conn)434 bgp_conn_enter_close_state(struct bgp_conn *conn)
435 {
436 struct bgp_proto *p = conn->bgp;
437 int os = conn->state;
438
439 bgp_conn_set_state(conn, BS_CLOSE);
440 tm_stop(conn->keepalive_timer);
441 conn->sk->rx_hook = NULL;
442
443 /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
444 bgp_start_timer(conn->hold_timer, 10);
445
446 if (os == BS_ESTABLISHED)
447 bgp_conn_leave_established_state(p);
448 }
449
450 void
bgp_conn_enter_idle_state(struct bgp_conn * conn)451 bgp_conn_enter_idle_state(struct bgp_conn *conn)
452 {
453 struct bgp_proto *p = conn->bgp;
454 int os = conn->state;
455
456 bgp_close_conn(conn);
457 bgp_conn_set_state(conn, BS_IDLE);
458 ev_schedule(p->event);
459
460 if (os == BS_ESTABLISHED)
461 bgp_conn_leave_established_state(p);
462 }
463
464 /**
465 * bgp_handle_graceful_restart - handle detected BGP graceful restart
466 * @p: BGP instance
467 *
468 * This function is called when a BGP graceful restart of the neighbor is
469 * detected (when the TCP connection fails or when a new TCP connection
470 * appears). The function activates processing of the restart - starts routing
471 * table refresh cycle and activates BGP restart timer. The protocol state goes
472 * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
473 * caller.
474 */
475 void
bgp_handle_graceful_restart(struct bgp_proto * p)476 bgp_handle_graceful_restart(struct bgp_proto *p)
477 {
478 ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
479
480 BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
481 p->gr_active ? " - already pending" : "");
482 proto_notify_state(&p->p, PS_START);
483
484 switch (p->gr_active)
485 {
486 case BGP_GRS_ACTIVE:
487 rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
488 break;
489
490 case BGP_GRS_LLGR_1:
491 rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
492 return;
493
494 case BGP_GRS_LLGR_2:
495 rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
496 rt_modify_stale(p->p.main_ahook->table, p->p.main_ahook);
497 return;
498 }
499
500 p->stale_time = p->cf->llgr_mode ? p->conn->peer_llgr_time : 0;
501 p->gr_active = !p->stale_time ? BGP_GRS_ACTIVE : BGP_GRS_LLGR_1;
502 tm_start(p->gr_timer, p->conn->peer_gr_time);
503 rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
504 }
505
506 /**
507 * bgp_graceful_restart_done - finish active BGP graceful restart
508 * @p: BGP instance
509 *
510 * This function is called when the active BGP graceful restart of the neighbor
511 * should be finished - either successfully (the neighbor sends all paths and
512 * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does
513 * not support BGP graceful restart on the new session). The function ends
514 * routing table refresh cycle and stops BGP restart timer.
515 */
516 void
bgp_graceful_restart_done(struct bgp_proto * p)517 bgp_graceful_restart_done(struct bgp_proto *p)
518 {
519 BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
520 p->gr_active = 0;
521 tm_stop(p->gr_timer);
522 rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
523 }
524
525 /**
526 * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
527 * @t: timer
528 *
529 * This function is a timeout hook for @gr_timer, implementing BGP restart time
530 * limit for reestablisment of the BGP session after the graceful restart. When
531 * fired, we just proceed with the usual protocol restart.
532 */
533
534 static void
bgp_graceful_restart_timeout(timer * t)535 bgp_graceful_restart_timeout(timer *t)
536 {
537 struct bgp_proto *p = t->data;
538
539 switch (p->gr_active)
540 {
541 case BGP_GRS_ACTIVE:
542 BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
543 bgp_stop(p, 0, NULL, 0);
544 return;
545
546 case BGP_GRS_LLGR_1:
547 BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
548 p->gr_active = BGP_GRS_LLGR_2;
549 tm_start(p->gr_timer, p->stale_time);
550 rt_modify_stale(p->p.main_ahook->table, p->p.main_ahook);
551 return;
552
553 case BGP_GRS_LLGR_2:
554 BGP_TRACE(D_EVENTS, "Long-lived graceful restart timeout");
555 p->gr_active = 0;
556 rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
557 return;
558 }
559 }
560
561 /**
562 * bgp_refresh_begin - start incoming enhanced route refresh sequence
563 * @p: BGP instance
564 *
565 * This function is called when an incoming enhanced route refresh sequence is
566 * started by the neighbor, demarcated by the BoRR packet. The function updates
567 * the load state and starts the routing table refresh cycle. Note that graceful
568 * restart also uses routing table refresh cycle, but RFC 7313 and load states
569 * ensure that these two sequences do not overlap.
570 */
571 void
bgp_refresh_begin(struct bgp_proto * p)572 bgp_refresh_begin(struct bgp_proto *p)
573 {
574 if (p->load_state == BFS_LOADING)
575 { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
576
577 p->load_state = BFS_REFRESHING;
578 rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
579 }
580
581 /**
582 * bgp_refresh_end - finish incoming enhanced route refresh sequence
583 * @p: BGP instance
584 *
585 * This function is called when an incoming enhanced route refresh sequence is
586 * finished by the neighbor, demarcated by the EoRR packet. The function updates
587 * the load state and ends the routing table refresh cycle. Routes not received
588 * during the sequence are removed by the nest.
589 */
590 void
bgp_refresh_end(struct bgp_proto * p)591 bgp_refresh_end(struct bgp_proto *p)
592 {
593 if (p->load_state != BFS_REFRESHING)
594 { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
595
596 p->load_state = BFS_NONE;
597 rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
598 }
599
600
601 static void
bgp_send_open(struct bgp_conn * conn)602 bgp_send_open(struct bgp_conn *conn)
603 {
604 conn->start_state = conn->bgp->start_state;
605
606 // Default values, possibly changed by receiving capabilities.
607 conn->advertised_as = 0;
608 conn->peer_refresh_support = 0;
609 conn->peer_as4_support = 0;
610 conn->peer_add_path = 0;
611 conn->peer_enhanced_refresh_support = 0;
612 conn->peer_gr_aware = 0;
613 conn->peer_gr_able = 0;
614 conn->peer_gr_time = 0;
615 conn->peer_gr_flags = 0;
616 conn->peer_gr_aflags = 0;
617 conn->peer_llgr_aware = 0;
618 conn->peer_llgr_able = 0;
619 conn->peer_llgr_time = 0;
620 conn->peer_llgr_aflags = 0;
621 conn->peer_ext_messages_support = 0;
622
623 DBG("BGP: Sending open\n");
624 conn->sk->rx_hook = bgp_rx;
625 conn->sk->tx_hook = bgp_tx;
626 tm_stop(conn->connect_retry_timer);
627 bgp_schedule_packet(conn, PKT_OPEN);
628 bgp_conn_set_state(conn, BS_OPENSENT);
629 bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
630 }
631
632 static void
bgp_connected(sock * sk)633 bgp_connected(sock *sk)
634 {
635 struct bgp_conn *conn = sk->data;
636 struct bgp_proto *p = conn->bgp;
637
638 BGP_TRACE(D_EVENTS, "Connected");
639 bgp_send_open(conn);
640 }
641
642 static void
bgp_connect_timeout(timer * t)643 bgp_connect_timeout(timer *t)
644 {
645 struct bgp_conn *conn = t->data;
646 struct bgp_proto *p = conn->bgp;
647
648 DBG("BGP: connect_timeout\n");
649 if (p->p.proto_state == PS_START)
650 {
651 bgp_close_conn(conn);
652 bgp_connect(p);
653 }
654 else
655 bgp_conn_enter_idle_state(conn);
656 }
657
658 static void
bgp_sock_err(sock * sk,int err)659 bgp_sock_err(sock *sk, int err)
660 {
661 struct bgp_conn *conn = sk->data;
662 struct bgp_proto *p = conn->bgp;
663
664 /*
665 * This error hook may be called either asynchronously from main
666 * loop, or synchronously from sk_send(). But sk_send() is called
667 * only from bgp_tx() and bgp_kick_tx(), which are both called
668 * asynchronously from main loop. Moreover, they end if err hook is
669 * called. Therefore, we could suppose that it is always called
670 * asynchronously.
671 */
672
673 bgp_store_error(p, conn, BE_SOCKET, err);
674
675 if (err)
676 BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
677 else
678 BGP_TRACE(D_EVENTS, "Connection closed");
679
680 if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
681 bgp_handle_graceful_restart(p);
682
683 bgp_conn_enter_idle_state(conn);
684 }
685
686 static void
bgp_hold_timeout(timer * t)687 bgp_hold_timeout(timer *t)
688 {
689 struct bgp_conn *conn = t->data;
690 struct bgp_proto *p = conn->bgp;
691
692 DBG("BGP: Hold timeout\n");
693
694 /* We are already closing the connection - just do hangup */
695 if (conn->state == BS_CLOSE)
696 {
697 BGP_TRACE(D_EVENTS, "Connection stalled");
698 bgp_conn_enter_idle_state(conn);
699 return;
700 }
701
702 /* If there is something in input queue, we are probably congested
703 and perhaps just not processed BGP packets in time. */
704
705 if (sk_rx_ready(conn->sk) > 0)
706 bgp_start_timer(conn->hold_timer, 10);
707 else if ((conn->state == BS_ESTABLISHED) && p->gr_ready && conn->peer_llgr_able)
708 {
709 BGP_TRACE(D_EVENTS, "Hold timer expired");
710 bgp_handle_graceful_restart(p);
711 bgp_conn_enter_idle_state(conn);
712 }
713 else
714 bgp_error(conn, 4, 0, NULL, 0);
715 }
716
717 static void
bgp_keepalive_timeout(timer * t)718 bgp_keepalive_timeout(timer *t)
719 {
720 struct bgp_conn *conn = t->data;
721
722 DBG("BGP: Keepalive timer\n");
723 bgp_schedule_packet(conn, PKT_KEEPALIVE);
724
725 /* Kick TX a bit faster */
726 if (ev_active(conn->tx_ev))
727 ev_run(conn->tx_ev);
728 }
729
730 static void
bgp_setup_conn(struct bgp_proto * p,struct bgp_conn * conn)731 bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
732 {
733 timer *t;
734
735 conn->sk = NULL;
736 conn->bgp = p;
737 conn->packets_to_send = 0;
738
739 t = conn->connect_retry_timer = tm_new(p->p.pool);
740 t->hook = bgp_connect_timeout;
741 t->data = conn;
742 t = conn->hold_timer = tm_new(p->p.pool);
743 t->hook = bgp_hold_timeout;
744 t->data = conn;
745 t = conn->keepalive_timer = tm_new(p->p.pool);
746 t->hook = bgp_keepalive_timeout;
747 t->data = conn;
748 conn->tx_ev = ev_new(p->p.pool);
749 conn->tx_ev->hook = bgp_kick_tx;
750 conn->tx_ev->data = conn;
751 }
752
753 static void
bgp_setup_sk(struct bgp_conn * conn,sock * s)754 bgp_setup_sk(struct bgp_conn *conn, sock *s)
755 {
756 s->data = conn;
757 s->err_hook = bgp_sock_err;
758 s->fast_rx = 1;
759 conn->sk = s;
760 }
761
762 static void
bgp_active(struct bgp_proto * p)763 bgp_active(struct bgp_proto *p)
764 {
765 int delay = MAX(1, p->cf->connect_delay_time);
766 struct bgp_conn *conn = &p->outgoing_conn;
767
768 BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
769 bgp_setup_conn(p, conn);
770 bgp_conn_set_state(conn, BS_ACTIVE);
771 bgp_start_timer(conn->connect_retry_timer, delay);
772 }
773
774 /**
775 * bgp_connect - initiate an outgoing connection
776 * @p: BGP instance
777 *
778 * The bgp_connect() function creates a new &bgp_conn and initiates
779 * a TCP connection to the peer. The rest of connection setup is governed
780 * by the BGP state machine as described in the standard.
781 */
782 static void
bgp_connect(struct bgp_proto * p)783 bgp_connect(struct bgp_proto *p) /* Enter Connect state and start establishing connection */
784 {
785 sock *s;
786 struct bgp_conn *conn = &p->outgoing_conn;
787 int hops = p->cf->multihop ? : 1;
788
789 DBG("BGP: Connecting\n");
790 s = sk_new(p->p.pool);
791 s->type = SK_TCP_ACTIVE;
792 s->saddr = p->source_addr;
793 s->daddr = p->cf->remote_ip;
794 s->dport = p->cf->remote_port;
795 s->iface = p->neigh ? p->neigh->iface : NULL;
796 s->vrf = p->p.vrf;
797 s->ttl = p->cf->ttl_security ? 255 : hops;
798 s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
799 s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
800 s->tos = IP_PREC_INTERNET_CONTROL;
801 s->password = p->cf->password;
802 s->tx_hook = bgp_connected;
803 BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
804 s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
805 bgp_setup_conn(p, conn);
806 bgp_setup_sk(conn, s);
807 bgp_conn_set_state(conn, BS_CONNECT);
808
809 if (sk_open(s) < 0)
810 goto err;
811
812 /* Set minimal receive TTL if needed */
813 if (p->cf->ttl_security)
814 if (sk_set_min_ttl(s, 256 - hops) < 0)
815 goto err;
816
817 DBG("BGP: Waiting for connect success\n");
818 bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
819 return;
820
821 err:
822 sk_log_error(s, p->p.name);
823 bgp_sock_err(s, 0);
824 return;
825 }
826
827 /**
828 * bgp_find_proto - find existing proto for incoming connection
829 * @sk: TCP socket
830 *
831 */
832 static struct bgp_proto *
bgp_find_proto(sock * sk)833 bgp_find_proto(sock *sk)
834 {
835 struct proto_config *pc;
836
837 WALK_LIST(pc, config->protos)
838 if ((pc->protocol == &proto_bgp) && pc->proto)
839 {
840 struct bgp_proto *p = (struct bgp_proto *) pc->proto;
841 if (ipa_equal(p->cf->remote_ip, sk->daddr) &&
842 (!p->cf->iface || (p->cf->iface == sk->iface)))
843 return p;
844 }
845
846 return NULL;
847 }
848
849 /**
850 * bgp_incoming_connection - handle an incoming connection
851 * @sk: TCP socket
852 * @dummy: unused
853 *
854 * This function serves as a socket hook for accepting of new BGP
855 * connections. It searches a BGP instance corresponding to the peer
856 * which has connected and if such an instance exists, it creates a
857 * &bgp_conn structure, attaches it to the instance and either sends
858 * an Open message or (if there already is an active connection) it
859 * closes the new connection by sending a Notification message.
860 */
861 static int
bgp_incoming_connection(sock * sk,uint dummy UNUSED)862 bgp_incoming_connection(sock *sk, uint dummy UNUSED)
863 {
864 struct bgp_proto *p;
865 int acc, hops;
866
867 DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
868 p = bgp_find_proto(sk);
869 if (!p)
870 {
871 log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
872 sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
873 rfree(sk);
874 return 0;
875 }
876
877 /*
878 * BIRD should keep multiple incoming connections in OpenSent state (for
879 * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
880 * connections are rejected istead. The exception is the case where an
881 * incoming connection triggers a graceful restart.
882 */
883
884 acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
885 (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
886
887 if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
888 {
889 bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
890 bgp_handle_graceful_restart(p);
891 bgp_conn_enter_idle_state(p->conn);
892 acc = 1;
893
894 /* There might be separate incoming connection in OpenSent state */
895 if (p->incoming_conn.state > BS_ACTIVE)
896 bgp_close_conn(&p->incoming_conn);
897 }
898
899 BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
900 sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
901 sk->dport, acc ? "accepted" : "rejected");
902
903 if (!acc)
904 {
905 rfree(sk);
906 return 0;
907 }
908
909 hops = p->cf->multihop ? : 1;
910
911 if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
912 goto err;
913
914 if (p->cf->ttl_security)
915 if (sk_set_min_ttl(sk, 256 - hops) < 0)
916 goto err;
917
918 if (p->cf->enable_extended_messages)
919 {
920 sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
921 sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
922 sk_reallocate(sk);
923 }
924
925 bgp_setup_conn(p, &p->incoming_conn);
926 bgp_setup_sk(&p->incoming_conn, sk);
927 bgp_send_open(&p->incoming_conn);
928 return 0;
929
930 err:
931 sk_log_error(sk, p->p.name);
932 log(L_ERR "%s: Incoming connection aborted", p->p.name);
933 rfree(sk);
934 return 0;
935 }
936
937 static void
bgp_listen_sock_err(sock * sk UNUSED,int err)938 bgp_listen_sock_err(sock *sk UNUSED, int err)
939 {
940 if (err == ECONNABORTED)
941 log(L_WARN "BGP: Incoming connection aborted");
942 else
943 log(L_ERR "BGP: Error on listening socket: %M", err);
944 }
945
946 static sock *
bgp_setup_listen_sk(ip_addr addr,unsigned port,u32 flags)947 bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
948 {
949 sock *s = sk_new(&root_pool);
950 DBG("BGP: Creating listening socket\n");
951 s->type = SK_TCP_PASSIVE;
952 s->ttl = 255;
953 s->saddr = addr;
954 s->sport = port ? port : BGP_PORT;
955 s->flags = flags ? 0 : SKF_V6ONLY;
956 s->tos = IP_PREC_INTERNET_CONTROL;
957 s->rbsize = BGP_RX_BUFFER_SIZE;
958 s->tbsize = BGP_TX_BUFFER_SIZE;
959 s->rx_hook = bgp_incoming_connection;
960 s->err_hook = bgp_listen_sock_err;
961
962 if (sk_open(s) < 0)
963 goto err;
964
965 return s;
966
967 err:
968 sk_log_error(s, "BGP");
969 log(L_ERR "BGP: Cannot open listening socket");
970 rfree(s);
971 return NULL;
972 }
973
974 static void
bgp_start_neighbor(struct bgp_proto * p)975 bgp_start_neighbor(struct bgp_proto *p)
976 {
977 /* Called only for single-hop BGP sessions */
978
979 if (ipa_zero(p->source_addr))
980 p->source_addr = p->neigh->ifa->ip;
981
982 #ifdef IPV6
983 {
984 struct ifa *a;
985 p->local_link = IPA_NONE;
986 WALK_LIST(a, p->neigh->iface->addrs)
987 if (a->scope == SCOPE_LINK)
988 {
989 p->local_link = a->ip;
990 break;
991 }
992
993 if (! ipa_nonzero(p->local_link))
994 log(L_WARN "%s: Missing link local address on interface %s", p->p.name, p->neigh->iface->name);
995
996 DBG("BGP: Selected link-level address %I\n", p->local_link);
997 }
998 #endif
999
1000 bgp_initiate(p);
1001 }
1002
1003 static void
bgp_neigh_notify(neighbor * n)1004 bgp_neigh_notify(neighbor *n)
1005 {
1006 struct bgp_proto *p = (struct bgp_proto *) n->proto;
1007 int ps = p->p.proto_state;
1008
1009 if (n != p->neigh)
1010 return;
1011
1012 if ((ps == PS_DOWN) || (ps == PS_STOP))
1013 return;
1014
1015 int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
1016
1017 if (n->scope <= 0)
1018 {
1019 if (!prepare)
1020 {
1021 BGP_TRACE(D_EVENTS, "Neighbor lost");
1022 bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
1023 /* Perhaps also run bgp_update_startup_delay(p)? */
1024 bgp_stop(p, 0, NULL, 0);
1025 }
1026 }
1027 else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1028 {
1029 if (!prepare)
1030 {
1031 BGP_TRACE(D_EVENTS, "Link down");
1032 bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
1033 if (ps == PS_UP)
1034 bgp_update_startup_delay(p);
1035 bgp_stop(p, 0, NULL, 0);
1036 }
1037 }
1038 else
1039 {
1040 if (prepare)
1041 {
1042 BGP_TRACE(D_EVENTS, "Neighbor ready");
1043 bgp_start_neighbor(p);
1044 }
1045 }
1046 }
1047
1048 static void
bgp_bfd_notify(struct bfd_request * req)1049 bgp_bfd_notify(struct bfd_request *req)
1050 {
1051 struct bgp_proto *p = req->data;
1052 int ps = p->p.proto_state;
1053
1054 if (req->down && ((ps == PS_START) || (ps == PS_UP)))
1055 {
1056 BGP_TRACE(D_EVENTS, "BFD session down");
1057 bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
1058
1059 if (p->cf->bfd == BGP_BFD_GRACEFUL)
1060 {
1061 /* Trigger graceful restart */
1062 if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
1063 bgp_handle_graceful_restart(p);
1064
1065 if (p->incoming_conn.state > BS_IDLE)
1066 bgp_conn_enter_idle_state(&p->incoming_conn);
1067
1068 if (p->outgoing_conn.state > BS_IDLE)
1069 bgp_conn_enter_idle_state(&p->outgoing_conn);
1070 }
1071 else
1072 {
1073 /* Trigger session down */
1074 if (ps == PS_UP)
1075 bgp_update_startup_delay(p);
1076 bgp_stop(p, 0, NULL, 0);
1077 }
1078 }
1079 }
1080
1081 static void
bgp_update_bfd(struct bgp_proto * p,int use_bfd)1082 bgp_update_bfd(struct bgp_proto *p, int use_bfd)
1083 {
1084 if (use_bfd && !p->bfd_req)
1085 p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
1086 p->cf->multihop ? NULL : p->neigh->iface,
1087 p->p.vrf, bgp_bfd_notify, p);
1088
1089 if (!use_bfd && p->bfd_req)
1090 {
1091 rfree(p->bfd_req);
1092 p->bfd_req = NULL;
1093 }
1094 }
1095
1096 static int
bgp_reload_routes(struct proto * P)1097 bgp_reload_routes(struct proto *P)
1098 {
1099 struct bgp_proto *p = (struct bgp_proto *) P;
1100 if (!p->conn || !p->conn->peer_refresh_support)
1101 return 0;
1102
1103 bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
1104 return 1;
1105 }
1106
1107 static void
bgp_feed_begin(struct proto * P,int initial)1108 bgp_feed_begin(struct proto *P, int initial)
1109 {
1110 struct bgp_proto *p = (struct bgp_proto *) P;
1111
1112 /* This should not happen */
1113 if (!p->conn)
1114 return;
1115
1116 if (initial && p->cf->gr_mode)
1117 p->feed_state = BFS_LOADING;
1118
1119 /* It is refeed and both sides support enhanced route refresh */
1120 if (!initial && p->cf->enable_refresh &&
1121 p->conn->peer_enhanced_refresh_support)
1122 {
1123 /* BoRR must not be sent before End-of-RIB */
1124 if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED)
1125 return;
1126
1127 p->feed_state = BFS_REFRESHING;
1128 bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH);
1129 }
1130 }
1131
1132 static void
bgp_feed_end(struct proto * P)1133 bgp_feed_end(struct proto *P)
1134 {
1135 struct bgp_proto *p = (struct bgp_proto *) P;
1136
1137 /* This should not happen */
1138 if (!p->conn)
1139 return;
1140
1141 /* Non-demarcated feed ended, nothing to do */
1142 if (p->feed_state == BFS_NONE)
1143 return;
1144
1145 /* Schedule End-of-RIB packet */
1146 if (p->feed_state == BFS_LOADING)
1147 p->feed_state = BFS_LOADED;
1148
1149 /* Schedule EoRR packet */
1150 if (p->feed_state == BFS_REFRESHING)
1151 p->feed_state = BFS_REFRESHED;
1152
1153 /* Kick TX hook */
1154 bgp_schedule_packet(p->conn, PKT_UPDATE);
1155 }
1156
1157
1158 static void
bgp_start_locked(struct object_lock * lock)1159 bgp_start_locked(struct object_lock *lock)
1160 {
1161 struct bgp_proto *p = lock->data;
1162 struct bgp_config *cf = p->cf;
1163
1164 if (p->p.proto_state != PS_START)
1165 {
1166 DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
1167 return;
1168 }
1169
1170 DBG("BGP: Got lock\n");
1171
1172 if (cf->multihop)
1173 {
1174 /* Multi-hop sessions do not use neighbor entries */
1175 bgp_initiate(p);
1176 return;
1177 }
1178
1179 neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
1180 if (!n)
1181 {
1182 log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
1183 /* As we do not start yet, we can just disable protocol */
1184 p->p.disabled = 1;
1185 bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
1186 proto_notify_state(&p->p, PS_DOWN);
1187 return;
1188 }
1189
1190 p->neigh = n;
1191
1192 if (n->scope <= 0)
1193 BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
1194 else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1195 BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
1196 else
1197 bgp_start_neighbor(p);
1198 }
1199
1200 static int
bgp_start(struct proto * P)1201 bgp_start(struct proto *P)
1202 {
1203 struct bgp_proto *p = (struct bgp_proto *) P;
1204 struct object_lock *lock;
1205
1206 DBG("BGP: Startup.\n");
1207 p->start_state = BSS_PREPARE;
1208 p->outgoing_conn.state = BS_IDLE;
1209 p->incoming_conn.state = BS_IDLE;
1210 p->neigh = NULL;
1211 p->bfd_req = NULL;
1212 p->gr_ready = 0;
1213 p->gr_active = 0;
1214
1215 rt_lock_table(p->igp_table);
1216
1217 p->event = ev_new(p->p.pool);
1218 p->event->hook = bgp_decision;
1219 p->event->data = p;
1220
1221 p->startup_timer = tm_new(p->p.pool);
1222 p->startup_timer->hook = bgp_startup_timeout;
1223 p->startup_timer->data = p;
1224
1225 p->gr_timer = tm_new(p->p.pool);
1226 p->gr_timer->hook = bgp_graceful_restart_timeout;
1227 p->gr_timer->data = p;
1228
1229 p->local_id = proto_get_router_id(P->cf);
1230 if (p->rr_client)
1231 p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
1232
1233 p->remote_id = 0;
1234 p->source_addr = p->cf->source_addr;
1235
1236 if (p->p.gr_recovery && p->cf->gr_mode)
1237 proto_graceful_restart_lock(P);
1238
1239 /*
1240 * Before attempting to create the connection, we need to lock the
1241 * port, so that are sure we're the only instance attempting to talk
1242 * with that neighbor.
1243 */
1244
1245 lock = p->lock = olock_new(P->pool);
1246 lock->addr = p->cf->remote_ip;
1247 lock->port = p->cf->remote_port;
1248 lock->iface = p->cf->iface;
1249 lock->vrf = p->cf->iface ? NULL : p->p.vrf;
1250 lock->type = OBJLOCK_TCP;
1251 lock->hook = bgp_start_locked;
1252 lock->data = p;
1253 olock_acquire(lock);
1254
1255 return PS_START;
1256 }
1257
1258 extern int proto_restart;
1259
1260 static int
bgp_shutdown(struct proto * P)1261 bgp_shutdown(struct proto *P)
1262 {
1263 struct bgp_proto *p = (struct bgp_proto *) P;
1264 uint subcode = 0;
1265
1266 char *message = NULL;
1267 byte *data = NULL;
1268 uint len = 0;
1269
1270 BGP_TRACE(D_EVENTS, "Shutdown requested");
1271
1272 switch (P->down_code)
1273 {
1274 case PDC_CF_REMOVE:
1275 case PDC_CF_DISABLE:
1276 subcode = 3; // Errcode 6, 3 - peer de-configured
1277 break;
1278
1279 case PDC_CF_RESTART:
1280 subcode = 6; // Errcode 6, 6 - other configuration change
1281 break;
1282
1283 case PDC_CMD_DISABLE:
1284 case PDC_CMD_SHUTDOWN:
1285 subcode = 2; // Errcode 6, 2 - administrative shutdown
1286 message = P->message;
1287 break;
1288
1289 case PDC_CMD_RESTART:
1290 subcode = 4; // Errcode 6, 4 - administrative reset
1291 message = P->message;
1292 break;
1293
1294 case PDC_RX_LIMIT_HIT:
1295 case PDC_IN_LIMIT_HIT:
1296 subcode = 1; // Errcode 6, 1 - max number of prefixes reached
1297 /* log message for compatibility */
1298 log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
1299 goto limit;
1300
1301 case PDC_OUT_LIMIT_HIT:
1302 subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
1303
1304 limit:
1305 bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
1306 if (proto_restart)
1307 bgp_update_startup_delay(p);
1308 else
1309 p->startup_delay = 0;
1310 goto done;
1311 }
1312
1313 bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
1314 p->startup_delay = 0;
1315
1316 /* RFC 8203 - shutdown communication */
1317 if (message)
1318 {
1319 uint msg_len = strlen(message);
1320 msg_len = MIN(msg_len, 255);
1321
1322 /* Buffer will be freed automatically by protocol shutdown */
1323 data = mb_alloc(p->p.pool, msg_len + 1);
1324 len = msg_len + 1;
1325
1326 data[0] = msg_len;
1327 memcpy(data+1, message, msg_len);
1328 }
1329
1330 done:
1331 bgp_stop(p, subcode, data, len);
1332 return p->p.proto_state;
1333 }
1334
1335 static void
bgp_cleanup(struct proto * P)1336 bgp_cleanup(struct proto *P)
1337 {
1338 struct bgp_proto *p = (struct bgp_proto *) P;
1339 rt_unlock_table(p->igp_table);
1340 }
1341
1342 static rtable *
get_igp_table(struct bgp_config * cf)1343 get_igp_table(struct bgp_config *cf)
1344 {
1345 return cf->igp_table ? cf->igp_table->table : cf->c.table->table;
1346 }
1347
1348 static struct proto *
bgp_init(struct proto_config * C)1349 bgp_init(struct proto_config *C)
1350 {
1351 struct proto *P = proto_new(C, sizeof(struct bgp_proto));
1352 struct bgp_config *c = (struct bgp_config *) C;
1353 struct bgp_proto *p = (struct bgp_proto *) P;
1354
1355 P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
1356 P->rt_notify = bgp_rt_notify;
1357 P->import_control = bgp_import_control;
1358 P->neigh_notify = bgp_neigh_notify;
1359 P->reload_routes = bgp_reload_routes;
1360 P->feed_begin = bgp_feed_begin;
1361 P->feed_end = bgp_feed_end;
1362 P->rte_better = bgp_rte_better;
1363 P->rte_mergable = bgp_rte_mergable;
1364 P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
1365 P->rte_modify = bgp_rte_modify_stale;
1366
1367 p->cf = c;
1368 p->local_as = c->local_as;
1369 p->remote_as = c->remote_as;
1370 p->is_internal = (c->local_as == c->remote_as);
1371 p->rs_client = c->rs_client;
1372 p->rr_client = c->rr_client;
1373 p->igp_table = get_igp_table(c);
1374
1375 return P;
1376 }
1377
1378
1379 void
bgp_check_config(struct bgp_config * c)1380 bgp_check_config(struct bgp_config *c)
1381 {
1382 int internal = (c->local_as == c->remote_as);
1383
1384 /* Do not check templates at all */
1385 if (c->c.class == SYM_TEMPLATE)
1386 return;
1387
1388
1389 /* EBGP direct by default, IBGP multihop by default */
1390 if (c->multihop < 0)
1391 c->multihop = internal ? 64 : 0;
1392
1393 /* Different default for gw_mode */
1394 if (!c->gw_mode)
1395 c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT;
1396
1397 /* Different default based on rs_client */
1398 if (!c->missing_lladdr)
1399 c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;
1400
1401 /* LLGR mode default based on GR mode */
1402 if (c->llgr_mode < 0)
1403 c->llgr_mode = c->gr_mode ? BGP_LLGR_AWARE : 0;
1404
1405 /* Disable after error incompatible with restart limit action */
1406 if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
1407 c->c.in_limit->action = PLA_DISABLE;
1408
1409
1410 if (!c->local_as)
1411 cf_error("Local AS number must be set");
1412
1413 if (ipa_zero(c->remote_ip))
1414 cf_error("Neighbor must be configured");
1415
1416 if (!c->remote_as)
1417 cf_error("Remote AS number must be set");
1418
1419 if (ipa_is_link_local(c->remote_ip) && !c->iface)
1420 cf_error("Link-local neighbor address requires specified interface");
1421
1422 if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
1423 cf_error("Neighbor AS number out of range (AS4 not available)");
1424
1425 if (!internal && c->rr_client)
1426 cf_error("Only internal neighbor can be RR client");
1427
1428 if (internal && c->rs_client)
1429 cf_error("Only external neighbor can be RS client");
1430
1431 if (c->multihop && (c->gw_mode == GW_DIRECT))
1432 cf_error("Multihop BGP cannot use direct gateway mode");
1433
1434 if (c->multihop && (ipa_is_link_local(c->remote_ip) ||
1435 ipa_is_link_local(c->source_addr)))
1436 cf_error("Multihop BGP cannot be used with link-local addresses");
1437
1438 if (c->multihop && c->iface)
1439 cf_error("Multihop BGP cannot be bound to interface");
1440
1441 if (c->multihop && c->check_link)
1442 cf_error("Multihop BGP cannot depend on link state");
1443
1444 if (c->multihop && c->bfd && ipa_zero(c->source_addr))
1445 cf_error("Multihop BGP with BFD requires specified source address");
1446
1447 if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted)
1448 cf_error("BGP in recursive mode prohibits sorted table");
1449
1450 if (c->deterministic_med && c->c.table->sorted)
1451 cf_error("BGP with deterministic MED prohibits sorted table");
1452
1453 if (c->secondary && !c->c.table->sorted)
1454 cf_error("BGP with secondary option requires sorted table");
1455
1456 if (!c->gr_mode && c->llgr_mode)
1457 cf_error("Long-lived graceful restart requires basic graceful restart");
1458 }
1459
1460 static int
bgp_reconfigure(struct proto * P,struct proto_config * C)1461 bgp_reconfigure(struct proto *P, struct proto_config *C)
1462 {
1463 struct bgp_config *new = (struct bgp_config *) C;
1464 struct bgp_proto *p = (struct bgp_proto *) P;
1465 struct bgp_config *old = p->cf;
1466
1467 if (proto_get_router_id(C) != p->local_id)
1468 return 0;
1469
1470 int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
1471 ((byte *) new) + sizeof(struct proto_config),
1472 // password item is last and must be checked separately
1473 OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
1474 && ((!old->password && !new->password)
1475 || (old->password && new->password && !strcmp(old->password, new->password)))
1476 && (get_igp_table(old) == get_igp_table(new));
1477
1478 if (same && (p->start_state > BSS_PREPARE))
1479 bgp_update_bfd(p, new->bfd);
1480
1481 /* We should update our copy of configuration ptr as old configuration will be freed */
1482 if (same)
1483 p->cf = new;
1484
1485 return same;
1486 }
1487
1488 static void
bgp_copy_config(struct proto_config * dest,struct proto_config * src)1489 bgp_copy_config(struct proto_config *dest, struct proto_config *src)
1490 {
1491 /* Just a shallow copy */
1492 proto_copy_rest(dest, src, sizeof(struct bgp_config));
1493 }
1494
1495
1496 /**
1497 * bgp_error - report a protocol error
1498 * @c: connection
1499 * @code: error code (according to the RFC)
1500 * @subcode: error sub-code
1501 * @data: data to be passed in the Notification message
1502 * @len: length of the data
1503 *
1504 * bgp_error() sends a notification packet to tell the other side that a protocol
1505 * error has occurred (including the data considered erroneous if possible) and
1506 * closes the connection.
1507 */
1508 void
bgp_error(struct bgp_conn * c,unsigned code,unsigned subcode,byte * data,int len)1509 bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
1510 {
1511 struct bgp_proto *p = c->bgp;
1512
1513 if (c->state == BS_CLOSE)
1514 return;
1515
1516 bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
1517 bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
1518 bgp_conn_enter_close_state(c);
1519
1520 c->notify_code = code;
1521 c->notify_subcode = subcode;
1522 c->notify_data = data;
1523 c->notify_size = (len > 0) ? len : 0;
1524 bgp_schedule_packet(c, PKT_NOTIFICATION);
1525
1526 if (code != 6)
1527 {
1528 bgp_update_startup_delay(p);
1529 bgp_stop(p, 0, NULL, 0);
1530 }
1531 }
1532
1533 /**
1534 * bgp_store_error - store last error for status report
1535 * @p: BGP instance
1536 * @c: connection
1537 * @class: error class (BE_xxx constants)
1538 * @code: error code (class specific)
1539 *
1540 * bgp_store_error() decides whether given error is interesting enough
1541 * and store that error to last_error variables of @p
1542 */
1543 void
bgp_store_error(struct bgp_proto * p,struct bgp_conn * c,u8 class,u32 code)1544 bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
1545 {
1546 /* During PS_UP, we ignore errors on secondary connection */
1547 if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
1548 return;
1549
1550 /* During PS_STOP, we ignore any errors, as we want to report
1551 * the error that caused transition to PS_STOP
1552 */
1553 if (p->p.proto_state == PS_STOP)
1554 return;
1555
1556 p->last_error_class = class;
1557 p->last_error_code = code;
1558 }
1559
/* Names of connection states, indexed by the connection state value */
static char *bgp_state_names[] = {
  "Idle",
  "Connect",
  "Active",
  "OpenSent",
  "OpenConfirm",
  "Established",
  "Close"
};

/* Status prefixes, indexed by the stored last error class */
static char *bgp_err_classes[] = {
  "",
  "Error: ",
  "Socket: ",
  "Received: ",
  "BGP Error: ",
  "Automatic shutdown: ",
  ""
};

/* Messages for errors of class BE_MISC, indexed by the stored error code */
static char *bgp_misc_errors[] = {
  "",
  "Neighbor lost",
  "Invalid next hop",
  "Kernel MD5 auth failed",
  "No listening socket",
  "Link down",
  "BFD session down",
  "Graceful restart"
};

/* Messages for errors of class BE_AUTO_DOWN, indexed by the stored error code */
static char *bgp_auto_errors[] = {
  "",
  "Route limit exceeded"
};
1564
1565 static const char *
bgp_last_errmsg(struct bgp_proto * p)1566 bgp_last_errmsg(struct bgp_proto *p)
1567 {
1568 switch (p->last_error_class)
1569 {
1570 case BE_MISC:
1571 return bgp_misc_errors[p->last_error_code];
1572 case BE_SOCKET:
1573 return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
1574 case BE_BGP_RX:
1575 case BE_BGP_TX:
1576 return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
1577 case BE_AUTO_DOWN:
1578 return bgp_auto_errors[p->last_error_code];
1579 default:
1580 return "";
1581 }
1582 }
1583
1584 static const char *
bgp_state_dsc(struct bgp_proto * p)1585 bgp_state_dsc(struct bgp_proto *p)
1586 {
1587 if (p->p.proto_state == PS_DOWN)
1588 return "Down";
1589
1590 int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
1591 if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
1592 return "Passive";
1593
1594 return bgp_state_names[state];
1595 }
1596
1597 static void
bgp_get_status(struct proto * P,byte * buf)1598 bgp_get_status(struct proto *P, byte *buf)
1599 {
1600 struct bgp_proto *p = (struct bgp_proto *) P;
1601
1602 const char *err1 = bgp_err_classes[p->last_error_class];
1603 const char *err2 = bgp_last_errmsg(p);
1604
1605 if (P->proto_state == PS_DOWN)
1606 bsprintf(buf, "%s%s", err1, err2);
1607 else
1608 bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
1609 }
1610
1611 static void
bgp_show_proto_info(struct proto * P)1612 bgp_show_proto_info(struct proto *P)
1613 {
1614 struct bgp_proto *p = (struct bgp_proto *) P;
1615 struct bgp_conn *c = p->conn;
1616
1617 proto_show_basic_info(P);
1618
1619 cli_msg(-1006, " BGP state: %s", bgp_state_dsc(p));
1620 cli_msg(-1006, " Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
1621 cli_msg(-1006, " Neighbor AS: %u", p->remote_as);
1622
1623 if (p->gr_active)
1624 cli_msg(-1006, " Neighbor graceful restart active");
1625
1626 if (p->gr_active && p->gr_timer->expires)
1627 cli_msg(-1006, " %-15s %d/-",
1628 (p->gr_active != BGP_GRS_LLGR_2) ? "Restart timer:" : "LL stale timer:",
1629 p->gr_timer->expires - now);
1630
1631 if (P->proto_state == PS_START)
1632 {
1633 struct bgp_conn *oc = &p->outgoing_conn;
1634
1635 if ((p->start_state < BSS_CONNECT) &&
1636 (p->startup_timer->expires))
1637 cli_msg(-1006, " Error wait: %d/%d",
1638 p->startup_timer->expires - now, p->startup_delay);
1639
1640 if ((oc->state == BS_ACTIVE) &&
1641 (oc->connect_retry_timer->expires))
1642 cli_msg(-1006, " Connect delay: %d/%d",
1643 oc->connect_retry_timer->expires - now, p->cf->connect_delay_time);
1644 }
1645 else if (P->proto_state == PS_UP)
1646 {
1647 cli_msg(-1006, " Neighbor ID: %R", p->remote_id);
1648 cli_msg(-1006, " Neighbor caps: %s%s%s%s%s%s%s",
1649 c->peer_refresh_support ? " refresh" : "",
1650 c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
1651 c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
1652 c->peer_llgr_able ? " llgr-able" : (c->peer_llgr_aware ? " llgr-aware" : ""),
1653 c->peer_as4_support ? " AS4" : "",
1654 (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
1655 (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "",
1656 c->peer_ext_messages_support ? " ext-messages" : "");
1657 cli_msg(-1006, " Session: %s%s%s%s%s%s%s%s",
1658 p->is_internal ? "internal" : "external",
1659 p->cf->multihop ? " multihop" : "",
1660 p->rr_client ? " route-reflector" : "",
1661 p->rs_client ? " route-server" : "",
1662 p->as4_session ? " AS4" : "",
1663 p->add_path_rx ? " add-path-rx" : "",
1664 p->add_path_tx ? " add-path-tx" : "",
1665 p->ext_messages ? " ext-messages" : "");
1666 cli_msg(-1006, " Source address: %I", p->source_addr);
1667 if (P->cf->in_limit)
1668 cli_msg(-1006, " Route limit: %d/%d",
1669 p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit);
1670 cli_msg(-1006, " Hold timer: %d/%d",
1671 tm_remains(c->hold_timer), c->hold_time);
1672 cli_msg(-1006, " Keepalive timer: %d/%d",
1673 tm_remains(c->keepalive_timer), c->keepalive_time);
1674 }
1675
1676 if ((p->last_error_class != BE_NONE) &&
1677 (p->last_error_class != BE_MAN_DOWN))
1678 {
1679 const char *err1 = bgp_err_classes[p->last_error_class];
1680 const char *err2 = bgp_last_errmsg(p);
1681 cli_msg(-1006, " Last error: %s%s", err1, err2);
1682 }
1683 }
1684
1685 struct protocol proto_bgp = {
1686 .name = "BGP",
1687 .template = "bgp%d",
1688 .attr_class = EAP_BGP,
1689 .preference = DEF_PREF_BGP,
1690 .config_size = sizeof(struct bgp_config),
1691 .init = bgp_init,
1692 .start = bgp_start,
1693 .shutdown = bgp_shutdown,
1694 .cleanup = bgp_cleanup,
1695 .reconfigure = bgp_reconfigure,
1696 .copy_config = bgp_copy_config,
1697 .get_status = bgp_get_status,
1698 .get_attr = bgp_get_attr,
1699 .get_route_info = bgp_get_route_info,
1700 .show_proto_info = bgp_show_proto_info
1701 };
1702