1 /*
2  *	BIRD -- The Border Gateway Protocol
3  *
4  *	(c) 2000 Martin Mares <mj@ucw.cz>
5  *
6  *	Can be freely distributed and used under the terms of the GNU GPL.
7  */
8 
9 /**
10  * DOC: Border Gateway Protocol
11  *
12  * The BGP protocol is implemented in three parts: |bgp.c| which takes care of the
13  * connection and most of the interface with BIRD core, |packets.c| handling
14  * both incoming and outgoing BGP packets and |attrs.c| containing functions for
15  * manipulation with BGP attribute lists.
16  *
17  * As opposed to the other existing routing daemons, BIRD has a sophisticated core
18  * architecture which is able to keep all the information needed by BGP in the
19  * primary routing table, therefore no complex data structures like a central
20  * BGP table are needed. This increases memory footprint of a BGP router with
21  * many connections, but not too much and, which is more important, it makes
22  * BGP much easier to implement.
23  *
 * Each instance of BGP (corresponding to a single BGP peer) is described by a &bgp_proto
 * structure to which are attached individual connections represented by &bgp_conn
26  * (usually, there exists only one connection, but during BGP session setup, there
27  * can be more of them). The connections are handled according to the BGP state machine
28  * defined in the RFC with all the timers and all the parameters configurable.
29  *
30  * In incoming direction, we listen on the connection's socket and each time we receive
31  * some input, we pass it to bgp_rx(). It decodes packet headers and the markers and
32  * passes complete packets to bgp_rx_packet() which distributes the packet according
33  * to its type.
34  *
35  * In outgoing direction, we gather all the routing updates and sort them to buckets
36  * (&bgp_bucket) according to their attributes (we keep a hash table for fast comparison
37  * of &rta's and a &fib which helps us to find if we already have another route for
38  * the same destination queued for sending, so that we can replace it with the new one
39  * immediately instead of sending both updates). There also exists a special bucket holding
40  * all the route withdrawals which cannot be queued anywhere else as they don't have any
41  * attributes. If we have any packet to send (due to either new routes or the connection
 * tracking code wanting to send an Open, Keepalive or Notification message), we call
 * bgp_schedule_packet() which sets the corresponding bit in the @packets_to_send
 * bit field in &bgp_conn and as soon as the transmit socket buffer becomes empty,
45  * we call bgp_fire_tx(). It inspects state of all the packet type bits and calls
46  * the corresponding bgp_create_xx() functions, eventually rescheduling the same packet
47  * type if we have more data of the same type to send.
48  *
49  * The processing of attributes consists of two functions: bgp_decode_attrs() for checking
50  * of the attribute blocks and translating them to the language of BIRD's extended attributes
51  * and bgp_encode_attrs() which does the converse. Both functions are built around a
52  * @bgp_attr_table array describing all important characteristics of all known attributes.
53  * Unknown transitive attributes are attached to the route as %EAF_TYPE_OPAQUE byte streams.
54  *
55  * BGP protocol implements graceful restart in both restarting (local restart)
56  * and receiving (neighbor restart) roles. The first is handled mostly by the
57  * graceful restart code in the nest, BGP protocol just handles capabilities,
58  * sets @gr_wait and locks graceful restart until end-of-RIB mark is received.
59  * The second is implemented by internal restart of the BGP state to %BS_IDLE
60  * and protocol state to %PS_START, but keeping the protocol up from the core
61  * point of view and therefore maintaining received routes. Routing table
62  * refresh cycle (rt_refresh_begin(), rt_refresh_end()) is used for removing
63  * stale routes after reestablishment of BGP session during graceful restart.
64  */
65 
66 #undef LOCAL_DEBUG
67 
68 #include "nest/bird.h"
69 #include "nest/iface.h"
70 #include "nest/protocol.h"
71 #include "nest/route.h"
72 #include "nest/cli.h"
73 #include "nest/locks.h"
74 #include "conf/conf.h"
75 #include "lib/socket.h"
76 #include "lib/resource.h"
77 #include "lib/string.h"
78 
79 #include "bgp.h"
80 
81 
82 struct linpool *bgp_linpool;		/* Global temporary pool */
83 static sock *bgp_listen_sk;		/* Global listening socket */
84 static int bgp_counter;			/* Number of protocol instances using the listening socket */
85 
86 static void bgp_close(struct bgp_proto *p, int apply_md5);
87 static void bgp_connect(struct bgp_proto *p);
88 static void bgp_active(struct bgp_proto *p);
89 static sock *bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags);
90 static void bgp_update_bfd(struct bgp_proto *p, int use_bfd);
91 
92 
93 /**
94  * bgp_open - open a BGP instance
95  * @p: BGP instance
96  *
97  * This function allocates and configures shared BGP resources.
98  * Should be called as the last step during initialization
99  * (when lock is acquired and neighbor is ready).
100  * When error, state changed to PS_DOWN, -1 is returned and caller
101  * should return immediately.
102  */
103 static int
bgp_open(struct bgp_proto * p)104 bgp_open(struct bgp_proto *p)
105 {
106   struct config *cfg = p->cf->c.global;
107   int errcode;
108 
109   if (!bgp_listen_sk)
110     bgp_listen_sk = bgp_setup_listen_sk(cfg->listen_bgp_addr, cfg->listen_bgp_port, cfg->listen_bgp_flags);
111 
112   if (!bgp_listen_sk)
113     {
114       errcode = BEM_NO_SOCKET;
115       goto err;
116     }
117 
118   if (!bgp_linpool)
119     bgp_linpool = lp_new(&root_pool, 4080);
120 
121   bgp_counter++;
122 
123   if (p->cf->password)
124     if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
125 			p->cf->iface, p->cf->password, p->cf->setkey) < 0)
126       {
127 	sk_log_error(bgp_listen_sk, p->p.name);
128 	bgp_close(p, 0);
129 	errcode = BEM_INVALID_MD5;
130 	goto err;
131       }
132 
133   return 0;
134 
135 err:
136   p->p.disabled = 1;
137   bgp_store_error(p, NULL, BE_MISC, errcode);
138   proto_notify_state(&p->p, PS_DOWN);
139   return -1;
140 }
141 
142 static void
bgp_startup(struct bgp_proto * p)143 bgp_startup(struct bgp_proto *p)
144 {
145   BGP_TRACE(D_EVENTS, "Started");
146   p->start_state = p->cf->capabilities ? BSS_CONNECT : BSS_CONNECT_NOCAP;
147 
148   if (!p->cf->passive)
149     bgp_active(p);
150 }
151 
152 static void
bgp_startup_timeout(timer * t)153 bgp_startup_timeout(timer *t)
154 {
155   bgp_startup(t->data);
156 }
157 
158 
159 static void
bgp_initiate(struct bgp_proto * p)160 bgp_initiate(struct bgp_proto *p)
161 {
162   int rv = bgp_open(p);
163   if (rv < 0)
164     return;
165 
166   if (p->cf->bfd)
167     bgp_update_bfd(p, p->cf->bfd);
168 
169   if (p->startup_delay)
170     {
171       p->start_state = BSS_DELAY;
172       BGP_TRACE(D_EVENTS, "Startup delayed by %d seconds due to errors", p->startup_delay);
173       bgp_start_timer(p->startup_timer, p->startup_delay);
174     }
175   else
176     bgp_startup(p);
177 }
178 
179 /**
180  * bgp_close - close a BGP instance
181  * @p: BGP instance
182  * @apply_md5: 0 to disable unsetting MD5 auth
183  *
184  * This function frees and deconfigures shared BGP resources.
185  * @apply_md5 is set to 0 when bgp_close is called as a cleanup
186  * from failed bgp_open().
187  */
188 static void
bgp_close(struct bgp_proto * p,int apply_md5)189 bgp_close(struct bgp_proto *p, int apply_md5)
190 {
191   ASSERT(bgp_counter);
192   bgp_counter--;
193 
194   if (p->cf->password && apply_md5)
195     if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip,
196 			p->cf->iface, NULL, p->cf->setkey) < 0)
197       sk_log_error(bgp_listen_sk, p->p.name);
198 
199   if (!bgp_counter)
200     {
201       rfree(bgp_listen_sk);
202       bgp_listen_sk = NULL;
203       rfree(bgp_linpool);
204       bgp_linpool = NULL;
205     }
206 }
207 
208 /**
209  * bgp_start_timer - start a BGP timer
210  * @t: timer
211  * @value: time to fire (0 to disable the timer)
212  *
213  * This functions calls tm_start() on @t with time @value and the
214  * amount of randomization suggested by the BGP standard. Please use
215  * it for all BGP timers.
216  */
217 void
bgp_start_timer(timer * t,int value)218 bgp_start_timer(timer *t, int value)
219 {
220   if (value)
221     {
222       /* The randomization procedure is specified in RFC 1771: 9.2.3.3 */
223       t->randomize = value / 4;
224       tm_start(t, value - t->randomize);
225     }
226   else
227     tm_stop(t);
228 }
229 
230 /**
231  * bgp_close_conn - close a BGP connection
232  * @conn: connection to close
233  *
234  * This function takes a connection described by the &bgp_conn structure,
235  * closes its socket and frees all resources associated with it.
236  */
237 void
bgp_close_conn(struct bgp_conn * conn)238 bgp_close_conn(struct bgp_conn *conn)
239 {
240   // struct bgp_proto *p = conn->bgp;
241 
242   DBG("BGP: Closing connection\n");
243   conn->packets_to_send = 0;
244   rfree(conn->connect_retry_timer);
245   conn->connect_retry_timer = NULL;
246   rfree(conn->keepalive_timer);
247   conn->keepalive_timer = NULL;
248   rfree(conn->hold_timer);
249   conn->hold_timer = NULL;
250   rfree(conn->sk);
251   conn->sk = NULL;
252   rfree(conn->tx_ev);
253   conn->tx_ev = NULL;
254 }
255 
256 
257 /**
258  * bgp_update_startup_delay - update a startup delay
259  * @p: BGP instance
260  *
261  * This function updates a startup delay that is used to postpone next BGP connect.
262  * It also handles disable_after_error and might stop BGP instance when error
263  * happened and disable_after_error is on.
264  *
265  * It should be called when BGP protocol error happened.
266  */
267 void
bgp_update_startup_delay(struct bgp_proto * p)268 bgp_update_startup_delay(struct bgp_proto *p)
269 {
270   struct bgp_config *cf = p->cf;
271 
272   DBG("BGP: Updating startup delay\n");
273 
274   if (p->last_proto_error && ((now - p->last_proto_error) >= (int) cf->error_amnesia_time))
275     p->startup_delay = 0;
276 
277   p->last_proto_error = now;
278 
279   if (cf->disable_after_error)
280     {
281       p->startup_delay = 0;
282       p->p.disabled = 1;
283       return;
284     }
285 
286   if (!p->startup_delay)
287     p->startup_delay = cf->error_delay_time_min;
288   else
289     p->startup_delay = MIN(2 * p->startup_delay, cf->error_delay_time_max);
290 }
291 
292 static void
bgp_graceful_close_conn(struct bgp_conn * conn,uint subcode,byte * data,uint len)293 bgp_graceful_close_conn(struct bgp_conn *conn, uint subcode, byte *data, uint len)
294 {
295   switch (conn->state)
296     {
297     case BS_IDLE:
298     case BS_CLOSE:
299       return;
300     case BS_CONNECT:
301     case BS_ACTIVE:
302       bgp_conn_enter_idle_state(conn);
303       return;
304     case BS_OPENSENT:
305     case BS_OPENCONFIRM:
306     case BS_ESTABLISHED:
307       bgp_error(conn, 6, subcode, data, len);
308       return;
309     default:
310       bug("bgp_graceful_close_conn: Unknown state %d", conn->state);
311     }
312 }
313 
314 static void
bgp_down(struct bgp_proto * p)315 bgp_down(struct bgp_proto *p)
316 {
317   if (p->start_state > BSS_PREPARE)
318     bgp_close(p, 1);
319 
320   BGP_TRACE(D_EVENTS, "Down");
321   proto_notify_state(&p->p, PS_DOWN);
322 }
323 
324 static void
bgp_decision(void * vp)325 bgp_decision(void *vp)
326 {
327   struct bgp_proto *p = vp;
328 
329   DBG("BGP: Decision start\n");
330   if ((p->p.proto_state == PS_START)
331       && (p->outgoing_conn.state == BS_IDLE)
332       && (p->incoming_conn.state != BS_OPENCONFIRM)
333       && (!p->cf->passive))
334     bgp_active(p);
335 
336   if ((p->p.proto_state == PS_STOP)
337       && (p->outgoing_conn.state == BS_IDLE)
338       && (p->incoming_conn.state == BS_IDLE))
339     bgp_down(p);
340 }
341 
342 void
bgp_stop(struct bgp_proto * p,uint subcode,byte * data,uint len)343 bgp_stop(struct bgp_proto *p, uint subcode, byte *data, uint len)
344 {
345   proto_notify_state(&p->p, PS_STOP);
346   bgp_graceful_close_conn(&p->outgoing_conn, subcode, data, len);
347   bgp_graceful_close_conn(&p->incoming_conn, subcode, data, len);
348   ev_schedule(p->event);
349 }
350 
351 static inline void
bgp_conn_set_state(struct bgp_conn * conn,unsigned new_state)352 bgp_conn_set_state(struct bgp_conn *conn, unsigned new_state)
353 {
354   if (conn->bgp->p.mrtdump & MD_STATES)
355     bgp_dump_state_change(conn, conn->state, new_state);
356 
357   conn->state = new_state;
358 }
359 
360 void
bgp_conn_enter_openconfirm_state(struct bgp_conn * conn)361 bgp_conn_enter_openconfirm_state(struct bgp_conn *conn)
362 {
363   /* Really, most of the work is done in bgp_rx_open(). */
364   bgp_conn_set_state(conn, BS_OPENCONFIRM);
365 }
366 
367 void
bgp_conn_enter_established_state(struct bgp_conn * conn)368 bgp_conn_enter_established_state(struct bgp_conn *conn)
369 {
370   struct bgp_proto *p = conn->bgp;
371 
372   BGP_TRACE(D_EVENTS, "BGP session established");
373   DBG("BGP: UP!!!\n");
374 
375   /* For multi-hop BGP sessions */
376   if (ipa_zero(p->source_addr))
377     p->source_addr = conn->sk->saddr;
378 
379   conn->sk->fast_rx = 0;
380 
381   p->conn = conn;
382   p->last_error_class = 0;
383   p->last_error_code = 0;
384   p->feed_state = BFS_NONE;
385   p->load_state = BFS_NONE;
386   bgp_init_bucket_table(p);
387   bgp_init_prefix_table(p, 8);
388 
389   int peer_gr_ready = conn->peer_gr_aware && !(conn->peer_gr_flags & BGP_GRF_RESTART);
390 
391   if (p->p.gr_recovery && !peer_gr_ready)
392     proto_graceful_restart_unlock(&p->p);
393 
394   if (p->p.gr_recovery && (p->cf->gr_mode == BGP_GR_ABLE) && peer_gr_ready)
395     p->p.gr_wait = 1;
396 
397   if (p->gr_active == BGP_GRS_ACTIVE)
398     tm_stop(p->gr_timer);
399 
400   /* Check F-bit for regular graceful restart */
401   if ((p->gr_active == BGP_GRS_ACTIVE) &&
402       (!conn->peer_gr_able || !(conn->peer_gr_aflags & BGP_GRF_FORWARDING)))
403     bgp_graceful_restart_done(p);
404 
405   /* Check F-bit for long-lived graceful restart */
406   if (((p->gr_active == BGP_GRS_LLGR_1) || (p->gr_active == BGP_GRS_LLGR_2)) &&
407       (!conn->peer_llgr_able || !(conn->peer_llgr_aflags & BGP_LLGRF_FORWARDING)))
408     bgp_graceful_restart_done(p);
409 
410   /* GR capability implies that neighbor will send End-of-RIB */
411   if (conn->peer_gr_aware)
412     p->load_state = BFS_LOADING;
413 
414   /* proto_notify_state() will likely call bgp_feed_begin(), setting p->feed_state */
415 
416   bgp_conn_set_state(conn, BS_ESTABLISHED);
417   proto_notify_state(&p->p, PS_UP);
418 }
419 
420 static void
bgp_conn_leave_established_state(struct bgp_proto * p)421 bgp_conn_leave_established_state(struct bgp_proto *p)
422 {
423   BGP_TRACE(D_EVENTS, "BGP session closed");
424   p->conn = NULL;
425 
426   bgp_free_prefix_table(p);
427   bgp_free_bucket_table(p);
428 
429   if (p->p.proto_state == PS_UP)
430     bgp_stop(p, 0, NULL, 0);
431 }
432 
433 void
bgp_conn_enter_close_state(struct bgp_conn * conn)434 bgp_conn_enter_close_state(struct bgp_conn *conn)
435 {
436   struct bgp_proto *p = conn->bgp;
437   int os = conn->state;
438 
439   bgp_conn_set_state(conn, BS_CLOSE);
440   tm_stop(conn->keepalive_timer);
441   conn->sk->rx_hook = NULL;
442 
443   /* Timeout for CLOSE state, if we cannot send notification soon then we just hangup */
444   bgp_start_timer(conn->hold_timer, 10);
445 
446   if (os == BS_ESTABLISHED)
447     bgp_conn_leave_established_state(p);
448 }
449 
450 void
bgp_conn_enter_idle_state(struct bgp_conn * conn)451 bgp_conn_enter_idle_state(struct bgp_conn *conn)
452 {
453   struct bgp_proto *p = conn->bgp;
454   int os = conn->state;
455 
456   bgp_close_conn(conn);
457   bgp_conn_set_state(conn, BS_IDLE);
458   ev_schedule(p->event);
459 
460   if (os == BS_ESTABLISHED)
461     bgp_conn_leave_established_state(p);
462 }
463 
464 /**
465  * bgp_handle_graceful_restart - handle detected BGP graceful restart
466  * @p: BGP instance
467  *
468  * This function is called when a BGP graceful restart of the neighbor is
469  * detected (when the TCP connection fails or when a new TCP connection
470  * appears). The function activates processing of the restart - starts routing
471  * table refresh cycle and activates BGP restart timer. The protocol state goes
472  * back to %PS_START, but changing BGP state back to %BS_IDLE is left for the
473  * caller.
474  */
475 void
bgp_handle_graceful_restart(struct bgp_proto * p)476 bgp_handle_graceful_restart(struct bgp_proto *p)
477 {
478   ASSERT(p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready);
479 
480   BGP_TRACE(D_EVENTS, "Neighbor graceful restart detected%s",
481 	    p->gr_active ? " - already pending" : "");
482   proto_notify_state(&p->p, PS_START);
483 
484   switch (p->gr_active)
485   {
486   case BGP_GRS_ACTIVE:
487     rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
488     break;
489 
490   case BGP_GRS_LLGR_1:
491     rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
492     return;
493 
494   case BGP_GRS_LLGR_2:
495     rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
496     rt_modify_stale(p->p.main_ahook->table, p->p.main_ahook);
497     return;
498   }
499 
500   p->stale_time = p->cf->llgr_mode ? p->conn->peer_llgr_time : 0;
501   p->gr_active = !p->stale_time ? BGP_GRS_ACTIVE : BGP_GRS_LLGR_1;
502   tm_start(p->gr_timer, p->conn->peer_gr_time);
503   rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
504 }
505 
506 /**
507  * bgp_graceful_restart_done - finish active BGP graceful restart
508  * @p: BGP instance
509  *
510  * This function is called when the active BGP graceful restart of the neighbor
511  * should be finished - either successfully (the neighbor sends all paths and
512  * reports end-of-RIB on the new session) or unsuccessfully (the neighbor does
513  * not support BGP graceful restart on the new session). The function ends
514  * routing table refresh cycle and stops BGP restart timer.
515  */
516 void
bgp_graceful_restart_done(struct bgp_proto * p)517 bgp_graceful_restart_done(struct bgp_proto *p)
518 {
519   BGP_TRACE(D_EVENTS, "Neighbor graceful restart done");
520   p->gr_active = 0;
521   tm_stop(p->gr_timer);
522   rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
523 }
524 
525 /**
526  * bgp_graceful_restart_timeout - timeout of graceful restart 'restart timer'
527  * @t: timer
528  *
529  * This function is a timeout hook for @gr_timer, implementing BGP restart time
530  * limit for reestablisment of the BGP session after the graceful restart. When
531  * fired, we just proceed with the usual protocol restart.
532  */
533 
534 static void
bgp_graceful_restart_timeout(timer * t)535 bgp_graceful_restart_timeout(timer *t)
536 {
537   struct bgp_proto *p = t->data;
538 
539   switch (p->gr_active)
540   {
541   case BGP_GRS_ACTIVE:
542     BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
543     bgp_stop(p, 0, NULL, 0);
544     return;
545 
546   case BGP_GRS_LLGR_1:
547     BGP_TRACE(D_EVENTS, "Neighbor graceful restart timeout");
548     p->gr_active = BGP_GRS_LLGR_2;
549     tm_start(p->gr_timer, p->stale_time);
550     rt_modify_stale(p->p.main_ahook->table, p->p.main_ahook);
551     return;
552 
553   case BGP_GRS_LLGR_2:
554     BGP_TRACE(D_EVENTS, "Long-lived graceful restart timeout");
555     p->gr_active = 0;
556     rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
557     return;
558   }
559 }
560 
561 /**
562  * bgp_refresh_begin - start incoming enhanced route refresh sequence
563  * @p: BGP instance
564  *
565  * This function is called when an incoming enhanced route refresh sequence is
566  * started by the neighbor, demarcated by the BoRR packet. The function updates
567  * the load state and starts the routing table refresh cycle. Note that graceful
568  * restart also uses routing table refresh cycle, but RFC 7313 and load states
569  * ensure that these two sequences do not overlap.
570  */
571 void
bgp_refresh_begin(struct bgp_proto * p)572 bgp_refresh_begin(struct bgp_proto *p)
573 {
574   if (p->load_state == BFS_LOADING)
575     { log(L_WARN "%s: BEGIN-OF-RR received before END-OF-RIB, ignoring", p->p.name); return; }
576 
577   p->load_state = BFS_REFRESHING;
578   rt_refresh_begin(p->p.main_ahook->table, p->p.main_ahook);
579 }
580 
581 /**
582  * bgp_refresh_end - finish incoming enhanced route refresh sequence
583  * @p: BGP instance
584  *
585  * This function is called when an incoming enhanced route refresh sequence is
586  * finished by the neighbor, demarcated by the EoRR packet. The function updates
587  * the load state and ends the routing table refresh cycle. Routes not received
588  * during the sequence are removed by the nest.
589  */
590 void
bgp_refresh_end(struct bgp_proto * p)591 bgp_refresh_end(struct bgp_proto *p)
592 {
593   if (p->load_state != BFS_REFRESHING)
594     { log(L_WARN "%s: END-OF-RR received without prior BEGIN-OF-RR, ignoring", p->p.name); return; }
595 
596   p->load_state = BFS_NONE;
597   rt_refresh_end(p->p.main_ahook->table, p->p.main_ahook);
598 }
599 
600 
601 static void
bgp_send_open(struct bgp_conn * conn)602 bgp_send_open(struct bgp_conn *conn)
603 {
604   conn->start_state = conn->bgp->start_state;
605 
606   // Default values, possibly changed by receiving capabilities.
607   conn->advertised_as = 0;
608   conn->peer_refresh_support = 0;
609   conn->peer_as4_support = 0;
610   conn->peer_add_path = 0;
611   conn->peer_enhanced_refresh_support = 0;
612   conn->peer_gr_aware = 0;
613   conn->peer_gr_able = 0;
614   conn->peer_gr_time = 0;
615   conn->peer_gr_flags = 0;
616   conn->peer_gr_aflags = 0;
617   conn->peer_llgr_aware = 0;
618   conn->peer_llgr_able = 0;
619   conn->peer_llgr_time = 0;
620   conn->peer_llgr_aflags = 0;
621   conn->peer_ext_messages_support = 0;
622 
623   DBG("BGP: Sending open\n");
624   conn->sk->rx_hook = bgp_rx;
625   conn->sk->tx_hook = bgp_tx;
626   tm_stop(conn->connect_retry_timer);
627   bgp_schedule_packet(conn, PKT_OPEN);
628   bgp_conn_set_state(conn, BS_OPENSENT);
629   bgp_start_timer(conn->hold_timer, conn->bgp->cf->initial_hold_time);
630 }
631 
632 static void
bgp_connected(sock * sk)633 bgp_connected(sock *sk)
634 {
635   struct bgp_conn *conn = sk->data;
636   struct bgp_proto *p = conn->bgp;
637 
638   BGP_TRACE(D_EVENTS, "Connected");
639   bgp_send_open(conn);
640 }
641 
642 static void
bgp_connect_timeout(timer * t)643 bgp_connect_timeout(timer *t)
644 {
645   struct bgp_conn *conn = t->data;
646   struct bgp_proto *p = conn->bgp;
647 
648   DBG("BGP: connect_timeout\n");
649   if (p->p.proto_state == PS_START)
650     {
651       bgp_close_conn(conn);
652       bgp_connect(p);
653     }
654   else
655     bgp_conn_enter_idle_state(conn);
656 }
657 
658 static void
bgp_sock_err(sock * sk,int err)659 bgp_sock_err(sock *sk, int err)
660 {
661   struct bgp_conn *conn = sk->data;
662   struct bgp_proto *p = conn->bgp;
663 
664   /*
665    * This error hook may be called either asynchronously from main
666    * loop, or synchronously from sk_send().  But sk_send() is called
667    * only from bgp_tx() and bgp_kick_tx(), which are both called
668    * asynchronously from main loop. Moreover, they end if err hook is
669    * called. Therefore, we could suppose that it is always called
670    * asynchronously.
671    */
672 
673   bgp_store_error(p, conn, BE_SOCKET, err);
674 
675   if (err)
676     BGP_TRACE(D_EVENTS, "Connection lost (%M)", err);
677   else
678     BGP_TRACE(D_EVENTS, "Connection closed");
679 
680   if ((conn->state == BS_ESTABLISHED) && p->gr_ready)
681     bgp_handle_graceful_restart(p);
682 
683   bgp_conn_enter_idle_state(conn);
684 }
685 
686 static void
bgp_hold_timeout(timer * t)687 bgp_hold_timeout(timer *t)
688 {
689   struct bgp_conn *conn = t->data;
690   struct bgp_proto *p = conn->bgp;
691 
692   DBG("BGP: Hold timeout\n");
693 
694   /* We are already closing the connection - just do hangup */
695   if (conn->state == BS_CLOSE)
696   {
697     BGP_TRACE(D_EVENTS, "Connection stalled");
698     bgp_conn_enter_idle_state(conn);
699     return;
700   }
701 
702   /* If there is something in input queue, we are probably congested
703      and perhaps just not processed BGP packets in time. */
704 
705   if (sk_rx_ready(conn->sk) > 0)
706     bgp_start_timer(conn->hold_timer, 10);
707   else if ((conn->state == BS_ESTABLISHED) && p->gr_ready && conn->peer_llgr_able)
708   {
709     BGP_TRACE(D_EVENTS, "Hold timer expired");
710     bgp_handle_graceful_restart(p);
711     bgp_conn_enter_idle_state(conn);
712   }
713   else
714     bgp_error(conn, 4, 0, NULL, 0);
715 }
716 
717 static void
bgp_keepalive_timeout(timer * t)718 bgp_keepalive_timeout(timer *t)
719 {
720   struct bgp_conn *conn = t->data;
721 
722   DBG("BGP: Keepalive timer\n");
723   bgp_schedule_packet(conn, PKT_KEEPALIVE);
724 
725   /* Kick TX a bit faster */
726   if (ev_active(conn->tx_ev))
727     ev_run(conn->tx_ev);
728 }
729 
730 static void
bgp_setup_conn(struct bgp_proto * p,struct bgp_conn * conn)731 bgp_setup_conn(struct bgp_proto *p, struct bgp_conn *conn)
732 {
733   timer *t;
734 
735   conn->sk = NULL;
736   conn->bgp = p;
737   conn->packets_to_send = 0;
738 
739   t = conn->connect_retry_timer = tm_new(p->p.pool);
740   t->hook = bgp_connect_timeout;
741   t->data = conn;
742   t = conn->hold_timer = tm_new(p->p.pool);
743   t->hook = bgp_hold_timeout;
744   t->data = conn;
745   t = conn->keepalive_timer = tm_new(p->p.pool);
746   t->hook = bgp_keepalive_timeout;
747   t->data = conn;
748   conn->tx_ev = ev_new(p->p.pool);
749   conn->tx_ev->hook = bgp_kick_tx;
750   conn->tx_ev->data = conn;
751 }
752 
753 static void
bgp_setup_sk(struct bgp_conn * conn,sock * s)754 bgp_setup_sk(struct bgp_conn *conn, sock *s)
755 {
756   s->data = conn;
757   s->err_hook = bgp_sock_err;
758   s->fast_rx = 1;
759   conn->sk = s;
760 }
761 
762 static void
bgp_active(struct bgp_proto * p)763 bgp_active(struct bgp_proto *p)
764 {
765   int delay = MAX(1, p->cf->connect_delay_time);
766   struct bgp_conn *conn = &p->outgoing_conn;
767 
768   BGP_TRACE(D_EVENTS, "Connect delayed by %d seconds", delay);
769   bgp_setup_conn(p, conn);
770   bgp_conn_set_state(conn, BS_ACTIVE);
771   bgp_start_timer(conn->connect_retry_timer, delay);
772 }
773 
774 /**
775  * bgp_connect - initiate an outgoing connection
776  * @p: BGP instance
777  *
778  * The bgp_connect() function creates a new &bgp_conn and initiates
779  * a TCP connection to the peer. The rest of connection setup is governed
780  * by the BGP state machine as described in the standard.
781  */
782 static void
bgp_connect(struct bgp_proto * p)783 bgp_connect(struct bgp_proto *p)	/* Enter Connect state and start establishing connection */
784 {
785   sock *s;
786   struct bgp_conn *conn = &p->outgoing_conn;
787   int hops = p->cf->multihop ? : 1;
788 
789   DBG("BGP: Connecting\n");
790   s = sk_new(p->p.pool);
791   s->type = SK_TCP_ACTIVE;
792   s->saddr = p->source_addr;
793   s->daddr = p->cf->remote_ip;
794   s->dport = p->cf->remote_port;
795   s->iface = p->neigh ? p->neigh->iface : NULL;
796   s->vrf = p->p.vrf;
797   s->ttl = p->cf->ttl_security ? 255 : hops;
798   s->rbsize = p->cf->enable_extended_messages ? BGP_RX_BUFFER_EXT_SIZE : BGP_RX_BUFFER_SIZE;
799   s->tbsize = p->cf->enable_extended_messages ? BGP_TX_BUFFER_EXT_SIZE : BGP_TX_BUFFER_SIZE;
800   s->tos = IP_PREC_INTERNET_CONTROL;
801   s->password = p->cf->password;
802   s->tx_hook = bgp_connected;
803   BGP_TRACE(D_EVENTS, "Connecting to %I%J from local address %I%J", s->daddr, p->cf->iface,
804 	    s->saddr, ipa_is_link_local(s->saddr) ? s->iface : NULL);
805   bgp_setup_conn(p, conn);
806   bgp_setup_sk(conn, s);
807   bgp_conn_set_state(conn, BS_CONNECT);
808 
809   if (sk_open(s) < 0)
810     goto err;
811 
812   /* Set minimal receive TTL if needed */
813   if (p->cf->ttl_security)
814     if (sk_set_min_ttl(s, 256 - hops) < 0)
815       goto err;
816 
817   DBG("BGP: Waiting for connect success\n");
818   bgp_start_timer(conn->connect_retry_timer, p->cf->connect_retry_time);
819   return;
820 
821  err:
822   sk_log_error(s, p->p.name);
823   bgp_sock_err(s, 0);
824   return;
825 }
826 
827 /**
828  * bgp_find_proto - find existing proto for incoming connection
829  * @sk: TCP socket
830  *
831  */
832 static struct bgp_proto *
bgp_find_proto(sock * sk)833 bgp_find_proto(sock *sk)
834 {
835   struct proto_config *pc;
836 
837   WALK_LIST(pc, config->protos)
838     if ((pc->protocol == &proto_bgp) && pc->proto)
839       {
840 	struct bgp_proto *p = (struct bgp_proto *) pc->proto;
841 	if (ipa_equal(p->cf->remote_ip, sk->daddr) &&
842 	    (!p->cf->iface || (p->cf->iface == sk->iface)))
843 	  return p;
844       }
845 
846   return NULL;
847 }
848 
849 /**
850  * bgp_incoming_connection - handle an incoming connection
851  * @sk: TCP socket
852  * @dummy: unused
853  *
854  * This function serves as a socket hook for accepting of new BGP
855  * connections. It searches a BGP instance corresponding to the peer
856  * which has connected and if such an instance exists, it creates a
857  * &bgp_conn structure, attaches it to the instance and either sends
858  * an Open message or (if there already is an active connection) it
859  * closes the new connection by sending a Notification message.
860  */
861 static int
bgp_incoming_connection(sock * sk,uint dummy UNUSED)862 bgp_incoming_connection(sock *sk, uint dummy UNUSED)
863 {
864   struct bgp_proto *p;
865   int acc, hops;
866 
867   DBG("BGP: Incoming connection from %I port %d\n", sk->daddr, sk->dport);
868   p = bgp_find_proto(sk);
869   if (!p)
870     {
871       log(L_WARN "BGP: Unexpected connect from unknown address %I%J (port %d)",
872 	  sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL, sk->dport);
873       rfree(sk);
874       return 0;
875     }
876 
877   /*
878    * BIRD should keep multiple incoming connections in OpenSent state (for
879    * details RFC 4271 8.2.1 par 3), but it keeps just one. Duplicate incoming
880    * connections are rejected istead. The exception is the case where an
881    * incoming connection triggers a graceful restart.
882    */
883 
884   acc = (p->p.proto_state == PS_START || p->p.proto_state == PS_UP) &&
885     (p->start_state >= BSS_CONNECT) && (!p->incoming_conn.sk);
886 
887   if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
888     {
889       bgp_store_error(p, NULL, BE_MISC, BEM_GRACEFUL_RESTART);
890       bgp_handle_graceful_restart(p);
891       bgp_conn_enter_idle_state(p->conn);
892       acc = 1;
893 
894       /* There might be separate incoming connection in OpenSent state */
895       if (p->incoming_conn.state > BS_ACTIVE)
896 	bgp_close_conn(&p->incoming_conn);
897     }
898 
899   BGP_TRACE(D_EVENTS, "Incoming connection from %I%J (port %d) %s",
900 	    sk->daddr, ipa_is_link_local(sk->daddr) ? sk->iface : NULL,
901 	    sk->dport, acc ? "accepted" : "rejected");
902 
903   if (!acc)
904     {
905       rfree(sk);
906       return 0;
907     }
908 
909   hops = p->cf->multihop ? : 1;
910 
911   if (sk_set_ttl(sk, p->cf->ttl_security ? 255 : hops) < 0)
912     goto err;
913 
914   if (p->cf->ttl_security)
915     if (sk_set_min_ttl(sk, 256 - hops) < 0)
916       goto err;
917 
918   if (p->cf->enable_extended_messages)
919     {
920       sk->rbsize = BGP_RX_BUFFER_EXT_SIZE;
921       sk->tbsize = BGP_TX_BUFFER_EXT_SIZE;
922       sk_reallocate(sk);
923     }
924 
925   bgp_setup_conn(p, &p->incoming_conn);
926   bgp_setup_sk(&p->incoming_conn, sk);
927   bgp_send_open(&p->incoming_conn);
928   return 0;
929 
930 err:
931   sk_log_error(sk, p->p.name);
932   log(L_ERR "%s: Incoming connection aborted", p->p.name);
933   rfree(sk);
934   return 0;
935 }
936 
937 static void
bgp_listen_sock_err(sock * sk UNUSED,int err)938 bgp_listen_sock_err(sock *sk UNUSED, int err)
939 {
940   if (err == ECONNABORTED)
941     log(L_WARN "BGP: Incoming connection aborted");
942   else
943     log(L_ERR "BGP: Error on listening socket: %M", err);
944 }
945 
946 static sock *
bgp_setup_listen_sk(ip_addr addr,unsigned port,u32 flags)947 bgp_setup_listen_sk(ip_addr addr, unsigned port, u32 flags)
948 {
949   sock *s = sk_new(&root_pool);
950   DBG("BGP: Creating listening socket\n");
951   s->type = SK_TCP_PASSIVE;
952   s->ttl = 255;
953   s->saddr = addr;
954   s->sport = port ? port : BGP_PORT;
955   s->flags = flags ? 0 : SKF_V6ONLY;
956   s->tos = IP_PREC_INTERNET_CONTROL;
957   s->rbsize = BGP_RX_BUFFER_SIZE;
958   s->tbsize = BGP_TX_BUFFER_SIZE;
959   s->rx_hook = bgp_incoming_connection;
960   s->err_hook = bgp_listen_sock_err;
961 
962   if (sk_open(s) < 0)
963     goto err;
964 
965   return s;
966 
967  err:
968   sk_log_error(s, "BGP");
969   log(L_ERR "BGP: Cannot open listening socket");
970   rfree(s);
971   return NULL;
972 }
973 
974 static void
bgp_start_neighbor(struct bgp_proto * p)975 bgp_start_neighbor(struct bgp_proto *p)
976 {
977   /* Called only for single-hop BGP sessions */
978 
979   if (ipa_zero(p->source_addr))
980     p->source_addr = p->neigh->ifa->ip;
981 
982 #ifdef IPV6
983   {
984     struct ifa *a;
985     p->local_link = IPA_NONE;
986     WALK_LIST(a, p->neigh->iface->addrs)
987       if (a->scope == SCOPE_LINK)
988         {
989 	  p->local_link = a->ip;
990 	  break;
991 	}
992 
993     if (! ipa_nonzero(p->local_link))
994       log(L_WARN "%s: Missing link local address on interface %s", p->p.name,  p->neigh->iface->name);
995 
996     DBG("BGP: Selected link-level address %I\n", p->local_link);
997   }
998 #endif
999 
1000   bgp_initiate(p);
1001 }
1002 
1003 static void
bgp_neigh_notify(neighbor * n)1004 bgp_neigh_notify(neighbor *n)
1005 {
1006   struct bgp_proto *p = (struct bgp_proto *) n->proto;
1007   int ps = p->p.proto_state;
1008 
1009   if (n != p->neigh)
1010     return;
1011 
1012   if ((ps == PS_DOWN) || (ps == PS_STOP))
1013     return;
1014 
1015   int prepare = (ps == PS_START) && (p->start_state == BSS_PREPARE);
1016 
1017   if (n->scope <= 0)
1018     {
1019       if (!prepare)
1020         {
1021 	  BGP_TRACE(D_EVENTS, "Neighbor lost");
1022 	  bgp_store_error(p, NULL, BE_MISC, BEM_NEIGHBOR_LOST);
1023 	  /* Perhaps also run bgp_update_startup_delay(p)? */
1024 	  bgp_stop(p, 0, NULL, 0);
1025 	}
1026     }
1027   else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1028     {
1029       if (!prepare)
1030         {
1031 	  BGP_TRACE(D_EVENTS, "Link down");
1032 	  bgp_store_error(p, NULL, BE_MISC, BEM_LINK_DOWN);
1033 	  if (ps == PS_UP)
1034 	    bgp_update_startup_delay(p);
1035 	  bgp_stop(p, 0, NULL, 0);
1036 	}
1037     }
1038   else
1039     {
1040       if (prepare)
1041 	{
1042 	  BGP_TRACE(D_EVENTS, "Neighbor ready");
1043 	  bgp_start_neighbor(p);
1044 	}
1045     }
1046 }
1047 
1048 static void
bgp_bfd_notify(struct bfd_request * req)1049 bgp_bfd_notify(struct bfd_request *req)
1050 {
1051   struct bgp_proto *p = req->data;
1052   int ps = p->p.proto_state;
1053 
1054   if (req->down && ((ps == PS_START) || (ps == PS_UP)))
1055   {
1056     BGP_TRACE(D_EVENTS, "BFD session down");
1057     bgp_store_error(p, NULL, BE_MISC, BEM_BFD_DOWN);
1058 
1059     if (p->cf->bfd == BGP_BFD_GRACEFUL)
1060     {
1061       /* Trigger graceful restart */
1062       if (p->conn && (p->conn->state == BS_ESTABLISHED) && p->gr_ready)
1063 	bgp_handle_graceful_restart(p);
1064 
1065       if (p->incoming_conn.state > BS_IDLE)
1066 	bgp_conn_enter_idle_state(&p->incoming_conn);
1067 
1068       if (p->outgoing_conn.state > BS_IDLE)
1069 	bgp_conn_enter_idle_state(&p->outgoing_conn);
1070     }
1071     else
1072     {
1073       /* Trigger session down */
1074       if (ps == PS_UP)
1075 	bgp_update_startup_delay(p);
1076       bgp_stop(p, 0, NULL, 0);
1077     }
1078   }
1079 }
1080 
1081 static void
bgp_update_bfd(struct bgp_proto * p,int use_bfd)1082 bgp_update_bfd(struct bgp_proto *p, int use_bfd)
1083 {
1084   if (use_bfd && !p->bfd_req)
1085     p->bfd_req = bfd_request_session(p->p.pool, p->cf->remote_ip, p->source_addr,
1086 				     p->cf->multihop ? NULL : p->neigh->iface,
1087 				     p->p.vrf, bgp_bfd_notify, p);
1088 
1089   if (!use_bfd && p->bfd_req)
1090     {
1091       rfree(p->bfd_req);
1092       p->bfd_req = NULL;
1093     }
1094 }
1095 
1096 static int
bgp_reload_routes(struct proto * P)1097 bgp_reload_routes(struct proto *P)
1098 {
1099   struct bgp_proto *p = (struct bgp_proto *) P;
1100   if (!p->conn || !p->conn->peer_refresh_support)
1101     return 0;
1102 
1103   bgp_schedule_packet(p->conn, PKT_ROUTE_REFRESH);
1104   return 1;
1105 }
1106 
1107 static void
bgp_feed_begin(struct proto * P,int initial)1108 bgp_feed_begin(struct proto *P, int initial)
1109 {
1110   struct bgp_proto *p = (struct bgp_proto *) P;
1111 
1112   /* This should not happen */
1113   if (!p->conn)
1114     return;
1115 
1116   if (initial && p->cf->gr_mode)
1117     p->feed_state = BFS_LOADING;
1118 
1119   /* It is refeed and both sides support enhanced route refresh */
1120   if (!initial && p->cf->enable_refresh &&
1121       p->conn->peer_enhanced_refresh_support)
1122     {
1123       /* BoRR must not be sent before End-of-RIB */
1124       if (p->feed_state == BFS_LOADING || p->feed_state == BFS_LOADED)
1125 	return;
1126 
1127       p->feed_state = BFS_REFRESHING;
1128       bgp_schedule_packet(p->conn, PKT_BEGIN_REFRESH);
1129     }
1130 }
1131 
1132 static void
bgp_feed_end(struct proto * P)1133 bgp_feed_end(struct proto *P)
1134 {
1135   struct bgp_proto *p = (struct bgp_proto *) P;
1136 
1137   /* This should not happen */
1138   if (!p->conn)
1139     return;
1140 
1141   /* Non-demarcated feed ended, nothing to do */
1142   if (p->feed_state == BFS_NONE)
1143     return;
1144 
1145   /* Schedule End-of-RIB packet */
1146   if (p->feed_state == BFS_LOADING)
1147     p->feed_state = BFS_LOADED;
1148 
1149   /* Schedule EoRR packet */
1150   if (p->feed_state == BFS_REFRESHING)
1151     p->feed_state = BFS_REFRESHED;
1152 
1153   /* Kick TX hook */
1154   bgp_schedule_packet(p->conn, PKT_UPDATE);
1155 }
1156 
1157 
1158 static void
bgp_start_locked(struct object_lock * lock)1159 bgp_start_locked(struct object_lock *lock)
1160 {
1161   struct bgp_proto *p = lock->data;
1162   struct bgp_config *cf = p->cf;
1163 
1164   if (p->p.proto_state != PS_START)
1165     {
1166       DBG("BGP: Got lock in different state %d\n", p->p.proto_state);
1167       return;
1168     }
1169 
1170   DBG("BGP: Got lock\n");
1171 
1172   if (cf->multihop)
1173     {
1174       /* Multi-hop sessions do not use neighbor entries */
1175       bgp_initiate(p);
1176       return;
1177     }
1178 
1179   neighbor *n = neigh_find2(&p->p, &cf->remote_ip, cf->iface, NEF_STICKY);
1180   if (!n)
1181     {
1182       log(L_ERR "%s: Invalid remote address %I%J", p->p.name, cf->remote_ip, cf->iface);
1183       /* As we do not start yet, we can just disable protocol */
1184       p->p.disabled = 1;
1185       bgp_store_error(p, NULL, BE_MISC, BEM_INVALID_NEXT_HOP);
1186       proto_notify_state(&p->p, PS_DOWN);
1187       return;
1188     }
1189 
1190   p->neigh = n;
1191 
1192   if (n->scope <= 0)
1193     BGP_TRACE(D_EVENTS, "Waiting for %I%J to become my neighbor", cf->remote_ip, cf->iface);
1194   else if (p->cf->check_link && !(n->iface->flags & IF_LINK_UP))
1195     BGP_TRACE(D_EVENTS, "Waiting for link on %s", n->iface->name);
1196   else
1197     bgp_start_neighbor(p);
1198 }
1199 
1200 static int
bgp_start(struct proto * P)1201 bgp_start(struct proto *P)
1202 {
1203   struct bgp_proto *p = (struct bgp_proto *) P;
1204   struct object_lock *lock;
1205 
1206   DBG("BGP: Startup.\n");
1207   p->start_state = BSS_PREPARE;
1208   p->outgoing_conn.state = BS_IDLE;
1209   p->incoming_conn.state = BS_IDLE;
1210   p->neigh = NULL;
1211   p->bfd_req = NULL;
1212   p->gr_ready = 0;
1213   p->gr_active = 0;
1214 
1215   rt_lock_table(p->igp_table);
1216 
1217   p->event = ev_new(p->p.pool);
1218   p->event->hook = bgp_decision;
1219   p->event->data = p;
1220 
1221   p->startup_timer = tm_new(p->p.pool);
1222   p->startup_timer->hook = bgp_startup_timeout;
1223   p->startup_timer->data = p;
1224 
1225   p->gr_timer = tm_new(p->p.pool);
1226   p->gr_timer->hook = bgp_graceful_restart_timeout;
1227   p->gr_timer->data = p;
1228 
1229   p->local_id = proto_get_router_id(P->cf);
1230   if (p->rr_client)
1231     p->rr_cluster_id = p->cf->rr_cluster_id ? p->cf->rr_cluster_id : p->local_id;
1232 
1233   p->remote_id = 0;
1234   p->source_addr = p->cf->source_addr;
1235 
1236   if (p->p.gr_recovery && p->cf->gr_mode)
1237     proto_graceful_restart_lock(P);
1238 
1239   /*
1240    *  Before attempting to create the connection, we need to lock the
1241    *  port, so that are sure we're the only instance attempting to talk
1242    *  with that neighbor.
1243    */
1244 
1245   lock = p->lock = olock_new(P->pool);
1246   lock->addr = p->cf->remote_ip;
1247   lock->port = p->cf->remote_port;
1248   lock->iface = p->cf->iface;
1249   lock->vrf = p->cf->iface ? NULL : p->p.vrf;
1250   lock->type = OBJLOCK_TCP;
1251   lock->hook = bgp_start_locked;
1252   lock->data = p;
1253   olock_acquire(lock);
1254 
1255   return PS_START;
1256 }
1257 
1258 extern int proto_restart;
1259 
1260 static int
bgp_shutdown(struct proto * P)1261 bgp_shutdown(struct proto *P)
1262 {
1263   struct bgp_proto *p = (struct bgp_proto *) P;
1264   uint subcode = 0;
1265 
1266   char *message = NULL;
1267   byte *data = NULL;
1268   uint len = 0;
1269 
1270   BGP_TRACE(D_EVENTS, "Shutdown requested");
1271 
1272   switch (P->down_code)
1273     {
1274     case PDC_CF_REMOVE:
1275     case PDC_CF_DISABLE:
1276       subcode = 3; // Errcode 6, 3 - peer de-configured
1277       break;
1278 
1279     case PDC_CF_RESTART:
1280       subcode = 6; // Errcode 6, 6 - other configuration change
1281       break;
1282 
1283     case PDC_CMD_DISABLE:
1284     case PDC_CMD_SHUTDOWN:
1285       subcode = 2; // Errcode 6, 2 - administrative shutdown
1286       message = P->message;
1287       break;
1288 
1289     case PDC_CMD_RESTART:
1290       subcode = 4; // Errcode 6, 4 - administrative reset
1291       message = P->message;
1292       break;
1293 
1294     case PDC_RX_LIMIT_HIT:
1295     case PDC_IN_LIMIT_HIT:
1296       subcode = 1; // Errcode 6, 1 - max number of prefixes reached
1297       /* log message for compatibility */
1298       log(L_WARN "%s: Route limit exceeded, shutting down", p->p.name);
1299       goto limit;
1300 
1301     case PDC_OUT_LIMIT_HIT:
1302       subcode = proto_restart ? 4 : 2; // Administrative reset or shutdown
1303 
1304     limit:
1305       bgp_store_error(p, NULL, BE_AUTO_DOWN, BEA_ROUTE_LIMIT_EXCEEDED);
1306       if (proto_restart)
1307 	bgp_update_startup_delay(p);
1308       else
1309 	p->startup_delay = 0;
1310       goto done;
1311     }
1312 
1313   bgp_store_error(p, NULL, BE_MAN_DOWN, 0);
1314   p->startup_delay = 0;
1315 
1316   /* RFC 8203 - shutdown communication */
1317   if (message)
1318   {
1319     uint msg_len = strlen(message);
1320     msg_len = MIN(msg_len, 255);
1321 
1322     /* Buffer will be freed automatically by protocol shutdown */
1323     data = mb_alloc(p->p.pool, msg_len + 1);
1324     len = msg_len + 1;
1325 
1326     data[0] = msg_len;
1327     memcpy(data+1, message, msg_len);
1328   }
1329 
1330 done:
1331   bgp_stop(p, subcode, data, len);
1332   return p->p.proto_state;
1333 }
1334 
1335 static void
bgp_cleanup(struct proto * P)1336 bgp_cleanup(struct proto *P)
1337 {
1338   struct bgp_proto *p = (struct bgp_proto *) P;
1339   rt_unlock_table(p->igp_table);
1340 }
1341 
1342 static rtable *
get_igp_table(struct bgp_config * cf)1343 get_igp_table(struct bgp_config *cf)
1344 {
1345   return cf->igp_table ? cf->igp_table->table : cf->c.table->table;
1346 }
1347 
1348 static struct proto *
bgp_init(struct proto_config * C)1349 bgp_init(struct proto_config *C)
1350 {
1351   struct proto *P = proto_new(C, sizeof(struct bgp_proto));
1352   struct bgp_config *c = (struct bgp_config *) C;
1353   struct bgp_proto *p = (struct bgp_proto *) P;
1354 
1355   P->accept_ra_types = c->secondary ? RA_ACCEPTED : RA_OPTIMAL;
1356   P->rt_notify = bgp_rt_notify;
1357   P->import_control = bgp_import_control;
1358   P->neigh_notify = bgp_neigh_notify;
1359   P->reload_routes = bgp_reload_routes;
1360   P->feed_begin = bgp_feed_begin;
1361   P->feed_end = bgp_feed_end;
1362   P->rte_better = bgp_rte_better;
1363   P->rte_mergable = bgp_rte_mergable;
1364   P->rte_recalculate = c->deterministic_med ? bgp_rte_recalculate : NULL;
1365   P->rte_modify = bgp_rte_modify_stale;
1366 
1367   p->cf = c;
1368   p->local_as = c->local_as;
1369   p->remote_as = c->remote_as;
1370   p->is_internal = (c->local_as == c->remote_as);
1371   p->rs_client = c->rs_client;
1372   p->rr_client = c->rr_client;
1373   p->igp_table = get_igp_table(c);
1374 
1375   return P;
1376 }
1377 
1378 
1379 void
bgp_check_config(struct bgp_config * c)1380 bgp_check_config(struct bgp_config *c)
1381 {
1382   int internal = (c->local_as == c->remote_as);
1383 
1384   /* Do not check templates at all */
1385   if (c->c.class == SYM_TEMPLATE)
1386     return;
1387 
1388 
1389   /* EBGP direct by default, IBGP multihop by default */
1390   if (c->multihop < 0)
1391     c->multihop = internal ? 64 : 0;
1392 
1393   /* Different default for gw_mode */
1394   if (!c->gw_mode)
1395     c->gw_mode = c->multihop ? GW_RECURSIVE : GW_DIRECT;
1396 
1397   /* Different default based on rs_client */
1398   if (!c->missing_lladdr)
1399     c->missing_lladdr = c->rs_client ? MLL_IGNORE : MLL_SELF;
1400 
1401   /* LLGR mode default based on GR mode */
1402   if (c->llgr_mode < 0)
1403     c->llgr_mode = c->gr_mode ? BGP_LLGR_AWARE : 0;
1404 
1405   /* Disable after error incompatible with restart limit action */
1406   if (c->c.in_limit && (c->c.in_limit->action == PLA_RESTART) && c->disable_after_error)
1407     c->c.in_limit->action = PLA_DISABLE;
1408 
1409 
1410   if (!c->local_as)
1411     cf_error("Local AS number must be set");
1412 
1413   if (ipa_zero(c->remote_ip))
1414     cf_error("Neighbor must be configured");
1415 
1416   if (!c->remote_as)
1417     cf_error("Remote AS number must be set");
1418 
1419   if (ipa_is_link_local(c->remote_ip) && !c->iface)
1420     cf_error("Link-local neighbor address requires specified interface");
1421 
1422   if (!(c->capabilities && c->enable_as4) && (c->remote_as > 0xFFFF))
1423     cf_error("Neighbor AS number out of range (AS4 not available)");
1424 
1425   if (!internal && c->rr_client)
1426     cf_error("Only internal neighbor can be RR client");
1427 
1428   if (internal && c->rs_client)
1429     cf_error("Only external neighbor can be RS client");
1430 
1431   if (c->multihop && (c->gw_mode == GW_DIRECT))
1432     cf_error("Multihop BGP cannot use direct gateway mode");
1433 
1434   if (c->multihop && (ipa_is_link_local(c->remote_ip) ||
1435 		      ipa_is_link_local(c->source_addr)))
1436     cf_error("Multihop BGP cannot be used with link-local addresses");
1437 
1438   if (c->multihop && c->iface)
1439     cf_error("Multihop BGP cannot be bound to interface");
1440 
1441   if (c->multihop && c->check_link)
1442     cf_error("Multihop BGP cannot depend on link state");
1443 
1444   if (c->multihop && c->bfd && ipa_zero(c->source_addr))
1445     cf_error("Multihop BGP with BFD requires specified source address");
1446 
1447   if ((c->gw_mode == GW_RECURSIVE) && c->c.table->sorted)
1448     cf_error("BGP in recursive mode prohibits sorted table");
1449 
1450   if (c->deterministic_med && c->c.table->sorted)
1451     cf_error("BGP with deterministic MED prohibits sorted table");
1452 
1453   if (c->secondary && !c->c.table->sorted)
1454     cf_error("BGP with secondary option requires sorted table");
1455 
1456   if (!c->gr_mode && c->llgr_mode)
1457     cf_error("Long-lived graceful restart requires basic graceful restart");
1458 }
1459 
1460 static int
bgp_reconfigure(struct proto * P,struct proto_config * C)1461 bgp_reconfigure(struct proto *P, struct proto_config *C)
1462 {
1463   struct bgp_config *new = (struct bgp_config *) C;
1464   struct bgp_proto *p = (struct bgp_proto *) P;
1465   struct bgp_config *old = p->cf;
1466 
1467   if (proto_get_router_id(C) != p->local_id)
1468     return 0;
1469 
1470   int same = !memcmp(((byte *) old) + sizeof(struct proto_config),
1471 		     ((byte *) new) + sizeof(struct proto_config),
1472 		     // password item is last and must be checked separately
1473 		     OFFSETOF(struct bgp_config, password) - sizeof(struct proto_config))
1474     && ((!old->password && !new->password)
1475 	|| (old->password && new->password && !strcmp(old->password, new->password)))
1476     && (get_igp_table(old) == get_igp_table(new));
1477 
1478   if (same && (p->start_state > BSS_PREPARE))
1479     bgp_update_bfd(p, new->bfd);
1480 
1481   /* We should update our copy of configuration ptr as old configuration will be freed */
1482   if (same)
1483     p->cf = new;
1484 
1485   return same;
1486 }
1487 
1488 static void
bgp_copy_config(struct proto_config * dest,struct proto_config * src)1489 bgp_copy_config(struct proto_config *dest, struct proto_config *src)
1490 {
1491   /* Just a shallow copy */
1492   proto_copy_rest(dest, src, sizeof(struct bgp_config));
1493 }
1494 
1495 
1496 /**
1497  * bgp_error - report a protocol error
1498  * @c: connection
1499  * @code: error code (according to the RFC)
1500  * @subcode: error sub-code
1501  * @data: data to be passed in the Notification message
1502  * @len: length of the data
1503  *
1504  * bgp_error() sends a notification packet to tell the other side that a protocol
1505  * error has occurred (including the data considered erroneous if possible) and
1506  * closes the connection.
1507  */
1508 void
bgp_error(struct bgp_conn * c,unsigned code,unsigned subcode,byte * data,int len)1509 bgp_error(struct bgp_conn *c, unsigned code, unsigned subcode, byte *data, int len)
1510 {
1511   struct bgp_proto *p = c->bgp;
1512 
1513   if (c->state == BS_CLOSE)
1514     return;
1515 
1516   bgp_log_error(p, BE_BGP_TX, "Error", code, subcode, data, (len > 0) ? len : -len);
1517   bgp_store_error(p, c, BE_BGP_TX, (code << 16) | subcode);
1518   bgp_conn_enter_close_state(c);
1519 
1520   c->notify_code = code;
1521   c->notify_subcode = subcode;
1522   c->notify_data = data;
1523   c->notify_size = (len > 0) ? len : 0;
1524   bgp_schedule_packet(c, PKT_NOTIFICATION);
1525 
1526   if (code != 6)
1527     {
1528       bgp_update_startup_delay(p);
1529       bgp_stop(p, 0, NULL, 0);
1530     }
1531 }
1532 
1533 /**
1534  * bgp_store_error - store last error for status report
1535  * @p: BGP instance
1536  * @c: connection
1537  * @class: error class (BE_xxx constants)
1538  * @code: error code (class specific)
1539  *
1540  * bgp_store_error() decides whether given error is interesting enough
1541  * and store that error to last_error variables of @p
1542  */
1543 void
bgp_store_error(struct bgp_proto * p,struct bgp_conn * c,u8 class,u32 code)1544 bgp_store_error(struct bgp_proto *p, struct bgp_conn *c, u8 class, u32 code)
1545 {
1546   /* During PS_UP, we ignore errors on secondary connection */
1547   if ((p->p.proto_state == PS_UP) && c && (c != p->conn))
1548     return;
1549 
1550   /* During PS_STOP, we ignore any errors, as we want to report
1551    * the error that caused transition to PS_STOP
1552    */
1553   if (p->p.proto_state == PS_STOP)
1554     return;
1555 
1556   p->last_error_class = class;
1557   p->last_error_code = code;
1558 }
1559 
/* Connection state names, indexed by connection state (see bgp_state_dsc()) */
static char *bgp_state_names[] = {
  "Idle",
  "Connect",
  "Active",
  "OpenSent",
  "OpenConfirm",
  "Established",
  "Close"
};

/* Status prefixes, indexed by last_error_class */
static char *bgp_err_classes[] = {
  "",
  "Error: ",
  "Socket: ",
  "Received: ",
  "BGP Error: ",
  "Automatic shutdown: ",
  ""
};

/* Messages for class BE_MISC, indexed by last_error_code */
static char *bgp_misc_errors[] = {
  "",
  "Neighbor lost",
  "Invalid next hop",
  "Kernel MD5 auth failed",
  "No listening socket",
  "Link down",
  "BFD session down",
  "Graceful restart"
};

/* Messages for class BE_AUTO_DOWN, indexed by last_error_code */
static char *bgp_auto_errors[] = {
  "",
  "Route limit exceeded"
};
1564 
1565 static const char *
bgp_last_errmsg(struct bgp_proto * p)1566 bgp_last_errmsg(struct bgp_proto *p)
1567 {
1568   switch (p->last_error_class)
1569     {
1570     case BE_MISC:
1571       return bgp_misc_errors[p->last_error_code];
1572     case BE_SOCKET:
1573       return (p->last_error_code == 0) ? "Connection closed" : strerror(p->last_error_code);
1574     case BE_BGP_RX:
1575     case BE_BGP_TX:
1576       return bgp_error_dsc(p->last_error_code >> 16, p->last_error_code & 0xFF);
1577     case BE_AUTO_DOWN:
1578       return bgp_auto_errors[p->last_error_code];
1579     default:
1580       return "";
1581     }
1582 }
1583 
1584 static const char *
bgp_state_dsc(struct bgp_proto * p)1585 bgp_state_dsc(struct bgp_proto *p)
1586 {
1587   if (p->p.proto_state == PS_DOWN)
1588     return "Down";
1589 
1590   int state = MAX(p->incoming_conn.state, p->outgoing_conn.state);
1591   if ((state == BS_IDLE) && (p->start_state >= BSS_CONNECT) && p->cf->passive)
1592     return "Passive";
1593 
1594   return bgp_state_names[state];
1595 }
1596 
1597 static void
bgp_get_status(struct proto * P,byte * buf)1598 bgp_get_status(struct proto *P, byte *buf)
1599 {
1600   struct bgp_proto *p = (struct bgp_proto *) P;
1601 
1602   const char *err1 = bgp_err_classes[p->last_error_class];
1603   const char *err2 = bgp_last_errmsg(p);
1604 
1605   if (P->proto_state == PS_DOWN)
1606     bsprintf(buf, "%s%s", err1, err2);
1607   else
1608     bsprintf(buf, "%-14s%s%s", bgp_state_dsc(p), err1, err2);
1609 }
1610 
1611 static void
bgp_show_proto_info(struct proto * P)1612 bgp_show_proto_info(struct proto *P)
1613 {
1614   struct bgp_proto *p = (struct bgp_proto *) P;
1615   struct bgp_conn *c = p->conn;
1616 
1617   proto_show_basic_info(P);
1618 
1619   cli_msg(-1006, "  BGP state:          %s", bgp_state_dsc(p));
1620   cli_msg(-1006, "    Neighbor address: %I%J", p->cf->remote_ip, p->cf->iface);
1621   cli_msg(-1006, "    Neighbor AS:      %u", p->remote_as);
1622 
1623   if (p->gr_active)
1624     cli_msg(-1006, "    Neighbor graceful restart active");
1625 
1626   if (p->gr_active && p->gr_timer->expires)
1627     cli_msg(-1006, "    %-15s   %d/-",
1628 	    (p->gr_active != BGP_GRS_LLGR_2) ? "Restart timer:" : "LL stale timer:",
1629 	    p->gr_timer->expires - now);
1630 
1631   if (P->proto_state == PS_START)
1632     {
1633       struct bgp_conn *oc = &p->outgoing_conn;
1634 
1635       if ((p->start_state < BSS_CONNECT) &&
1636 	  (p->startup_timer->expires))
1637 	cli_msg(-1006, "    Error wait:       %d/%d",
1638 		p->startup_timer->expires - now, p->startup_delay);
1639 
1640       if ((oc->state == BS_ACTIVE) &&
1641 	  (oc->connect_retry_timer->expires))
1642 	cli_msg(-1006, "    Connect delay:    %d/%d",
1643 		oc->connect_retry_timer->expires - now, p->cf->connect_delay_time);
1644     }
1645   else if (P->proto_state == PS_UP)
1646     {
1647       cli_msg(-1006, "    Neighbor ID:      %R", p->remote_id);
1648       cli_msg(-1006, "    Neighbor caps:   %s%s%s%s%s%s%s",
1649 	      c->peer_refresh_support ? " refresh" : "",
1650 	      c->peer_enhanced_refresh_support ? " enhanced-refresh" : "",
1651 	      c->peer_gr_able ? " restart-able" : (c->peer_gr_aware ? " restart-aware" : ""),
1652 	      c->peer_llgr_able ? " llgr-able" : (c->peer_llgr_aware ? " llgr-aware" : ""),
1653 	      c->peer_as4_support ? " AS4" : "",
1654 	      (c->peer_add_path & ADD_PATH_RX) ? " add-path-rx" : "",
1655 	      (c->peer_add_path & ADD_PATH_TX) ? " add-path-tx" : "",
1656 	      c->peer_ext_messages_support ? " ext-messages" : "");
1657       cli_msg(-1006, "    Session:          %s%s%s%s%s%s%s%s",
1658 	      p->is_internal ? "internal" : "external",
1659 	      p->cf->multihop ? " multihop" : "",
1660 	      p->rr_client ? " route-reflector" : "",
1661 	      p->rs_client ? " route-server" : "",
1662 	      p->as4_session ? " AS4" : "",
1663 	      p->add_path_rx ? " add-path-rx" : "",
1664 	      p->add_path_tx ? " add-path-tx" : "",
1665 	      p->ext_messages ? " ext-messages" : "");
1666       cli_msg(-1006, "    Source address:   %I", p->source_addr);
1667       if (P->cf->in_limit)
1668 	cli_msg(-1006, "    Route limit:      %d/%d",
1669 		p->p.stats.imp_routes + p->p.stats.filt_routes, P->cf->in_limit->limit);
1670       cli_msg(-1006, "    Hold timer:       %d/%d",
1671 	      tm_remains(c->hold_timer), c->hold_time);
1672       cli_msg(-1006, "    Keepalive timer:  %d/%d",
1673 	      tm_remains(c->keepalive_timer), c->keepalive_time);
1674     }
1675 
1676   if ((p->last_error_class != BE_NONE) &&
1677       (p->last_error_class != BE_MAN_DOWN))
1678     {
1679       const char *err1 = bgp_err_classes[p->last_error_class];
1680       const char *err2 = bgp_last_errmsg(p);
1681       cli_msg(-1006, "    Last error:       %s%s", err1, err2);
1682     }
1683 }
1684 
1685 struct protocol proto_bgp = {
1686   .name = 		"BGP",
1687   .template = 		"bgp%d",
1688   .attr_class = 	EAP_BGP,
1689   .preference = 	DEF_PREF_BGP,
1690   .config_size =	sizeof(struct bgp_config),
1691   .init = 		bgp_init,
1692   .start = 		bgp_start,
1693   .shutdown = 		bgp_shutdown,
1694   .cleanup = 		bgp_cleanup,
1695   .reconfigure = 	bgp_reconfigure,
1696   .copy_config = 	bgp_copy_config,
1697   .get_status = 	bgp_get_status,
1698   .get_attr = 		bgp_get_attr,
1699   .get_route_info = 	bgp_get_route_info,
1700   .show_proto_info = 	bgp_show_proto_info
1701 };
1702