xref: /openbsd/usr.sbin/bgpd/session.c (revision 17df1aa7)
1 /*	$OpenBSD: session.c,v 1.308 2010/05/03 13:09:38 claudio Exp $ */
2 
3 /*
4  * Copyright (c) 2003, 2004, 2005 Henning Brauer <henning@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/types.h>
21 
22 #include <sys/mman.h>
23 #include <sys/socket.h>
24 #include <sys/un.h>
25 #include <net/if_types.h>
26 #include <netinet/in.h>
27 #include <netinet/in_systm.h>
28 #include <netinet/ip.h>
29 #include <netinet/tcp.h>
30 #include <arpa/inet.h>
31 
32 #include <err.h>
33 #include <errno.h>
34 #include <fcntl.h>
35 #include <limits.h>
36 #include <poll.h>
37 #include <pwd.h>
38 #include <signal.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <unistd.h>
43 
44 #include "bgpd.h"
45 #include "mrt.h"
46 #include "session.h"
47 
/*
 * Fixed slots at the front of the pollfd array built in session_main():
 *   PFD_PIPE_MAIN       fd of ibuf_main
 *   PFD_PIPE_ROUTE      fd of ibuf_rde
 *   PFD_PIPE_ROUTE_CTL  fd of ibuf_rde_ctl
 *   PFD_SOCK_CTL        csock
 *   PFD_SOCK_RCTL       rcsock
 * Listener sockets start at PFD_LISTENERS_START and are followed by the
 * peer, mrt and control connection descriptors.
 */
48 #define PFD_PIPE_MAIN		0
49 #define PFD_PIPE_ROUTE		1
50 #define PFD_PIPE_ROUTE_CTL	2
51 #define PFD_SOCK_CTL		3
52 #define PFD_SOCK_RCTL		4
53 #define PFD_LISTENERS_START	5
54 
/*
 * Forward declarations for the session engine helpers used in this file.
 * NOTE(review): none of them is declared static -- confirm whether any is
 * meant to be visible outside this translation unit.
 */
55 void	session_sighdlr(int);
56 int	setup_listeners(u_int *);
57 void	init_conf(struct bgpd_config *);
58 void	init_peer(struct peer *);
59 void	start_timer_holdtime(struct peer *);
60 void	start_timer_keepalive(struct peer *);
61 void	session_close_connection(struct peer *);
62 void	change_state(struct peer *, enum session_state, enum session_events);
63 int	session_setup_socket(struct peer *);
64 void	session_accept(int);
65 int	session_connect(struct peer *);
66 void	session_tcp_established(struct peer *);
67 void	session_capa_ann_none(struct peer *);
68 int	session_capa_add(struct buf *, u_int8_t, u_int8_t);
69 int	session_capa_add_mp(struct buf *, u_int8_t);
70 struct bgp_msg	*session_newmsg(enum msg_type, u_int16_t);
71 int	session_sendmsg(struct bgp_msg *, struct peer *);
72 void	session_open(struct peer *);
73 void	session_keepalive(struct peer *);
74 void	session_update(u_int32_t, void *, size_t);
75 void	session_notification(struct peer *, u_int8_t, u_int8_t, void *,
76 	    ssize_t);
77 void	session_rrefresh(struct peer *, u_int8_t);
78 int	session_dispatch_msg(struct pollfd *, struct peer *);
79 int	parse_header(struct peer *, u_char *, u_int16_t *, u_int8_t *);
80 int	parse_open(struct peer *);
81 int	parse_update(struct peer *);
82 int	parse_refresh(struct peer *);
83 int	parse_notification(struct peer *);
84 int	parse_capabilities(struct peer *, u_char *, u_int16_t, u_int32_t *);
85 int	capa_neg_calc(struct peer *);
86 void	session_dispatch_imsg(struct imsgbuf *, int, u_int *);
87 void	session_up(struct peer *);
88 void	session_down(struct peer *);
89 void	session_demote(struct peer *, int);
90 
91 int		 la_cmp(struct listen_addr *, struct listen_addr *);
92 struct peer	*getpeerbyip(struct sockaddr *);
93 int		 session_match_mask(struct peer *, struct bgpd_addr *);
94 struct peer	*getpeerbyid(u_int32_t);
95 
/*
 * Session engine state shared across this file.
 *
 * conf            active configuration; allocated in session_main()
 * nconf, npeers   not referenced in the part of the file reviewed here --
 *                 presumably staging for a config reload, TODO confirm
 * sysdep          records unavailable features (no_md5sig, no_pfkey)
 * peers           head of the peer list, chained through peer->next
 * session_quit    set by session_sighdlr(); terminates the main loop
 * pending_reconf  while non-zero the main loop skips peer init/removal
 * csock, rcsock   control sockets returned by control_init(); -1 if unset
 * peer_cnt        incremented in init_peer(), decremented on peer removal
 * ibuf_rde, ibuf_rde_ctl, ibuf_main
 *                 imsg buffers on the pipes wired up in session_main()
 * mrthead         list of mrt dump descriptors
 */
96 struct bgpd_config	*conf, *nconf;
97 struct bgpd_sysdep	 sysdep;
98 struct peer		*peers, *npeers;
99 volatile sig_atomic_t	 session_quit;
100 int			 pending_reconf;
101 int			 csock = -1, rcsock = -1;
102 u_int			 peer_cnt;
103 struct imsgbuf		*ibuf_rde;
104 struct imsgbuf		*ibuf_rde_ctl;
105 struct imsgbuf		*ibuf_main;
106 
107 struct mrt_head		 mrthead;
108 
109 void
110 session_sighdlr(int sig)
111 {
112 	switch (sig) {
113 	case SIGINT:
114 	case SIGTERM:
115 		session_quit = 1;
116 		break;
117 	}
118 }
119 
120 int
121 setup_listeners(u_int *la_cnt)
122 {
123 	int			 ttl = 255;
124 	int			 opt;
125 	struct listen_addr	*la;
126 	u_int			 cnt = 0;
127 
128 	TAILQ_FOREACH(la, conf->listen_addrs, entry) {
129 		la->reconf = RECONF_NONE;
130 		cnt++;
131 
132 		if (la->flags & LISTENER_LISTENING)
133 			continue;
134 
135 		if (la->fd == -1) {
136 			log_warn("cannot establish listener on %s: invalid fd",
137 			    log_sockaddr((struct sockaddr *)&la->sa));
138 			continue;
139 		}
140 
141 		opt = 1;
142 		if (setsockopt(la->fd, IPPROTO_TCP, TCP_MD5SIG,
143 		    &opt, sizeof(opt)) == -1) {
144 			if (errno == ENOPROTOOPT) {	/* system w/o md5sig */
145 				log_warnx("md5sig not available, disabling");
146 				sysdep.no_md5sig = 1;
147 			} else
148 				fatal("setsockopt TCP_MD5SIG");
149 		}
150 
151 		/* set ttl to 255 so that ttl-security works */
152 		if (la->sa.ss_family == AF_INET && setsockopt(la->fd,
153 		    IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl)) == -1) {
154 			log_warn("setup_listeners setsockopt TTL");
155 			continue;
156 		}
157 
158 		session_socket_blockmode(la->fd, BM_NONBLOCK);
159 
160 		if (listen(la->fd, MAX_BACKLOG)) {
161 			close(la->fd);
162 			fatal("listen");
163 		}
164 
165 		la->flags |= LISTENER_LISTENING;
166 
167 		log_info("listening on %s",
168 		    log_sockaddr((struct sockaddr *)&la->sa));
169 	}
170 
171 	*la_cnt = cnt;
172 
173 	return (0);
174 }
175 
/*
 * Entry point of the session engine process.
 *
 * Forks; in the parent the child's pid is returned right away.  The child
 * creates the control socket(s), chroots into the home directory of
 * BGPD_USER, drops privileges, installs signal handlers, attaches imsg
 * buffers to its ends of the pipes and then runs the poll() loop below
 * until session_quit is set.  On the way out all peers are stopped, the
 * lists and arrays are released and the child ends with _exit(0), so the
 * function never returns in the child.
 *
 * pipe_m2s, pipe_s2r, pipe_s2rctl: pipes whose [1], [0] and [0] ends
 *     respectively are kept and wrapped as ibuf_main, ibuf_rde and
 *     ibuf_rde_ctl; the other ends are closed.
 * pipe_m2r: both ends are closed in this process.
 * cname:  passed to control_init(0, ...) for the control socket.
 * rcname: if not NULL, passed to control_init(1, ...) for a second one.
 */
176 pid_t
177 session_main(int pipe_m2s[2], int pipe_s2r[2], int pipe_m2r[2],
178     int pipe_s2rctl[2], char *cname, char *rcname)
179 {
180 	int			 nfds, timeout;
181 	unsigned int		 i, j, idx_peers, idx_listeners, idx_mrts;
182 	pid_t			 pid;
183 	u_int			 pfd_elms = 0, peer_l_elms = 0, mrt_l_elms = 0;
184 	u_int			 listener_cnt, ctl_cnt, mrt_cnt;
185 	u_int			 new_cnt;
186 	u_int32_t		 ctl_queued;
187 	struct passwd		*pw;
188 	struct peer		*p, **peer_l = NULL, *last, *next;
189 	struct mrt		*m, *xm, **mrt_l = NULL;
190 	struct pollfd		*pfd = NULL;
191 	struct ctl_conn		*ctl_conn;
192 	struct listen_addr	*la;
193 	void			*newp;
194 	short			 events;
195 
196 	switch (pid = fork()) {
197 	case -1:
198 		fatal("cannot fork");
199 	case 0:
200 		break;
201 	default:
202 		return (pid);
203 	}
204 
205 	/* control socket is outside chroot */
206 	if ((csock = control_init(0, cname)) == -1)
207 		fatalx("control socket setup failed");
208 	if (rcname != NULL && (rcsock = control_init(1, rcname)) == -1)
209 		fatalx("control socket setup failed");
210 
211 	if ((pw = getpwnam(BGPD_USER)) == NULL)
212 		fatal(NULL);
213 
214 	if (chroot(pw->pw_dir) == -1)
215 		fatal("chroot");
216 	if (chdir("/") == -1)
217 		fatal("chdir(\"/\")");
218 
219 	setproctitle("session engine");
220 	bgpd_process = PROC_SE;
221 
222 	if (pfkey_init(&sysdep) == -1)
223 		fatalx("pfkey setup failed");
224 
	/* drop privileges: groups first, uid last */
225 	if (setgroups(1, &pw->pw_gid) ||
226 	    setresgid(pw->pw_gid, pw->pw_gid, pw->pw_gid) ||
227 	    setresuid(pw->pw_uid, pw->pw_uid, pw->pw_uid))
228 		fatal("can't drop privileges");
229 
230 	signal(SIGTERM, session_sighdlr);
231 	signal(SIGINT, session_sighdlr);
232 	signal(SIGPIPE, SIG_IGN);
233 	signal(SIGHUP, SIG_IGN);
234 	signal(SIGALRM, SIG_IGN);
235 	signal(SIGUSR1, SIG_IGN);
236 
	/* keep only the pipe ends this process talks on */
237 	close(pipe_m2s[0]);
238 	close(pipe_s2r[1]);
239 	close(pipe_s2rctl[1]);
240 	close(pipe_m2r[0]);
241 	close(pipe_m2r[1]);
242 	if ((ibuf_rde = malloc(sizeof(struct imsgbuf))) == NULL ||
243 	    (ibuf_rde_ctl = malloc(sizeof(struct imsgbuf))) == NULL ||
244 	    (ibuf_main = malloc(sizeof(struct imsgbuf))) == NULL)
245 		fatal(NULL);
246 	imsg_init(ibuf_rde, pipe_s2r[0]);
247 	imsg_init(ibuf_rde_ctl, pipe_s2rctl[0]);
248 	imsg_init(ibuf_main, pipe_m2s[1]);
249 
250 	TAILQ_INIT(&ctl_conns);
251 	control_listen(csock);
252 	control_listen(rcsock);
253 	LIST_INIT(&mrthead);
254 	listener_cnt = 0;
255 	peer_cnt = 0;
256 	ctl_cnt = 0;
257 
	/* only the listen address list of conf is initialised here */
258 	if ((conf = malloc(sizeof(struct bgpd_config))) == NULL)
259 		fatal(NULL);
260 	if ((conf->listen_addrs = calloc(1, sizeof(struct listen_addrs))) ==
261 	    NULL)
262 		fatal(NULL);
263 	TAILQ_INIT(conf->listen_addrs);
264 
265 	log_info("session engine ready");
266 
267 	while (session_quit == 0) {
268 		/* check for peers to be initialized or deleted */
269 		last = NULL;
270 		for (p = peers; p != NULL; p = next) {
271 			next = p->next;
272 			if (!pending_reconf) {
273 				/* cloned peer that idled out? */
274 				if (p->state == STATE_IDLE && p->conf.cloned &&
275 				    time(NULL) - p->stats.last_updown >=
276 				    INTERVAL_HOLD_CLONED)
277 					p->conf.reconf_action = RECONF_DELETE;
278 
279 				/* new peer that needs init? */
280 				if (p->state == STATE_NONE)
281 					init_peer(p);
282 
283 				/* reinit due? */
284 				if (p->conf.reconf_action == RECONF_REINIT) {
285 					session_stop(p, ERR_CEASE_ADMIN_RESET);
286 					if (!p->conf.down)
287 						timer_set(p, Timer_IdleHold, 0);
288 				}
289 
290 				/* deletion due? */
291 				if (p->conf.reconf_action == RECONF_DELETE) {
292 					if (p->demoted)
293 						session_demote(p, -1);
294 					p->conf.demote_group[0] = 0;
295 					session_stop(p, ERR_CEASE_PEER_UNCONF);
296 					log_peer_warnx(&p->conf, "removed");
					/* unlink p; `last` stays unchanged */
297 					if (last != NULL)
298 						last->next = next;
299 					else
300 						peers = next;
301 					timer_remove_all(p);
302 					free(p);
303 					peer_cnt--;
304 					continue;
305 				}
306 				p->conf.reconf_action = RECONF_NONE;
307 			}
308 			last = p;
309 		}
310 
		/* peer_l maps pollfd slots back to peers; grow only */
311 		if (peer_cnt > peer_l_elms) {
312 			if ((newp = realloc(peer_l, sizeof(struct peer *) *
313 			    peer_cnt)) == NULL) {
314 				/* panic for now  */
315 				log_warn("could not resize peer_l from %u -> %u"
316 				    " entries", peer_l_elms, peer_cnt);
317 				fatalx("exiting");
318 			}
319 			peer_l = newp;
320 			peer_l_elms = peer_cnt;
321 		}
322 
		/* reap mrt dumps marked for removal, count those with output */
323 		mrt_cnt = 0;
324 		for (m = LIST_FIRST(&mrthead); m != NULL; m = xm) {
325 			xm = LIST_NEXT(m, entry);
326 			if (m->state == MRT_STATE_REMOVE) {
327 				mrt_clean(m);
328 				LIST_REMOVE(m, entry);
329 				free(m);
330 				continue;
331 			}
332 			if (m->wbuf.queued)
333 				mrt_cnt++;
334 		}
335 
336 		if (mrt_cnt > mrt_l_elms) {
337 			if ((newp = realloc(mrt_l, sizeof(struct mrt *) *
338 			    mrt_cnt)) == NULL) {
339 				/* panic for now  */
340 				log_warn("could not resize mrt_l from %u -> %u"
341 				    " entries", mrt_l_elms, mrt_cnt);
342 				fatalx("exiting");
343 			}
344 			mrt_l = newp;
345 			mrt_l_elms = mrt_cnt;
346 		}
347 
		/* fixed slots + listeners + peers + control conns + mrts */
348 		new_cnt = PFD_LISTENERS_START + listener_cnt + peer_cnt +
349 		    ctl_cnt + mrt_cnt;
350 		if (new_cnt > pfd_elms) {
351 			if ((newp = realloc(pfd, sizeof(struct pollfd) *
352 			    new_cnt)) == NULL) {
353 				/* panic for now  */
354 				log_warn("could not resize pfd from %u -> %u"
355 				    " entries", pfd_elms, new_cnt);
356 				fatalx("exiting");
357 			}
358 			pfd = newp;
359 			pfd_elms = new_cnt;
360 		}
361 
362 		bzero(pfd, sizeof(struct pollfd) * pfd_elms);
363 		pfd[PFD_PIPE_MAIN].fd = ibuf_main->fd;
364 		pfd[PFD_PIPE_MAIN].events = POLLIN;
365 		if (ibuf_main->w.queued > 0)
366 			pfd[PFD_PIPE_MAIN].events |= POLLOUT;
367 		pfd[PFD_PIPE_ROUTE].fd = ibuf_rde->fd;
368 		pfd[PFD_PIPE_ROUTE].events = POLLIN;
369 		if (ibuf_rde->w.queued > 0)
370 			pfd[PFD_PIPE_ROUTE].events |= POLLOUT;
371 
372 		ctl_queued = 0;
373 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry)
374 			ctl_queued += ctl_conn->ibuf.w.queued;
375 
376 		pfd[PFD_PIPE_ROUTE_CTL].fd = ibuf_rde_ctl->fd;
377 		if (ctl_queued < SESSION_CTL_QUEUE_MAX)
378 			/*
379 			 * Do not act as unlimited buffer. Don't read in more
380 			 * messages if the ctl sockets are getting full.
381 			 */
382 			pfd[PFD_PIPE_ROUTE_CTL].events = POLLIN;
383 		pfd[PFD_SOCK_CTL].fd = csock;
384 		pfd[PFD_SOCK_CTL].events = POLLIN;
385 		pfd[PFD_SOCK_RCTL].fd = rcsock;
386 		pfd[PFD_SOCK_RCTL].events = POLLIN;
387 
388 		i = PFD_LISTENERS_START;
389 		TAILQ_FOREACH(la, conf->listen_addrs, entry) {
390 			pfd[i].fd = la->fd;
391 			pfd[i].events = POLLIN;
392 			i++;
393 		}
394 		idx_listeners = i;
395 		timeout = 240;	/* loop every 240s at least */
396 
397 		for (p = peers; p != NULL; p = p->next) {
398 			time_t	nextaction;
399 			struct peer_timer *pt;
400 
401 			/* check timers */
402 			if ((pt = timer_nextisdue(p)) != NULL) {
403 				switch (pt->type) {
404 				case Timer_Hold:
405 					bgp_fsm(p, EVNT_TIMER_HOLDTIME);
406 					break;
407 				case Timer_ConnectRetry:
408 					bgp_fsm(p, EVNT_TIMER_CONNRETRY);
409 					break;
410 				case Timer_Keepalive:
411 					bgp_fsm(p, EVNT_TIMER_KEEPALIVE);
412 					break;
413 				case Timer_IdleHold:
414 					bgp_fsm(p, EVNT_START);
415 					break;
416 				case Timer_IdleHoldReset:
					/* decay the idle hold time stepwise */
417 					p->IdleHoldTime /= 2;
418 					if (p->IdleHoldTime <=
419 					    INTERVAL_IDLE_HOLD_INITIAL) {
420 						p->IdleHoldTime =
421 						    INTERVAL_IDLE_HOLD_INITIAL;
422 						timer_stop(p,
423 						    Timer_IdleHoldReset);
424 						p->errcnt = 0;
425 					} else
426 						timer_set(p,
427 						    Timer_IdleHoldReset,
428 						    p->IdleHoldTime);
429 					break;
430 				case Timer_CarpUndemote:
431 					timer_stop(p, Timer_CarpUndemote);
432 					if (p->demoted &&
433 					    p->state == STATE_ESTABLISHED)
434 						session_demote(p, -1);
435 					break;
436 				default:
437 					fatalx("King Bula lost in time");
438 				}
439 			}
			/* shorten the poll timeout to the nearest timer */
440 			if ((nextaction = timer_nextduein(p)) != -1 &&
441 			    nextaction < timeout)
442 				timeout = nextaction;
443 
444 			/* are we waiting for a write? */
445 			events = POLLIN;
446 			if (p->wbuf.queued > 0 || p->state == STATE_CONNECT)
447 				events |= POLLOUT;
448 
449 			/* poll events */
450 			if (p->fd != -1 && events != 0) {
451 				pfd[i].fd = p->fd;
452 				pfd[i].events = events;
453 				peer_l[i - idx_listeners] = p;
454 				i++;
455 			}
456 		}
457 
458 		idx_peers = i;
459 
460 		LIST_FOREACH(m, &mrthead, entry)
461 			if (m->wbuf.queued) {
462 				pfd[i].fd = m->wbuf.fd;
463 				pfd[i].events = POLLOUT;
464 				mrt_l[i - idx_peers] = m;
465 				i++;
466 			}
467 
468 		idx_mrts = i;
469 
470 		TAILQ_FOREACH(ctl_conn, &ctl_conns, entry) {
471 			pfd[i].fd = ctl_conn->ibuf.fd;
472 			pfd[i].events = POLLIN;
473 			if (ctl_conn->ibuf.w.queued > 0)
474 				pfd[i].events |= POLLOUT;
475 			i++;
476 		}
477 
		/* timeout is kept in seconds, poll() wants milliseconds */
478 		if (timeout < 0)
479 			timeout = 0;
480 		if ((nfds = poll(pfd, i, timeout * 1000)) == -1)
481 			if (errno != EINTR)
482 				fatal("poll error");
483 
		/*
		 * Service the fixed slots first, then walk the dynamic
		 * regions in the order they were filled above: listeners,
		 * peers, mrt dumps, control connections.
		 */
484 		if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLOUT)
485 			if (msgbuf_write(&ibuf_main->w) < 0)
486 				fatal("pipe write error");
487 
488 		if (nfds > 0 && pfd[PFD_PIPE_MAIN].revents & POLLIN) {
489 			nfds--;
490 			session_dispatch_imsg(ibuf_main, PFD_PIPE_MAIN,
491 			    &listener_cnt);
492 		}
493 
494 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLOUT)
495 			if (msgbuf_write(&ibuf_rde->w) < 0)
496 				fatal("pipe write error");
497 
498 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE].revents & POLLIN) {
499 			nfds--;
500 			session_dispatch_imsg(ibuf_rde, PFD_PIPE_ROUTE,
501 			    &listener_cnt);
502 		}
503 
504 		if (nfds > 0 && pfd[PFD_PIPE_ROUTE_CTL].revents & POLLIN) {
505 			nfds--;
506 			session_dispatch_imsg(ibuf_rde_ctl, PFD_PIPE_ROUTE_CTL,
507 			    &listener_cnt);
508 		}
509 
510 		if (nfds > 0 && pfd[PFD_SOCK_CTL].revents & POLLIN) {
511 			nfds--;
512 			ctl_cnt += control_accept(csock, 0);
513 		}
514 
515 		if (nfds > 0 && pfd[PFD_SOCK_RCTL].revents & POLLIN) {
516 			nfds--;
517 			ctl_cnt += control_accept(rcsock, 1);
518 		}
519 
520 		for (j = PFD_LISTENERS_START; nfds > 0 && j < idx_listeners;
521 		    j++)
522 			if (pfd[j].revents & POLLIN) {
523 				nfds--;
524 				session_accept(pfd[j].fd);
525 			}
526 
527 		for (; nfds > 0 && j < idx_peers; j++)
528 			nfds -= session_dispatch_msg(&pfd[j],
529 			    peer_l[j - idx_listeners]);
530 
531 		for (; nfds > 0 && j < idx_mrts; j++)
532 			if (pfd[j].revents & POLLOUT) {
533 				nfds--;
534 				mrt_write(mrt_l[j - idx_peers]);
535 			}
536 
537 		for (; nfds > 0 && j < i; j++)
538 			nfds -= control_dispatch_msg(&pfd[j], &ctl_cnt);
539 	}
540 
	/* shutdown: stop every peer and release what was allocated */
541 	while ((p = peers) != NULL) {
542 		peers = p->next;
543 		session_stop(p, ERR_CEASE_ADMIN_DOWN);
544 		pfkey_remove(p);
545 		free(p);
546 	}
547 
548 	while ((m = LIST_FIRST(&mrthead)) != NULL) {
549 		mrt_clean(m);
550 		LIST_REMOVE(m, entry);
551 		free(m);
552 	}
553 
554 	while ((la = TAILQ_FIRST(conf->listen_addrs)) != NULL) {
555 		TAILQ_REMOVE(conf->listen_addrs, la, entry);
556 		free(la);
557 	}
558 	free(conf->listen_addrs);
559 	free(peer_l);
560 	free(mrt_l);
561 	free(pfd);
562 
	/* one last attempt to push out queued imsgs before dropping them */
563 	msgbuf_write(&ibuf_rde->w);
564 	msgbuf_clear(&ibuf_rde->w);
565 	free(ibuf_rde);
566 	msgbuf_write(&ibuf_main->w);
567 	msgbuf_clear(&ibuf_main->w);
568 	free(ibuf_main);
569 
570 	control_shutdown(csock);
571 	control_shutdown(rcsock);
572 	log_info("session engine exiting");
573 	_exit(0);
574 }
575 
576 void
577 init_conf(struct bgpd_config *c)
578 {
579 	if (!c->holdtime)
580 		c->holdtime = INTERVAL_HOLD;
581 	if (!c->connectretry)
582 		c->connectretry = INTERVAL_CONNECTRETRY;
583 }
584 
585 void
586 init_peer(struct peer *p)
587 {
588 	TAILQ_INIT(&p->timers);
589 	p->fd = p->wbuf.fd = -1;
590 
591 	if (p->conf.if_depend[0])
592 		imsg_compose(ibuf_main, IMSG_IFINFO, 0, 0, -1,
593 		    p->conf.if_depend, sizeof(p->conf.if_depend));
594 	else
595 		p->depend_ok = 1;
596 
597 	peer_cnt++;
598 
599 	change_state(p, STATE_IDLE, EVNT_NONE);
600 	if (p->conf.down)
601 		timer_stop(p, Timer_IdleHold);		/* no autostart */
602 	else
603 		timer_set(p, Timer_IdleHold, 0);	/* start ASAP */
604 
605 	/*
606 	 * on startup, demote if requested.
607 	 * do not handle new peers. they must reach ESTABLISHED beforehands.
608 	 * peers added at runtime have reconf_action set to RECONF_REINIT.
609 	 */
610 	if (p->conf.reconf_action != RECONF_REINIT && p->conf.demote_group[0])
611 		session_demote(p, +1);
612 }
613 
614 void
615 bgp_fsm(struct peer *peer, enum session_events event)
616 {
617 	switch (peer->state) {
618 	case STATE_NONE:
619 		/* nothing */
620 		break;
621 	case STATE_IDLE:
622 		switch (event) {
623 		case EVNT_START:
624 			timer_stop(peer, Timer_Hold);
625 			timer_stop(peer, Timer_Keepalive);
626 			timer_stop(peer, Timer_IdleHold);
627 
628 			/* allocate read buffer */
629 			peer->rbuf = calloc(1, sizeof(struct buf_read));
630 			if (peer->rbuf == NULL)
631 				fatal(NULL);
632 
633 			/* init write buffer */
634 			msgbuf_init(&peer->wbuf);
635 
636 			/* init pfkey - remove old if any, load new ones */
637 			pfkey_remove(peer);
638 			if (pfkey_establish(peer) == -1) {
639 				log_peer_warnx(&peer->conf,
640 				    "pfkey setup failed");
641 				return;
642 			}
643 
644 			peer->stats.last_sent_errcode = 0;
645 			peer->stats.last_sent_suberr = 0;
646 
647 			if (!peer->depend_ok)
648 				timer_stop(peer, Timer_ConnectRetry);
649 			else if (peer->passive || peer->conf.passive ||
650 			    peer->conf.template) {
651 				change_state(peer, STATE_ACTIVE, event);
652 				timer_stop(peer, Timer_ConnectRetry);
653 			} else {
654 				change_state(peer, STATE_CONNECT, event);
655 				timer_set(peer, Timer_ConnectRetry,
656 				    conf->connectretry);
657 				session_connect(peer);
658 			}
659 			peer->passive = 0;
660 			break;
661 		default:
662 			/* ignore */
663 			break;
664 		}
665 		break;
666 	case STATE_CONNECT:
667 		switch (event) {
668 		case EVNT_START:
669 			/* ignore */
670 			break;
671 		case EVNT_CON_OPEN:
672 			session_tcp_established(peer);
673 			session_open(peer);
674 			timer_stop(peer, Timer_ConnectRetry);
675 			peer->holdtime = INTERVAL_HOLD_INITIAL;
676 			start_timer_holdtime(peer);
677 			change_state(peer, STATE_OPENSENT, event);
678 			break;
679 		case EVNT_CON_OPENFAIL:
680 			timer_set(peer, Timer_ConnectRetry,
681 			    conf->connectretry);
682 			session_close_connection(peer);
683 			change_state(peer, STATE_ACTIVE, event);
684 			break;
685 		case EVNT_TIMER_CONNRETRY:
686 			timer_set(peer, Timer_ConnectRetry,
687 			    conf->connectretry);
688 			session_connect(peer);
689 			break;
690 		default:
691 			change_state(peer, STATE_IDLE, event);
692 			break;
693 		}
694 		break;
695 	case STATE_ACTIVE:
696 		switch (event) {
697 		case EVNT_START:
698 			/* ignore */
699 			break;
700 		case EVNT_CON_OPEN:
701 			session_tcp_established(peer);
702 			session_open(peer);
703 			timer_stop(peer, Timer_ConnectRetry);
704 			peer->holdtime = INTERVAL_HOLD_INITIAL;
705 			start_timer_holdtime(peer);
706 			change_state(peer, STATE_OPENSENT, event);
707 			break;
708 		case EVNT_CON_OPENFAIL:
709 			timer_set(peer, Timer_ConnectRetry,
710 			    conf->connectretry);
711 			session_close_connection(peer);
712 			change_state(peer, STATE_ACTIVE, event);
713 			break;
714 		case EVNT_TIMER_CONNRETRY:
715 			timer_set(peer, Timer_ConnectRetry,
716 			    peer->holdtime);
717 			change_state(peer, STATE_CONNECT, event);
718 			session_connect(peer);
719 			break;
720 		default:
721 			change_state(peer, STATE_IDLE, event);
722 			break;
723 		}
724 		break;
725 	case STATE_OPENSENT:
726 		switch (event) {
727 		case EVNT_START:
728 			/* ignore */
729 			break;
730 		case EVNT_STOP:
731 			change_state(peer, STATE_IDLE, event);
732 			break;
733 		case EVNT_CON_CLOSED:
734 			session_close_connection(peer);
735 			timer_set(peer, Timer_ConnectRetry,
736 			    conf->connectretry);
737 			change_state(peer, STATE_ACTIVE, event);
738 			break;
739 		case EVNT_CON_FATAL:
740 			change_state(peer, STATE_IDLE, event);
741 			break;
742 		case EVNT_TIMER_HOLDTIME:
743 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
744 			    0, NULL, 0);
745 			change_state(peer, STATE_IDLE, event);
746 			break;
747 		case EVNT_RCVD_OPEN:
748 			/* parse_open calls change_state itself on failure */
749 			if (parse_open(peer))
750 				break;
751 			session_keepalive(peer);
752 			change_state(peer, STATE_OPENCONFIRM, event);
753 			break;
754 		case EVNT_RCVD_NOTIFICATION:
755 			if (parse_notification(peer)) {
756 				change_state(peer, STATE_IDLE, event);
757 				/* don't punish, capa negotiation */
758 				timer_set(peer, Timer_IdleHold, 0);
759 				peer->IdleHoldTime /= 2;
760 			} else
761 				change_state(peer, STATE_IDLE, event);
762 			break;
763 		default:
764 			session_notification(peer, ERR_FSM, 0, NULL, 0);
765 			change_state(peer, STATE_IDLE, event);
766 			break;
767 		}
768 		break;
769 	case STATE_OPENCONFIRM:
770 		switch (event) {
771 		case EVNT_START:
772 			/* ignore */
773 			break;
774 		case EVNT_STOP:
775 			change_state(peer, STATE_IDLE, event);
776 			break;
777 		case EVNT_CON_CLOSED:
778 		case EVNT_CON_FATAL:
779 			change_state(peer, STATE_IDLE, event);
780 			break;
781 		case EVNT_TIMER_HOLDTIME:
782 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
783 			    0, NULL, 0);
784 			change_state(peer, STATE_IDLE, event);
785 			break;
786 		case EVNT_TIMER_KEEPALIVE:
787 			session_keepalive(peer);
788 			break;
789 		case EVNT_RCVD_KEEPALIVE:
790 			start_timer_holdtime(peer);
791 			change_state(peer, STATE_ESTABLISHED, event);
792 			break;
793 		case EVNT_RCVD_NOTIFICATION:
794 			parse_notification(peer);
795 			change_state(peer, STATE_IDLE, event);
796 			break;
797 		default:
798 			session_notification(peer, ERR_FSM, 0, NULL, 0);
799 			change_state(peer, STATE_IDLE, event);
800 			break;
801 		}
802 		break;
803 	case STATE_ESTABLISHED:
804 		switch (event) {
805 		case EVNT_START:
806 			/* ignore */
807 			break;
808 		case EVNT_STOP:
809 			change_state(peer, STATE_IDLE, event);
810 			break;
811 		case EVNT_CON_CLOSED:
812 		case EVNT_CON_FATAL:
813 			change_state(peer, STATE_IDLE, event);
814 			break;
815 		case EVNT_TIMER_HOLDTIME:
816 			session_notification(peer, ERR_HOLDTIMEREXPIRED,
817 			    0, NULL, 0);
818 			change_state(peer, STATE_IDLE, event);
819 			break;
820 		case EVNT_TIMER_KEEPALIVE:
821 			session_keepalive(peer);
822 			break;
823 		case EVNT_RCVD_KEEPALIVE:
824 			start_timer_holdtime(peer);
825 			break;
826 		case EVNT_RCVD_UPDATE:
827 			start_timer_holdtime(peer);
828 			if (parse_update(peer))
829 				change_state(peer, STATE_IDLE, event);
830 			else
831 				start_timer_holdtime(peer);
832 			break;
833 		case EVNT_RCVD_NOTIFICATION:
834 			parse_notification(peer);
835 			change_state(peer, STATE_IDLE, event);
836 			break;
837 		default:
838 			session_notification(peer, ERR_FSM, 0, NULL, 0);
839 			change_state(peer, STATE_IDLE, event);
840 			break;
841 		}
842 		break;
843 	}
844 }
845 
846 void
847 start_timer_holdtime(struct peer *peer)
848 {
849 	if (peer->holdtime > 0)
850 		timer_set(peer, Timer_Hold, peer->holdtime);
851 	else
852 		timer_stop(peer, Timer_Hold);
853 }
854 
855 void
856 start_timer_keepalive(struct peer *peer)
857 {
858 	if (peer->holdtime > 0)
859 		timer_set(peer, Timer_Keepalive, peer->holdtime / 3);
860 	else
861 		timer_stop(peer, Timer_Keepalive);
862 }
863 
864 void
865 session_close_connection(struct peer *peer)
866 {
867 	if (peer->fd != -1)
868 		close(peer->fd);
869 
870 	peer->fd = peer->wbuf.fd = -1;
871 }
872 
873 void
874 change_state(struct peer *peer, enum session_state state,
875     enum session_events event)
876 {
877 	struct mrt	*mrt;
878 
879 	switch (state) {
880 	case STATE_IDLE:
881 		/* carp demotion first. new peers handled in init_peer */
882 		if (peer->state == STATE_ESTABLISHED &&
883 		    peer->conf.demote_group[0] && !peer->demoted)
884 			session_demote(peer, +1);
885 
886 		/*
887 		 * try to write out what's buffered (maybe a notification),
888 		 * don't bother if it fails
889 		 */
890 		if (peer->state >= STATE_OPENSENT && peer->wbuf.queued)
891 			msgbuf_write(&peer->wbuf);
892 
893 		/*
894 		 * we must start the timer for the next EVNT_START
895 		 * if we are coming here due to an error and the
896 		 * session was not established successfully before, the
897 		 * starttimerinterval needs to be exponentially increased
898 		 */
899 		if (peer->IdleHoldTime == 0)
900 			peer->IdleHoldTime = INTERVAL_IDLE_HOLD_INITIAL;
901 		peer->holdtime = INTERVAL_HOLD_INITIAL;
902 		timer_stop(peer, Timer_ConnectRetry);
903 		timer_stop(peer, Timer_Keepalive);
904 		timer_stop(peer, Timer_Hold);
905 		timer_stop(peer, Timer_IdleHold);
906 		timer_stop(peer, Timer_IdleHoldReset);
907 		session_close_connection(peer);
908 		msgbuf_clear(&peer->wbuf);
909 		free(peer->rbuf);
910 		peer->rbuf = NULL;
911 		bzero(&peer->capa.peer, sizeof(peer->capa.peer));
912 		if (peer->state == STATE_ESTABLISHED)
913 			session_down(peer);
914 		if (event != EVNT_STOP) {
915 			timer_set(peer, Timer_IdleHold, peer->IdleHoldTime);
916 			if (event != EVNT_NONE &&
917 			    peer->IdleHoldTime < MAX_IDLE_HOLD/2)
918 				peer->IdleHoldTime *= 2;
919 		}
920 		if (peer->state == STATE_NONE ||
921 		    peer->state == STATE_ESTABLISHED) {
922 			/* initialize capability negotiation structures */
923 			memcpy(&peer->capa.ann, &peer->conf.capabilities,
924 			    sizeof(peer->capa.ann));
925 			if (!peer->conf.announce_capa)
926 				session_capa_ann_none(peer);
927 		}
928 		break;
929 	case STATE_CONNECT:
930 		break;
931 	case STATE_ACTIVE:
932 		break;
933 	case STATE_OPENSENT:
934 		break;
935 	case STATE_OPENCONFIRM:
936 		break;
937 	case STATE_ESTABLISHED:
938 		timer_set(peer, Timer_IdleHoldReset, peer->IdleHoldTime);
939 		if (peer->demoted)
940 			timer_set(peer, Timer_CarpUndemote,
941 			    INTERVAL_HOLD_DEMOTED);
942 		session_up(peer);
943 		break;
944 	default:		/* something seriously fucked */
945 		break;
946 	}
947 
948 	log_statechange(peer, state, event);
949 	LIST_FOREACH(mrt, &mrthead, entry) {
950 		if (!(mrt->type == MRT_ALL_IN || mrt->type == MRT_ALL_OUT))
951 			continue;
952 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
953 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
954 		    mrt->group_id == peer->conf.groupid))
955 			mrt_dump_state(mrt, peer->state, state, peer);
956 	}
957 	peer->prev_state = peer->state;
958 	peer->state = state;
959 }
960 
961 void
962 session_accept(int listenfd)
963 {
964 	int			 connfd;
965 	int			 opt;
966 	socklen_t		 len;
967 	struct sockaddr_storage	 cliaddr;
968 	struct peer		*p = NULL;
969 
970 	len = sizeof(cliaddr);
971 	if ((connfd = accept(listenfd,
972 	    (struct sockaddr *)&cliaddr, &len)) == -1) {
973 		if (errno == EWOULDBLOCK || errno == EINTR)
974 			return;
975 		else
976 			log_warn("accept");
977 	}
978 
979 	p = getpeerbyip((struct sockaddr *)&cliaddr);
980 
981 	if (p != NULL && p->state == STATE_IDLE && p->errcnt < 2) {
982 		if (timer_running(p, Timer_IdleHold, NULL)) {
983 			/* fast reconnect after clear */
984 			p->passive = 1;
985 			bgp_fsm(p, EVNT_START);
986 		}
987 	}
988 
989 	if (p != NULL &&
990 	    (p->state == STATE_CONNECT || p->state == STATE_ACTIVE)) {
991 		if (p->fd != -1) {
992 			if (p->state == STATE_CONNECT)
993 				session_close_connection(p);
994 			else {
995 				close(connfd);
996 				return;
997 			}
998 		}
999 
1000 		if (p->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1001 			log_peer_warnx(&p->conf,
1002 			    "ipsec or md5sig configured but not available");
1003 			close(connfd);
1004 			return;
1005 		}
1006 
1007 		if (p->conf.auth.method == AUTH_MD5SIG) {
1008 			if (sysdep.no_md5sig) {
1009 				log_peer_warnx(&p->conf,
1010 				    "md5sig configured but not available");
1011 				close(connfd);
1012 				return;
1013 			}
1014 			len = sizeof(opt);
1015 			if (getsockopt(connfd, IPPROTO_TCP, TCP_MD5SIG,
1016 			    &opt, &len) == -1)
1017 				fatal("getsockopt TCP_MD5SIG");
1018 			if (!opt) {	/* non-md5'd connection! */
1019 				log_peer_warnx(&p->conf,
1020 				    "connection attempt without md5 signature");
1021 				close(connfd);
1022 				return;
1023 			}
1024 		}
1025 		p->fd = p->wbuf.fd = connfd;
1026 		if (session_setup_socket(p)) {
1027 			close(connfd);
1028 			return;
1029 		}
1030 		session_socket_blockmode(connfd, BM_NONBLOCK);
1031 		bgp_fsm(p, EVNT_CON_OPEN);
1032 	} else {
1033 		log_conn_attempt(p, (struct sockaddr *)&cliaddr);
1034 		close(connfd);
1035 	}
1036 }
1037 
1038 int
1039 session_connect(struct peer *peer)
1040 {
1041 	int			 opt = 1;
1042 	struct sockaddr		*sa;
1043 
1044 	/*
1045 	 * we do not need the overcomplicated collision detection RFC 1771
1046 	 * describes; we simply make sure there is only ever one concurrent
1047 	 * tcp connection per peer.
1048 	 */
1049 	if (peer->fd != -1)
1050 		return (-1);
1051 
1052 	if ((peer->fd = socket(aid2af(peer->conf.remote_addr.aid), SOCK_STREAM,
1053 	    IPPROTO_TCP)) == -1) {
1054 		log_peer_warn(&peer->conf, "session_connect socket");
1055 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1056 		return (-1);
1057 	}
1058 
1059 	if (peer->conf.auth.method != AUTH_NONE && sysdep.no_pfkey) {
1060 		log_peer_warnx(&peer->conf,
1061 		    "ipsec or md5sig configured but not available");
1062 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1063 		return (-1);
1064 	}
1065 
1066 	if (peer->conf.auth.method == AUTH_MD5SIG) {
1067 		if (sysdep.no_md5sig) {
1068 			log_peer_warnx(&peer->conf,
1069 			    "md5sig configured but not available");
1070 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1071 			return (-1);
1072 		}
1073 		if (setsockopt(peer->fd, IPPROTO_TCP, TCP_MD5SIG,
1074 		    &opt, sizeof(opt)) == -1) {
1075 			log_peer_warn(&peer->conf, "setsockopt md5sig");
1076 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1077 			return (-1);
1078 		}
1079 	}
1080 	peer->wbuf.fd = peer->fd;
1081 
1082 	/* if update source is set we need to bind() */
1083 	if ((sa = addr2sa(&peer->conf.local_addr, 0)) != NULL) {
1084 		if (bind(peer->fd, sa, sa->sa_len) == -1) {
1085 			log_peer_warn(&peer->conf, "session_connect bind");
1086 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1087 			return (-1);
1088 		}
1089 	}
1090 
1091 	if (session_setup_socket(peer)) {
1092 		bgp_fsm(peer, EVNT_CON_OPENFAIL);
1093 		return (-1);
1094 	}
1095 
1096 	session_socket_blockmode(peer->fd, BM_NONBLOCK);
1097 
1098 	sa = addr2sa(&peer->conf.remote_addr, BGP_PORT);
1099 	if (connect(peer->fd, sa, sa->sa_len) == -1) {
1100 		if (errno != EINPROGRESS) {
1101 			if (errno != peer->lasterr)
1102 				log_peer_warn(&peer->conf, "connect");
1103 			peer->lasterr = errno;
1104 			bgp_fsm(peer, EVNT_CON_OPENFAIL);
1105 			return (-1);
1106 		}
1107 	} else
1108 		bgp_fsm(peer, EVNT_CON_OPEN);
1109 
1110 	return (0);
1111 }
1112 
1113 int
1114 session_setup_socket(struct peer *p)
1115 {
1116 	int	ttl = p->conf.distance;
1117 	int	pre = IPTOS_PREC_INTERNETCONTROL;
1118 	int	nodelay = 1;
1119 	int	bsize;
1120 
1121 	switch (p->conf.remote_addr.aid) {
1122 	case AID_INET:
1123 		/* set precedence, see RFC 1771 appendix 5 */
1124 		if (setsockopt(p->fd, IPPROTO_IP, IP_TOS, &pre, sizeof(pre)) ==
1125 		    -1) {
1126 			log_peer_warn(&p->conf,
1127 			    "session_setup_socket setsockopt TOS");
1128 			return (-1);
1129 		}
1130 
1131 		if (p->conf.ebgp) {
1132 			/* set TTL to foreign router's distance
1133 			   1=direct n=multihop with ttlsec, we always use 255 */
1134 			if (p->conf.ttlsec) {
1135 				ttl = 256 - p->conf.distance;
1136 				if (setsockopt(p->fd, IPPROTO_IP, IP_MINTTL,
1137 				    &ttl, sizeof(ttl)) == -1) {
1138 					log_peer_warn(&p->conf,
1139 					    "session_setup_socket: "
1140 					    "setsockopt MINTTL");
1141 					return (-1);
1142 				}
1143 				ttl = 255;
1144 			}
1145 
1146 			if (setsockopt(p->fd, IPPROTO_IP, IP_TTL, &ttl,
1147 			    sizeof(ttl)) == -1) {
1148 				log_peer_warn(&p->conf,
1149 				    "session_setup_socket setsockopt TTL");
1150 				return (-1);
1151 			}
1152 		}
1153 		break;
1154 	case AID_INET6:
1155 		if (p->conf.ebgp) {
1156 			/* set hoplimit to foreign router's distance */
1157 			if (setsockopt(p->fd, IPPROTO_IPV6, IPV6_UNICAST_HOPS,
1158 			    &ttl, sizeof(ttl)) == -1) {
1159 				log_peer_warn(&p->conf,
1160 				    "session_setup_socket setsockopt hoplimit");
1161 				return (-1);
1162 			}
1163 		}
1164 		break;
1165 	}
1166 
1167 	/* set TCP_NODELAY */
1168 	if (setsockopt(p->fd, IPPROTO_TCP, TCP_NODELAY, &nodelay,
1169 	    sizeof(nodelay)) == -1) {
1170 		log_peer_warn(&p->conf,
1171 		    "session_setup_socket setsockopt TCP_NODELAY");
1172 		return (-1);
1173 	}
1174 
1175 	/* only increase bufsize (and thus window) if md5 or ipsec is in use */
1176 	if (p->conf.auth.method != AUTH_NONE) {
1177 		/* try to increase bufsize. no biggie if it fails */
1178 		bsize = 65535;
1179 		while (setsockopt(p->fd, SOL_SOCKET, SO_RCVBUF, &bsize,
1180 		    sizeof(bsize)) == -1)
1181 			bsize /= 2;
1182 		bsize = 65535;
1183 		while (setsockopt(p->fd, SOL_SOCKET, SO_SNDBUF, &bsize,
1184 		    sizeof(bsize)) == -1)
1185 			bsize /= 2;
1186 	}
1187 
1188 	return (0);
1189 }
1190 
1191 void
1192 session_socket_blockmode(int fd, enum blockmodes bm)
1193 {
1194 	int	flags;
1195 
1196 	if ((flags = fcntl(fd, F_GETFL, 0)) == -1)
1197 		fatal("fcntl F_GETFL");
1198 
1199 	if (bm == BM_NONBLOCK)
1200 		flags |= O_NONBLOCK;
1201 	else
1202 		flags &= ~O_NONBLOCK;
1203 
1204 	if ((flags = fcntl(fd, F_SETFL, flags)) == -1)
1205 		fatal("fcntl F_SETFL");
1206 }
1207 
1208 void
1209 session_tcp_established(struct peer *peer)
1210 {
1211 	socklen_t	len;
1212 
1213 	len = sizeof(peer->sa_local);
1214 	if (getsockname(peer->fd, (struct sockaddr *)&peer->sa_local,
1215 	    &len) == -1)
1216 		log_warn("getsockname");
1217 	len = sizeof(peer->sa_remote);
1218 	if (getpeername(peer->fd, (struct sockaddr *)&peer->sa_remote,
1219 	    &len) == -1)
1220 		log_warn("getpeername");
1221 }
1222 
1223 void
1224 session_capa_ann_none(struct peer *peer)
1225 {
1226 	bzero(&peer->capa.ann, sizeof(peer->capa.ann));
1227 }
1228 
/*
 * Append a capability header (code byte followed by length byte) to opb.
 * Returns the sum of the buf_add() results.
 */
int
session_capa_add(struct buf *opb, u_int8_t capa_code, u_int8_t capa_len)
{
	int	failed;

	failed = buf_add(opb, &capa_code, sizeof(capa_code));
	failed += buf_add(opb, &capa_len, sizeof(capa_len));

	return (failed);
}
1238 
/*
 * Append the 4 byte multiprotocol capability value for aid to buf:
 * AFI in network byte order, one zero byte, SAFI.
 * An aid that aid2afi() rejects is fatal.
 * Returns the sum of the buf_add() results.
 */
int
session_capa_add_mp(struct buf *buf, u_int8_t aid)
{
	u_int16_t	 afi_net;
	u_int8_t	 safi;
	u_int8_t	 reserved = 0;
	int		 failed;

	if (aid2afi(aid, &afi_net, &safi) == -1)
		fatalx("session_capa_add_mp: bad afi/safi pair");
	afi_net = htons(afi_net);

	failed = buf_add(buf, &afi_net, sizeof(afi_net));
	failed += buf_add(buf, &reserved, sizeof(reserved));
	failed += buf_add(buf, &safi, sizeof(safi));

	return (failed);
}
1255 
1256 struct bgp_msg *
1257 session_newmsg(enum msg_type msgtype, u_int16_t len)
1258 {
1259 	struct bgp_msg		*msg;
1260 	struct msg_header	 hdr;
1261 	struct buf		*buf;
1262 	int			 errs = 0;
1263 
1264 	memset(&hdr.marker, 0xff, sizeof(hdr.marker));
1265 	hdr.len = htons(len);
1266 	hdr.type = msgtype;
1267 
1268 	if ((buf = buf_open(len)) == NULL)
1269 		return (NULL);
1270 
1271 	errs += buf_add(buf, &hdr.marker, sizeof(hdr.marker));
1272 	errs += buf_add(buf, &hdr.len, sizeof(hdr.len));
1273 	errs += buf_add(buf, &hdr.type, sizeof(hdr.type));
1274 
1275 	if (errs > 0 ||
1276 	    (msg = calloc(1, sizeof(*msg))) == NULL) {
1277 		buf_free(buf);
1278 		return (NULL);
1279 	}
1280 
1281 	msg->buf = buf;
1282 	msg->type = msgtype;
1283 	msg->len = len;
1284 
1285 	return (msg);
1286 }
1287 
1288 int
1289 session_sendmsg(struct bgp_msg *msg, struct peer *p)
1290 {
1291 	struct mrt		*mrt;
1292 
1293 	LIST_FOREACH(mrt, &mrthead, entry) {
1294 		if (!(mrt->type == MRT_ALL_OUT || (msg->type == UPDATE &&
1295 		    mrt->type == MRT_UPDATE_OUT)))
1296 			continue;
1297 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1298 		    mrt->peer_id == p->conf.id || (mrt->group_id == 0 &&
1299 		    mrt->group_id == p->conf.groupid))
1300 			mrt_dump_bgp_msg(mrt, msg->buf->buf, msg->len, p);
1301 	}
1302 
1303 	buf_close(&p->wbuf, msg->buf);
1304 	free(msg);
1305 	return (0);
1306 }
1307 
1308 void
1309 session_open(struct peer *p)
1310 {
1311 	struct bgp_msg		*buf;
1312 	struct buf		*opb;
1313 	struct msg_open		 msg;
1314 	u_int16_t		 len;
1315 	u_int8_t		 i, op_type, optparamlen = 0;
1316 	u_int			 errs = 0;
1317 
1318 
1319 	if ((opb = buf_dynamic(0, UCHAR_MAX - sizeof(op_type) -
1320 	    sizeof(optparamlen))) == NULL) {
1321 		bgp_fsm(p, EVNT_CON_FATAL);
1322 		return;
1323 	}
1324 
1325 	/* multiprotocol extensions, RFC 4760 */
1326 	for (i = 0; i < AID_MAX; i++)
1327 		if (p->capa.ann.mp[i]) {	/* 4 bytes data */
1328 			errs += session_capa_add(opb, CAPA_MP, 4);
1329 			errs += session_capa_add_mp(opb, i);
1330 		}
1331 
1332 	/* route refresh, RFC 2918 */
1333 	if (p->capa.ann.refresh)	/* no data */
1334 		errs += session_capa_add(opb, CAPA_REFRESH, 0);
1335 
1336 	/* End-of-RIB marker, RFC 4724 */
1337 	if (p->capa.ann.restart) {	/* 2 bytes data */
1338 		u_char		c[2];
1339 
1340 		c[0] = 0x80; /* we're always restarting */
1341 		c[1] = 0;
1342 		errs += session_capa_add(opb, CAPA_RESTART, 2);
1343 		errs += buf_add(opb, &c, 2);
1344 	}
1345 
1346 	/* 4-bytes AS numbers, draft-ietf-idr-as4bytes-13 */
1347 	if (p->capa.ann.as4byte) {	/* 4 bytes data */
1348 		u_int32_t	nas;
1349 
1350 		nas = htonl(conf->as);
1351 		errs += session_capa_add(opb, CAPA_AS4BYTE, sizeof(nas));
1352 		errs += buf_add(opb, &nas, sizeof(nas));
1353 	}
1354 
1355 	if (buf_size(opb))
1356 		optparamlen = buf_size(opb) + sizeof(op_type) +
1357 		    sizeof(optparamlen);
1358 
1359 	len = MSGSIZE_OPEN_MIN + optparamlen;
1360 	if (errs || (buf = session_newmsg(OPEN, len)) == NULL) {
1361 		buf_free(opb);
1362 		bgp_fsm(p, EVNT_CON_FATAL);
1363 		return;
1364 	}
1365 
1366 	msg.version = 4;
1367 	msg.myas = htons(conf->short_as);
1368 	if (p->conf.holdtime)
1369 		msg.holdtime = htons(p->conf.holdtime);
1370 	else
1371 		msg.holdtime = htons(conf->holdtime);
1372 	msg.bgpid = conf->bgpid;	/* is already in network byte order */
1373 	msg.optparamlen = optparamlen;
1374 
1375 	errs += buf_add(buf->buf, &msg.version, sizeof(msg.version));
1376 	errs += buf_add(buf->buf, &msg.myas, sizeof(msg.myas));
1377 	errs += buf_add(buf->buf, &msg.holdtime, sizeof(msg.holdtime));
1378 	errs += buf_add(buf->buf, &msg.bgpid, sizeof(msg.bgpid));
1379 	errs += buf_add(buf->buf, &msg.optparamlen, sizeof(msg.optparamlen));
1380 
1381 	if (optparamlen) {
1382 		op_type = OPT_PARAM_CAPABILITIES;
1383 		optparamlen = buf_size(opb);
1384 		errs += buf_add(buf->buf, &op_type, sizeof(op_type));
1385 		errs += buf_add(buf->buf, &optparamlen, sizeof(optparamlen));
1386 		errs += buf_add(buf->buf, opb->buf, buf_size(opb));
1387 	}
1388 
1389 	buf_free(opb);
1390 
1391 	if (errs > 0) {
1392 		buf_free(buf->buf);
1393 		free(buf);
1394 		bgp_fsm(p, EVNT_CON_FATAL);
1395 		return;
1396 	}
1397 
1398 	if (session_sendmsg(buf, p) == -1) {
1399 		bgp_fsm(p, EVNT_CON_FATAL);
1400 		return;
1401 	}
1402 
1403 	p->stats.msg_sent_open++;
1404 }
1405 
1406 void
1407 session_keepalive(struct peer *p)
1408 {
1409 	struct bgp_msg		*buf;
1410 
1411 	if ((buf = session_newmsg(KEEPALIVE, MSGSIZE_KEEPALIVE)) == NULL ||
1412 	    session_sendmsg(buf, p) == -1) {
1413 		bgp_fsm(p, EVNT_CON_FATAL);
1414 		return;
1415 	}
1416 
1417 	start_timer_keepalive(p);
1418 	p->stats.msg_sent_keepalive++;
1419 }
1420 
1421 void
1422 session_update(u_int32_t peerid, void *data, size_t datalen)
1423 {
1424 	struct peer		*p;
1425 	struct bgp_msg		*buf;
1426 
1427 	if ((p = getpeerbyid(peerid)) == NULL) {
1428 		log_warnx("no such peer: id=%u", peerid);
1429 		return;
1430 	}
1431 
1432 	if (p->state != STATE_ESTABLISHED)
1433 		return;
1434 
1435 	if ((buf = session_newmsg(UPDATE, MSGSIZE_HEADER + datalen)) == NULL) {
1436 		bgp_fsm(p, EVNT_CON_FATAL);
1437 		return;
1438 	}
1439 
1440 	if (buf_add(buf->buf, data, datalen)) {
1441 		buf_free(buf->buf);
1442 		free(buf);
1443 		bgp_fsm(p, EVNT_CON_FATAL);
1444 		return;
1445 	}
1446 
1447 	if (session_sendmsg(buf, p) == -1) {
1448 		bgp_fsm(p, EVNT_CON_FATAL);
1449 		return;
1450 	}
1451 
1452 	start_timer_keepalive(p);
1453 	p->stats.msg_sent_update++;
1454 }
1455 
1456 void
1457 session_notification(struct peer *p, u_int8_t errcode, u_int8_t subcode,
1458     void *data, ssize_t datalen)
1459 {
1460 	struct bgp_msg		*buf;
1461 	u_int			 errs = 0;
1462 
1463 	if (p->stats.last_sent_errcode)	/* some notification already sent */
1464 		return;
1465 
1466 	if ((buf = session_newmsg(NOTIFICATION,
1467 	    MSGSIZE_NOTIFICATION_MIN + datalen)) == NULL) {
1468 		bgp_fsm(p, EVNT_CON_FATAL);
1469 		return;
1470 	}
1471 
1472 	errs += buf_add(buf->buf, &errcode, sizeof(errcode));
1473 	errs += buf_add(buf->buf, &subcode, sizeof(subcode));
1474 
1475 	if (datalen > 0)
1476 		errs += buf_add(buf->buf, data, datalen);
1477 
1478 	if (errs > 0) {
1479 		buf_free(buf->buf);
1480 		free(buf);
1481 		bgp_fsm(p, EVNT_CON_FATAL);
1482 		return;
1483 	}
1484 
1485 	if (session_sendmsg(buf, p) == -1) {
1486 		bgp_fsm(p, EVNT_CON_FATAL);
1487 		return;
1488 	}
1489 
1490 	p->stats.msg_sent_notification++;
1491 	p->stats.last_sent_errcode = errcode;
1492 	p->stats.last_sent_suberr = subcode;
1493 }
1494 
1495 int
1496 session_neighbor_rrefresh(struct peer *p)
1497 {
1498 	u_int8_t	i;
1499 
1500 	if (!p->capa.peer.refresh)
1501 		return (-1);
1502 
1503 	for (i = 0; i < AID_MAX; i++) {
1504 		if (p->capa.peer.mp[i] != 0)
1505 			session_rrefresh(p, i);
1506 	}
1507 
1508 	return (0);
1509 }
1510 
1511 void
1512 session_rrefresh(struct peer *p, u_int8_t aid)
1513 {
1514 	struct bgp_msg		*buf;
1515 	int			 errs = 0;
1516 	u_int16_t		 afi;
1517 	u_int8_t		 safi, null8 = 0;
1518 
1519 	if (aid2afi(aid, &afi, &safi) == -1)
1520 		fatalx("session_rrefresh: bad afi/safi pair");
1521 
1522 	if ((buf = session_newmsg(RREFRESH, MSGSIZE_RREFRESH)) == NULL) {
1523 		bgp_fsm(p, EVNT_CON_FATAL);
1524 		return;
1525 	}
1526 
1527 	afi = htons(afi);
1528 	errs += buf_add(buf->buf, &afi, sizeof(afi));
1529 	errs += buf_add(buf->buf, &null8, sizeof(null8));
1530 	errs += buf_add(buf->buf, &safi, sizeof(safi));
1531 
1532 	if (errs > 0) {
1533 		buf_free(buf->buf);
1534 		free(buf);
1535 		bgp_fsm(p, EVNT_CON_FATAL);
1536 		return;
1537 	}
1538 
1539 	if (session_sendmsg(buf, p) == -1) {
1540 		bgp_fsm(p, EVNT_CON_FATAL);
1541 		return;
1542 	}
1543 
1544 	p->stats.msg_sent_rrefresh++;
1545 }
1546 
1547 int
1548 session_dispatch_msg(struct pollfd *pfd, struct peer *p)
1549 {
1550 	ssize_t		n, rpos, av, left;
1551 	socklen_t	len;
1552 	int		error, processed = 0;
1553 	u_int16_t	msglen;
1554 	u_int8_t	msgtype;
1555 
1556 	if (p->state == STATE_CONNECT) {
1557 		if (pfd->revents & POLLOUT) {
1558 			if (pfd->revents & POLLIN) {
1559 				/* error occurred */
1560 				len = sizeof(error);
1561 				if (getsockopt(pfd->fd, SOL_SOCKET, SO_ERROR,
1562 				    &error, &len) == -1 || error) {
1563 					if (error)
1564 						errno = error;
1565 					if (errno != p->lasterr) {
1566 						log_peer_warn(&p->conf,
1567 						    "socket error");
1568 						p->lasterr = errno;
1569 					}
1570 					bgp_fsm(p, EVNT_CON_OPENFAIL);
1571 					return (1);
1572 				}
1573 			}
1574 			bgp_fsm(p, EVNT_CON_OPEN);
1575 			return (1);
1576 		}
1577 		if (pfd->revents & POLLHUP) {
1578 			bgp_fsm(p, EVNT_CON_OPENFAIL);
1579 			return (1);
1580 		}
1581 		if (pfd->revents & (POLLERR|POLLNVAL)) {
1582 			bgp_fsm(p, EVNT_CON_FATAL);
1583 			return (1);
1584 		}
1585 		return (0);
1586 	}
1587 
1588 	if (pfd->revents & POLLHUP) {
1589 		bgp_fsm(p, EVNT_CON_CLOSED);
1590 		return (1);
1591 	}
1592 	if (pfd->revents & (POLLERR|POLLNVAL)) {
1593 		bgp_fsm(p, EVNT_CON_FATAL);
1594 		return (1);
1595 	}
1596 
1597 	if (pfd->revents & POLLOUT && p->wbuf.queued) {
1598 		if ((error = msgbuf_write(&p->wbuf)) < 0) {
1599 			if (error == -2)
1600 				log_peer_warnx(&p->conf, "Connection closed");
1601 			else
1602 				log_peer_warn(&p->conf, "write error");
1603 			bgp_fsm(p, EVNT_CON_FATAL);
1604 			return (1);
1605 		}
1606 		if (!(pfd->revents & POLLIN))
1607 			return (1);
1608 	}
1609 
1610 	if (p->rbuf && pfd->revents & POLLIN) {
1611 		if ((n = read(p->fd, p->rbuf->buf + p->rbuf->wpos,
1612 		    sizeof(p->rbuf->buf) - p->rbuf->wpos)) == -1) {
1613 			if (errno != EINTR && errno != EAGAIN) {
1614 				log_peer_warn(&p->conf, "read error");
1615 				bgp_fsm(p, EVNT_CON_FATAL);
1616 			}
1617 			return (1);
1618 		}
1619 		if (n == 0) {	/* connection closed */
1620 			bgp_fsm(p, EVNT_CON_CLOSED);
1621 			return (1);
1622 		}
1623 
1624 		rpos = 0;
1625 		av = p->rbuf->wpos + n;
1626 		p->stats.last_read = time(NULL);
1627 
1628 		/*
1629 		 * session might drop to IDLE -> buffers deallocated
1630 		 * we MUST check rbuf != NULL before use
1631 		 */
1632 		for (;;) {
1633 			if (rpos + MSGSIZE_HEADER > av)
1634 				break;
1635 			if (p->rbuf == NULL)
1636 				break;
1637 			if (parse_header(p, p->rbuf->buf + rpos, &msglen,
1638 			    &msgtype) == -1)
1639 				return (0);
1640 			if (rpos + msglen > av)
1641 				break;
1642 			p->rbuf->rptr = p->rbuf->buf + rpos;
1643 
1644 			switch (msgtype) {
1645 			case OPEN:
1646 				bgp_fsm(p, EVNT_RCVD_OPEN);
1647 				p->stats.msg_rcvd_open++;
1648 				break;
1649 			case UPDATE:
1650 				bgp_fsm(p, EVNT_RCVD_UPDATE);
1651 				p->stats.msg_rcvd_update++;
1652 				break;
1653 			case NOTIFICATION:
1654 				bgp_fsm(p, EVNT_RCVD_NOTIFICATION);
1655 				p->stats.msg_rcvd_notification++;
1656 				break;
1657 			case KEEPALIVE:
1658 				bgp_fsm(p, EVNT_RCVD_KEEPALIVE);
1659 				p->stats.msg_rcvd_keepalive++;
1660 				break;
1661 			case RREFRESH:
1662 				parse_refresh(p);
1663 				p->stats.msg_rcvd_rrefresh++;
1664 				break;
1665 			default:	/* cannot happen */
1666 				session_notification(p, ERR_HEADER,
1667 				    ERR_HDR_TYPE, &msgtype, 1);
1668 				log_warnx("received message with "
1669 				    "unknown type %u", msgtype);
1670 				bgp_fsm(p, EVNT_CON_FATAL);
1671 			}
1672 			rpos += msglen;
1673 			if (++processed > MSG_PROCESS_LIMIT)
1674 				break;
1675 		}
1676 		if (p->rbuf == NULL)
1677 			return (1);
1678 
1679 		if (rpos < av) {
1680 			left = av - rpos;
1681 			memcpy(&p->rbuf->buf, p->rbuf->buf + rpos, left);
1682 			p->rbuf->wpos = left;
1683 		} else
1684 			p->rbuf->wpos = 0;
1685 
1686 		return (1);
1687 	}
1688 	return (0);
1689 }
1690 
1691 int
1692 parse_header(struct peer *peer, u_char *data, u_int16_t *len, u_int8_t *type)
1693 {
1694 	struct mrt		*mrt;
1695 	u_char			*p;
1696 	u_int16_t		 olen;
1697 	static const u_int8_t	 marker[MSGSIZE_HEADER_MARKER] = { 0xff, 0xff,
1698 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1699 				    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
1700 
1701 	/* caller MUST make sure we are getting 19 bytes! */
1702 	p = data;
1703 	if (memcmp(p, marker, sizeof(marker))) {
1704 		log_peer_warnx(&peer->conf, "sync error");
1705 		session_notification(peer, ERR_HEADER, ERR_HDR_SYNC, NULL, 0);
1706 		bgp_fsm(peer, EVNT_CON_FATAL);
1707 		return (-1);
1708 	}
1709 	p += MSGSIZE_HEADER_MARKER;
1710 
1711 	memcpy(&olen, p, 2);
1712 	*len = ntohs(olen);
1713 	p += 2;
1714 	memcpy(type, p, 1);
1715 
1716 	if (*len < MSGSIZE_HEADER || *len > MAX_PKTSIZE) {
1717 		log_peer_warnx(&peer->conf,
1718 		    "received message: illegal length: %u byte", *len);
1719 		session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1720 		    &olen, sizeof(olen));
1721 		bgp_fsm(peer, EVNT_CON_FATAL);
1722 		return (-1);
1723 	}
1724 
1725 	switch (*type) {
1726 	case OPEN:
1727 		if (*len < MSGSIZE_OPEN_MIN) {
1728 			log_peer_warnx(&peer->conf,
1729 			    "received OPEN: illegal len: %u byte", *len);
1730 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1731 			    &olen, sizeof(olen));
1732 			bgp_fsm(peer, EVNT_CON_FATAL);
1733 			return (-1);
1734 		}
1735 		break;
1736 	case NOTIFICATION:
1737 		if (*len < MSGSIZE_NOTIFICATION_MIN) {
1738 			log_peer_warnx(&peer->conf,
1739 			    "received NOTIFICATION: illegal len: %u byte",
1740 			    *len);
1741 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1742 			    &olen, sizeof(olen));
1743 			bgp_fsm(peer, EVNT_CON_FATAL);
1744 			return (-1);
1745 		}
1746 		break;
1747 	case UPDATE:
1748 		if (*len < MSGSIZE_UPDATE_MIN) {
1749 			log_peer_warnx(&peer->conf,
1750 			    "received UPDATE: illegal len: %u byte", *len);
1751 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1752 			    &olen, sizeof(olen));
1753 			bgp_fsm(peer, EVNT_CON_FATAL);
1754 			return (-1);
1755 		}
1756 		break;
1757 	case KEEPALIVE:
1758 		if (*len != MSGSIZE_KEEPALIVE) {
1759 			log_peer_warnx(&peer->conf,
1760 			    "received KEEPALIVE: illegal len: %u byte", *len);
1761 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1762 			    &olen, sizeof(olen));
1763 			bgp_fsm(peer, EVNT_CON_FATAL);
1764 			return (-1);
1765 		}
1766 		break;
1767 	case RREFRESH:
1768 		if (*len != MSGSIZE_RREFRESH) {
1769 			log_peer_warnx(&peer->conf,
1770 			    "received RREFRESH: illegal len: %u byte", *len);
1771 			session_notification(peer, ERR_HEADER, ERR_HDR_LEN,
1772 			    &olen, sizeof(olen));
1773 			bgp_fsm(peer, EVNT_CON_FATAL);
1774 			return (-1);
1775 		}
1776 		break;
1777 	default:
1778 		log_peer_warnx(&peer->conf,
1779 		    "received msg with unknown type %u", *type);
1780 		session_notification(peer, ERR_HEADER, ERR_HDR_TYPE,
1781 		    type, 1);
1782 		bgp_fsm(peer, EVNT_CON_FATAL);
1783 		return (-1);
1784 	}
1785 	LIST_FOREACH(mrt, &mrthead, entry) {
1786 		if (!(mrt->type == MRT_ALL_IN || (*type == UPDATE &&
1787 		    mrt->type == MRT_UPDATE_IN)))
1788 			continue;
1789 		if ((mrt->peer_id == 0 && mrt->group_id == 0) ||
1790 		    mrt->peer_id == peer->conf.id || (mrt->group_id != 0 &&
1791 		    mrt->group_id == peer->conf.groupid))
1792 			mrt_dump_bgp_msg(mrt, data, *len, peer);
1793 	}
1794 	return (0);
1795 }
1796 
1797 int
1798 parse_open(struct peer *peer)
1799 {
1800 	u_char		*p, *op_val;
1801 	u_int8_t	 version, rversion;
1802 	u_int16_t	 short_as, msglen;
1803 	u_int16_t	 holdtime, oholdtime, myholdtime;
1804 	u_int32_t	 as, bgpid;
1805 	u_int8_t	 optparamlen, plen;
1806 	u_int8_t	 op_type, op_len;
1807 
1808 	p = peer->rbuf->rptr;
1809 	p += MSGSIZE_HEADER_MARKER;
1810 	memcpy(&msglen, p, sizeof(msglen));
1811 	msglen = ntohs(msglen);
1812 
1813 	p = peer->rbuf->rptr;
1814 	p += MSGSIZE_HEADER;	/* header is already checked */
1815 
1816 	memcpy(&version, p, sizeof(version));
1817 	p += sizeof(version);
1818 
1819 	if (version != BGP_VERSION) {
1820 		log_peer_warnx(&peer->conf,
1821 		    "peer wants unrecognized version %u", version);
1822 		if (version > BGP_VERSION)
1823 			rversion = version - BGP_VERSION;
1824 		else
1825 			rversion = BGP_VERSION;
1826 		session_notification(peer, ERR_OPEN, ERR_OPEN_VERSION,
1827 		    &rversion, sizeof(rversion));
1828 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1829 		return (-1);
1830 	}
1831 
1832 	memcpy(&short_as, p, sizeof(short_as));
1833 	p += sizeof(short_as);
1834 	as = peer->short_as = ntohs(short_as);
1835 
1836 	memcpy(&oholdtime, p, sizeof(oholdtime));
1837 	p += sizeof(oholdtime);
1838 
1839 	holdtime = ntohs(oholdtime);
1840 	if (holdtime && holdtime < peer->conf.min_holdtime) {
1841 		log_peer_warnx(&peer->conf,
1842 		    "peer requests unacceptable holdtime %u", holdtime);
1843 		session_notification(peer, ERR_OPEN, ERR_OPEN_HOLDTIME,
1844 		    NULL, 0);
1845 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1846 		return (-1);
1847 	}
1848 
1849 	myholdtime = peer->conf.holdtime;
1850 	if (!myholdtime)
1851 		myholdtime = conf->holdtime;
1852 	if (holdtime < myholdtime)
1853 		peer->holdtime = holdtime;
1854 	else
1855 		peer->holdtime = myholdtime;
1856 
1857 	memcpy(&bgpid, p, sizeof(bgpid));
1858 	p += sizeof(bgpid);
1859 
1860 	/* check bgpid for validity - just disallow 0 */
1861 	if (ntohl(bgpid) == 0) {
1862 		log_peer_warnx(&peer->conf, "peer BGPID %lu unacceptable",
1863 		    ntohl(bgpid));
1864 		session_notification(peer, ERR_OPEN, ERR_OPEN_BGPID,
1865 		    NULL, 0);
1866 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1867 		return (-1);
1868 	}
1869 	peer->remote_bgpid = bgpid;
1870 
1871 	memcpy(&optparamlen, p, sizeof(optparamlen));
1872 	p += sizeof(optparamlen);
1873 
1874 	if (optparamlen != msglen - MSGSIZE_OPEN_MIN) {
1875 			log_peer_warnx(&peer->conf,
1876 			    "corrupt OPEN message received: length mismatch");
1877 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
1878 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1879 			return (-1);
1880 	}
1881 
1882 	plen = optparamlen;
1883 	while (plen > 0) {
1884 		if (plen < 2) {
1885 			log_peer_warnx(&peer->conf,
1886 			    "corrupt OPEN message received, len wrong");
1887 			session_notification(peer, ERR_OPEN, 0, NULL, 0);
1888 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1889 			return (-1);
1890 		}
1891 		memcpy(&op_type, p, sizeof(op_type));
1892 		p += sizeof(op_type);
1893 		plen -= sizeof(op_type);
1894 		memcpy(&op_len, p, sizeof(op_len));
1895 		p += sizeof(op_len);
1896 		plen -= sizeof(op_len);
1897 		if (op_len > 0) {
1898 			if (plen < op_len) {
1899 				log_peer_warnx(&peer->conf,
1900 				    "corrupt OPEN message received, len wrong");
1901 				session_notification(peer, ERR_OPEN, 0,
1902 				    NULL, 0);
1903 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1904 				return (-1);
1905 			}
1906 			op_val = p;
1907 			p += op_len;
1908 			plen -= op_len;
1909 		} else
1910 			op_val = NULL;
1911 
1912 		switch (op_type) {
1913 		case OPT_PARAM_CAPABILITIES:		/* RFC 3392 */
1914 			if (parse_capabilities(peer, op_val, op_len,
1915 			    &as) == -1) {
1916 				session_notification(peer, ERR_OPEN, 0,
1917 				    NULL, 0);
1918 				change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1919 				return (-1);
1920 			}
1921 			break;
1922 		case OPT_PARAM_AUTH:			/* deprecated */
1923 		default:
1924 			/*
1925 			 * unsupported type
1926 			 * the RFCs tell us to leave the data section empty
1927 			 * and notify the peer with ERR_OPEN, ERR_OPEN_OPT.
1928 			 * How the peer should know _which_ optional parameter
1929 			 * we don't support is beyond me.
1930 			 */
1931 			log_peer_warnx(&peer->conf,
1932 			    "received OPEN message with unsupported optional "
1933 			    "parameter: type %u", op_type);
1934 			session_notification(peer, ERR_OPEN, ERR_OPEN_OPT,
1935 				NULL, 0);
1936 			change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1937 			timer_set(peer, Timer_IdleHold, 0);	/* no punish */
1938 			peer->IdleHoldTime /= 2;
1939 			return (-1);
1940 		}
1941 	}
1942 
1943 	/* if remote-as is zero and it's a cloned neighbor, accept any */
1944 	if (peer->conf.cloned && !peer->conf.remote_as && as != AS_TRANS) {
1945 		peer->conf.remote_as = as;
1946 		peer->conf.ebgp = (peer->conf.remote_as != conf->as);
1947 		if (!peer->conf.ebgp)
1948 			/* force enforce_as off for iBGP sessions */
1949 			peer->conf.enforce_as = ENFORCE_AS_OFF;
1950 	}
1951 
1952 	if (peer->conf.remote_as != as) {
1953 		log_peer_warnx(&peer->conf, "peer sent wrong AS %s",
1954 		    log_as(as));
1955 		session_notification(peer, ERR_OPEN, ERR_OPEN_AS, NULL, 0);
1956 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1957 		return (-1);
1958 	}
1959 
1960 	if (capa_neg_calc(peer) == -1) {
1961 		log_peer_warnx(&peer->conf,
1962 		    "capabilitiy negotiation calculation failed");
1963 		session_notification(peer, ERR_OPEN, 0, NULL, 0);
1964 		change_state(peer, STATE_IDLE, EVNT_RCVD_OPEN);
1965 		return (-1);
1966 	}
1967 
1968 	return (0);
1969 }
1970 
1971 int
1972 parse_update(struct peer *peer)
1973 {
1974 	u_char		*p;
1975 	u_int16_t	 datalen;
1976 
1977 	/*
1978 	 * we pass the message verbatim to the rde.
1979 	 * in case of errors the whole session is reset with a
1980 	 * notification anyway, we only need to know the peer
1981 	 */
1982 	p = peer->rbuf->rptr;
1983 	p += MSGSIZE_HEADER_MARKER;
1984 	memcpy(&datalen, p, sizeof(datalen));
1985 	datalen = ntohs(datalen);
1986 
1987 	p = peer->rbuf->rptr;
1988 	p += MSGSIZE_HEADER;	/* header is already checked */
1989 	datalen -= MSGSIZE_HEADER;
1990 
1991 	if (imsg_compose(ibuf_rde, IMSG_UPDATE, peer->conf.id, 0, -1, p,
1992 	    datalen) == -1)
1993 		return (-1);
1994 
1995 	return (0);
1996 }
1997 
1998 int
1999 parse_refresh(struct peer *peer)
2000 {
2001 	u_char		*p;
2002 	u_int16_t	 afi;
2003 	u_int8_t	 aid, safi;
2004 
2005 	p = peer->rbuf->rptr;
2006 	p += MSGSIZE_HEADER;	/* header is already checked */
2007 
2008 	/*
2009 	 * We could check if we actually announced the capability but
2010 	 * as long as the message is correctly encoded we don't care.
2011 	 */
2012 
2013 	/* afi, 2 byte */
2014 	memcpy(&afi, p, sizeof(afi));
2015 	afi = ntohs(afi);
2016 	p += 2;
2017 	/* reserved, 1 byte */
2018 	p += 1;
2019 	/* safi, 1 byte */
2020 	memcpy(&safi, p, sizeof(safi));
2021 
2022 	/* afi/safi unchecked -	unrecognized values will be ignored anyway */
2023 	if (afi2aid(afi, safi, &aid) == -1) {
2024 		log_peer_warnx(&peer->conf, "peer sent bad refresh, "
2025 		    "invalid afi/safi pair");
2026 		return (0);
2027 	}
2028 
2029 	if (imsg_compose(ibuf_rde, IMSG_REFRESH, peer->conf.id, 0, -1, &aid,
2030 	    sizeof(aid)) == -1)
2031 		return (-1);
2032 
2033 	return (0);
2034 }
2035 
2036 int
2037 parse_notification(struct peer *peer)
2038 {
2039 	u_char		*p;
2040 	u_int16_t	 datalen;
2041 	u_int8_t	 errcode;
2042 	u_int8_t	 subcode;
2043 	u_int8_t	 capa_code;
2044 	u_int8_t	 capa_len;
2045 	u_int8_t	 i;
2046 
2047 	/* just log */
2048 	p = peer->rbuf->rptr;
2049 	p += MSGSIZE_HEADER_MARKER;
2050 	memcpy(&datalen, p, sizeof(datalen));
2051 	datalen = ntohs(datalen);
2052 
2053 	p = peer->rbuf->rptr;
2054 	p += MSGSIZE_HEADER;	/* header is already checked */
2055 	datalen -= MSGSIZE_HEADER;
2056 
2057 	memcpy(&errcode, p, sizeof(errcode));
2058 	p += sizeof(errcode);
2059 	datalen -= sizeof(errcode);
2060 
2061 	memcpy(&subcode, p, sizeof(subcode));
2062 	p += sizeof(subcode);
2063 	datalen -= sizeof(subcode);
2064 
2065 	log_notification(peer, errcode, subcode, p, datalen);
2066 	peer->errcnt++;
2067 
2068 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_CAPA) {
2069 		if (datalen == 0) {	/* zebra likes to send those.. humbug */
2070 			log_peer_warnx(&peer->conf, "received \"unsupported "
2071 			    "capability\" notification without data part, "
2072 			    "disabling capability announcements altogether");
2073 			session_capa_ann_none(peer);
2074 		}
2075 
2076 		while (datalen > 0) {
2077 			if (datalen < 2) {
2078 				log_peer_warnx(&peer->conf,
2079 				    "parse_notification: "
2080 				    "expect len >= 2, len is %u", datalen);
2081 				return (-1);
2082 			}
2083 			memcpy(&capa_code, p, sizeof(capa_code));
2084 			p += sizeof(capa_code);
2085 			datalen -= sizeof(capa_code);
2086 			memcpy(&capa_len, p, sizeof(capa_len));
2087 			p += sizeof(capa_len);
2088 			datalen -= sizeof(capa_len);
2089 			if (datalen < capa_len) {
2090 				log_peer_warnx(&peer->conf,
2091 				    "parse_notification: capa_len %u exceeds "
2092 				    "remaining msg length %u", capa_len,
2093 				    datalen);
2094 				return (-1);
2095 			}
2096 			p += capa_len;
2097 			datalen -= capa_len;
2098 			switch (capa_code) {
2099 			case CAPA_MP:
2100 				for (i = 0; i < AID_MAX; i++)
2101 					peer->capa.ann.mp[i] = 0;
2102 				log_peer_warnx(&peer->conf,
2103 				    "disabling multiprotocol capability");
2104 				break;
2105 			case CAPA_REFRESH:
2106 				peer->capa.ann.refresh = 0;
2107 				log_peer_warnx(&peer->conf,
2108 				    "disabling route refresh capability");
2109 				break;
2110 			case CAPA_RESTART:
2111 				peer->capa.ann.restart = 0;
2112 				log_peer_warnx(&peer->conf,
2113 				    "disabling restart capability");
2114 				break;
2115 			case CAPA_AS4BYTE:
2116 				peer->capa.ann.as4byte = 0;
2117 				log_peer_warnx(&peer->conf,
2118 				    "disabling 4-byte AS num capability");
2119 				break;
2120 			default:	/* should not happen... */
2121 				log_peer_warnx(&peer->conf, "received "
2122 				    "\"unsupported capability\" notification "
2123 				    "for unknown capability %u, disabling "
2124 				    "capability announcements altogether",
2125 				    capa_code);
2126 				session_capa_ann_none(peer);
2127 				break;
2128 			}
2129 		}
2130 
2131 		return (1);
2132 	}
2133 
2134 	if (errcode == ERR_OPEN && subcode == ERR_OPEN_OPT) {
2135 		session_capa_ann_none(peer);
2136 		return (1);
2137 	}
2138 
2139 	return (0);
2140 }
2141 
2142 int
2143 parse_capabilities(struct peer *peer, u_char *d, u_int16_t dlen, u_int32_t *as)
2144 {
2145 	u_char		*capa_val;
2146 	u_int32_t	 remote_as;
2147 	u_int16_t	 len;
2148 	u_int16_t	 afi;
2149 	u_int8_t	 safi;
2150 	u_int8_t	 aid;
2151 	u_int8_t	 capa_code;
2152 	u_int8_t	 capa_len;
2153 
2154 	len = dlen;
2155 	while (len > 0) {
2156 		if (len < 2) {
2157 			log_peer_warnx(&peer->conf, "parse_capabilities: "
2158 			    "expect len >= 2, len is %u", len);
2159 			return (-1);
2160 		}
2161 		memcpy(&capa_code, d, sizeof(capa_code));
2162 		d += sizeof(capa_code);
2163 		len -= sizeof(capa_code);
2164 		memcpy(&capa_len, d, sizeof(capa_len));
2165 		d += sizeof(capa_len);
2166 		len -= sizeof(capa_len);
2167 		if (capa_len > 0) {
2168 			if (len < capa_len) {
2169 				log_peer_warnx(&peer->conf,
2170 				    "parse_capabilities: "
2171 				    "len %u smaller than capa_len %u",
2172 				    len, capa_len);
2173 				return (-1);
2174 			}
2175 			capa_val = d;
2176 			d += capa_len;
2177 			len -= capa_len;
2178 		} else
2179 			capa_val = NULL;
2180 
2181 		switch (capa_code) {
2182 		case CAPA_MP:			/* RFC 4760 */
2183 			if (capa_len != 4) {
2184 				log_peer_warnx(&peer->conf,
2185 				    "parse_capabilities: "
2186 				    "expect len 4, len is %u", capa_len);
2187 				return (-1);
2188 			}
2189 			memcpy(&afi, capa_val, sizeof(afi));
2190 			afi = ntohs(afi);
2191 			memcpy(&safi, capa_val + 3, sizeof(safi));
2192 			if (afi2aid(afi, safi, &aid) == -1) {
2193 				log_peer_warnx(&peer->conf,
2194 				    "parse_capabilities: AFI %u, "
2195 				    "safi %u unknown", afi, safi);
2196 				break;
2197 			}
2198 			peer->capa.peer.mp[aid] = 1;
2199 			break;
2200 		case CAPA_REFRESH:
2201 			peer->capa.peer.refresh = 1;
2202 			break;
2203 		case CAPA_RESTART:
2204 			peer->capa.peer.restart = 1;
2205 			/* we don't care about the further restart capas yet */
2206 			break;
2207 		case CAPA_AS4BYTE:
2208 			if (capa_len != 4) {
2209 				log_peer_warnx(&peer->conf,
2210 				    "parse_capabilities: "
2211 				    "expect len 4, len is %u", capa_len);
2212 				return (-1);
2213 			}
2214 			memcpy(&remote_as, capa_val, sizeof(remote_as));
2215 			*as = ntohl(remote_as);
2216 			peer->capa.peer.as4byte = 1;
2217 			break;
2218 		default:
2219 			break;
2220 		}
2221 	}
2222 
2223 	return (0);
2224 }
2225 
2226 int
2227 capa_neg_calc(struct peer *p)
2228 {
2229 	u_int8_t	i, hasmp = 0;
2230 
2231 	/* refresh: does not realy matter here, use peer setting */
2232 	p->capa.neg.refresh = p->capa.peer.refresh;
2233 
2234 	/* as4byte: both side must announce capability */
2235 	if (p->capa.ann.as4byte && p->capa.peer.as4byte)
2236 		p->capa.neg.as4byte = 1;
2237 	else
2238 		p->capa.neg.as4byte = 0;
2239 
2240 	/* MP: both side must announce capability */
2241 	for (i = 0; i < AID_MAX; i++) {
2242 		if (p->capa.ann.mp[i] && p->capa.peer.mp[i]) {
2243 			p->capa.neg.mp[i] = 1;
2244 			hasmp = 1;
2245 		} else
2246 			p->capa.neg.mp[i] = 0;
2247 	}
2248 	/* if no MP capability present for default IPv4 unicast mode */
2249 	if (!hasmp)
2250 		p->capa.neg.mp[AID_INET] = 1;
2251 
2252 	p->capa.neg.restart = p->capa.peer.restart;
2253 
2254 	return (0);
2255 }
2256 
2257 void
2258 session_dispatch_imsg(struct imsgbuf *ibuf, int idx, u_int *listener_cnt)
2259 {
2260 	struct imsg		 imsg;
2261 	struct mrt		 xmrt;
2262 	struct mrt		*mrt;
2263 	struct peer_config	*pconf;
2264 	struct peer		*p, *next;
2265 	struct listen_addr	*la, *nla;
2266 	struct kif		*kif;
2267 	u_char			*data;
2268 	enum reconf_action	 reconf;
2269 	int			 n, depend_ok;
2270 	u_int8_t		 errcode, subcode;
2271 
2272 	if ((n = imsg_read(ibuf)) == -1)
2273 		fatal("session_dispatch_imsg: imsg_read error");
2274 
2275 	if (n == 0)	/* connection closed */
2276 		fatalx("session_dispatch_imsg: pipe closed");
2277 
2278 	for (;;) {
2279 		if ((n = imsg_get(ibuf, &imsg)) == -1)
2280 			fatal("session_dispatch_imsg: imsg_get error");
2281 
2282 		if (n == 0)
2283 			break;
2284 
2285 		switch (imsg.hdr.type) {
2286 		case IMSG_RECONF_CONF:
2287 			if (idx != PFD_PIPE_MAIN)
2288 				fatalx("reconf request not from parent");
2289 			if ((nconf = malloc(sizeof(struct bgpd_config))) ==
2290 			    NULL)
2291 				fatal(NULL);
2292 			memcpy(nconf, imsg.data, sizeof(struct bgpd_config));
2293 			if ((nconf->listen_addrs = calloc(1,
2294 			    sizeof(struct listen_addrs))) == NULL)
2295 				fatal(NULL);
2296 			TAILQ_INIT(nconf->listen_addrs);
2297 			npeers = NULL;
2298 			init_conf(nconf);
2299 			pending_reconf = 1;
2300 			break;
2301 		case IMSG_RECONF_PEER:
2302 			if (idx != PFD_PIPE_MAIN)
2303 				fatalx("reconf request not from parent");
2304 			pconf = imsg.data;
2305 			p = getpeerbyaddr(&pconf->remote_addr);
2306 			if (p == NULL) {
2307 				if ((p = calloc(1, sizeof(struct peer))) ==
2308 				    NULL)
2309 					fatal("new_peer");
2310 				p->state = p->prev_state = STATE_NONE;
2311 				p->next = npeers;
2312 				npeers = p;
2313 				reconf = RECONF_REINIT;
2314 			} else
2315 				reconf = RECONF_KEEP;
2316 
2317 			memcpy(&p->conf, pconf, sizeof(struct peer_config));
2318 			p->conf.reconf_action = reconf;
2319 			break;
2320 		case IMSG_RECONF_LISTENER:
2321 			if (idx != PFD_PIPE_MAIN)
2322 				fatalx("reconf request not from parent");
2323 			if (nconf == NULL)
2324 				fatalx("IMSG_RECONF_LISTENER but no config");
2325 			nla = imsg.data;
2326 			TAILQ_FOREACH(la, conf->listen_addrs, entry)
2327 				if (!la_cmp(la, nla))
2328 					break;
2329 
2330 			if (la == NULL) {
2331 				if (nla->reconf != RECONF_REINIT)
2332 					fatalx("king bula sez: "
2333 					    "expected REINIT");
2334 
2335 				if ((nla->fd = imsg.fd) == -1)
2336 					log_warnx("expected to receive fd for "
2337 					    "%s but didn't receive any",
2338 					    log_sockaddr((struct sockaddr *)
2339 					    &nla->sa));
2340 
2341 				la = calloc(1, sizeof(struct listen_addr));
2342 				if (la == NULL)
2343 					fatal(NULL);
2344 				memcpy(&la->sa, &nla->sa, sizeof(la->sa));
2345 				la->flags = nla->flags;
2346 				la->fd = nla->fd;
2347 				la->reconf = RECONF_REINIT;
2348 				TAILQ_INSERT_TAIL(nconf->listen_addrs, la,
2349 				    entry);
2350 			} else {
2351 				if (nla->reconf != RECONF_KEEP)
2352 					fatalx("king bula sez: expected KEEP");
2353 				la->reconf = RECONF_KEEP;
2354 			}
2355 
2356 			break;
2357 		case IMSG_RECONF_DONE:
2358 			if (idx != PFD_PIPE_MAIN)
2359 				fatalx("reconf request not from parent");
2360 			if (nconf == NULL)
2361 				fatalx("got IMSG_RECONF_DONE but no config");
2362 			conf->flags = nconf->flags;
2363 			conf->log = nconf->log;
2364 			conf->bgpid = nconf->bgpid;
2365 			conf->clusterid = nconf->clusterid;
2366 			conf->as = nconf->as;
2367 			conf->short_as = nconf->short_as;
2368 			conf->holdtime = nconf->holdtime;
2369 			conf->min_holdtime = nconf->min_holdtime;
2370 			conf->connectretry = nconf->connectretry;
2371 
2372 			/* add new peers */
2373 			for (p = npeers; p != NULL; p = next) {
2374 				next = p->next;
2375 				p->next = peers;
2376 				peers = p;
2377 			}
2378 			/* find ones that need attention */
2379 			for (p = peers; p != NULL; p = p->next) {
2380 				/* needs to be deleted? */
2381 				if (p->conf.reconf_action == RECONF_NONE &&
2382 				    !p->conf.cloned)
2383 					p->conf.reconf_action = RECONF_DELETE;
2384 				/* had demotion, is demoted, demote removed? */
2385 				if (p->demoted && !p->conf.demote_group[0])
2386 						session_demote(p, -1);
2387 			}
2388 
2389 			/* delete old listeners */
2390 			for (la = TAILQ_FIRST(conf->listen_addrs); la != NULL;
2391 			    la = nla) {
2392 				nla = TAILQ_NEXT(la, entry);
2393 				if (la->reconf == RECONF_NONE) {
2394 					log_info("not listening on %s any more",
2395 					    log_sockaddr(
2396 					    (struct sockaddr *)&la->sa));
2397 					TAILQ_REMOVE(conf->listen_addrs, la,
2398 					    entry);
2399 					close(la->fd);
2400 					free(la);
2401 				}
2402 			}
2403 
2404 			/* add new listeners */
2405 			while ((la = TAILQ_FIRST(nconf->listen_addrs)) !=
2406 			    NULL) {
2407 				TAILQ_REMOVE(nconf->listen_addrs, la, entry);
2408 				TAILQ_INSERT_TAIL(conf->listen_addrs, la,
2409 				    entry);
2410 			}
2411 
2412 			setup_listeners(listener_cnt);
2413 			free(nconf->listen_addrs);
2414 			free(nconf);
2415 			nconf = NULL;
2416 			pending_reconf = 0;
2417 			log_info("SE reconfigured");
2418 			break;
2419 		case IMSG_IFINFO:
2420 			if (idx != PFD_PIPE_MAIN)
2421 				fatalx("IFINFO message not from parent");
2422 			if (imsg.hdr.len != IMSG_HEADER_SIZE +
2423 			    sizeof(struct kif))
2424 				fatalx("IFINFO imsg with wrong len");
2425 			kif = imsg.data;
2426 			depend_ok = (kif->flags & IFF_UP) &&
2427 			    (LINK_STATE_IS_UP(kif->link_state) ||
2428 			    (kif->link_state == LINK_STATE_UNKNOWN &&
2429 			    kif->media_type != IFT_CARP));
2430 
2431 			for (p = peers; p != NULL; p = p->next)
2432 				if (!strcmp(p->conf.if_depend, kif->ifname)) {
2433 					if (depend_ok && !p->depend_ok) {
2434 						p->depend_ok = depend_ok;
2435 						bgp_fsm(p, EVNT_START);
2436 					} else if (!depend_ok && p->depend_ok) {
2437 						p->depend_ok = depend_ok;
2438 						session_stop(p,
2439 						    ERR_CEASE_OTHER_CHANGE);
2440 					}
2441 				}
2442 			break;
2443 		case IMSG_MRT_OPEN:
2444 		case IMSG_MRT_REOPEN:
2445 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2446 			    sizeof(struct mrt)) {
2447 				log_warnx("wrong imsg len");
2448 				break;
2449 			}
2450 
2451 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2452 			if ((xmrt.wbuf.fd = imsg.fd) == -1)
2453 				log_warnx("expected to receive fd for mrt dump "
2454 				    "but didn't receive any");
2455 
2456 			mrt = mrt_get(&mrthead, &xmrt);
2457 			if (mrt == NULL) {
2458 				/* new dump */
2459 				mrt = calloc(1, sizeof(struct mrt));
2460 				if (mrt == NULL)
2461 					fatal("session_dispatch_imsg");
2462 				memcpy(mrt, &xmrt, sizeof(struct mrt));
2463 				TAILQ_INIT(&mrt->wbuf.bufs);
2464 				LIST_INSERT_HEAD(&mrthead, mrt, entry);
2465 			} else {
2466 				/* old dump reopened */
2467 				close(mrt->wbuf.fd);
2468 				mrt->wbuf.fd = xmrt.wbuf.fd;
2469 			}
2470 			break;
2471 		case IMSG_MRT_CLOSE:
2472 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2473 			    sizeof(struct mrt)) {
2474 				log_warnx("wrong imsg len");
2475 				break;
2476 			}
2477 
2478 			memcpy(&xmrt, imsg.data, sizeof(struct mrt));
2479 			mrt = mrt_get(&mrthead, &xmrt);
2480 			if (mrt != NULL) {
2481 				mrt_clean(mrt);
2482 				LIST_REMOVE(mrt, entry);
2483 				free(mrt);
2484 			}
2485 			break;
2486 		case IMSG_CTL_KROUTE:
2487 		case IMSG_CTL_KROUTE_ADDR:
2488 		case IMSG_CTL_SHOW_NEXTHOP:
2489 		case IMSG_CTL_SHOW_INTERFACE:
2490 		case IMSG_CTL_SHOW_FIB_TABLES:
2491 			if (idx != PFD_PIPE_MAIN)
2492 				fatalx("ctl kroute request not from parent");
2493 			control_imsg_relay(&imsg);
2494 			break;
2495 		case IMSG_CTL_SHOW_RIB:
2496 		case IMSG_CTL_SHOW_RIB_PREFIX:
2497 		case IMSG_CTL_SHOW_RIB_ATTR:
2498 		case IMSG_CTL_SHOW_RIB_MEM:
2499 		case IMSG_CTL_SHOW_NETWORK:
2500 		case IMSG_CTL_SHOW_NEIGHBOR:
2501 			if (idx != PFD_PIPE_ROUTE_CTL)
2502 				fatalx("ctl rib request not from RDE");
2503 			control_imsg_relay(&imsg);
2504 			break;
2505 		case IMSG_CTL_END:
2506 		case IMSG_CTL_RESULT:
2507 			control_imsg_relay(&imsg);
2508 			break;
2509 		case IMSG_UPDATE:
2510 			if (idx != PFD_PIPE_ROUTE)
2511 				fatalx("update request not from RDE");
2512 			if (imsg.hdr.len > IMSG_HEADER_SIZE +
2513 			    MAX_PKTSIZE - MSGSIZE_HEADER ||
2514 			    imsg.hdr.len < IMSG_HEADER_SIZE +
2515 			    MSGSIZE_UPDATE_MIN - MSGSIZE_HEADER)
2516 				log_warnx("RDE sent invalid update");
2517 			else
2518 				session_update(imsg.hdr.peerid, imsg.data,
2519 				    imsg.hdr.len - IMSG_HEADER_SIZE);
2520 			break;
2521 		case IMSG_UPDATE_ERR:
2522 			if (idx != PFD_PIPE_ROUTE)
2523 				fatalx("update request not from RDE");
2524 			if (imsg.hdr.len < IMSG_HEADER_SIZE + 2) {
2525 				log_warnx("RDE sent invalid notification");
2526 				break;
2527 			}
2528 			if ((p = getpeerbyid(imsg.hdr.peerid)) == NULL) {
2529 				log_warnx("no such peer: id=%u",
2530 				    imsg.hdr.peerid);
2531 				break;
2532 			}
2533 			data = imsg.data;
2534 			errcode = *data++;
2535 			subcode = *data++;
2536 
2537 			if (imsg.hdr.len == IMSG_HEADER_SIZE + 2)
2538 				data = NULL;
2539 
2540 			session_notification(p, errcode, subcode,
2541 			    data, imsg.hdr.len - IMSG_HEADER_SIZE - 2);
2542 			switch (errcode) {
2543 			case ERR_CEASE:
2544 				switch (subcode) {
2545 				case ERR_CEASE_MAX_PREFIX:
2546 					bgp_fsm(p, EVNT_STOP);
2547 					if (p->conf.max_prefix_restart)
2548 						timer_set(p, Timer_IdleHold, 60 *
2549 						    p->conf.max_prefix_restart);
2550 					break;
2551 				default:
2552 					bgp_fsm(p, EVNT_CON_FATAL);
2553 					break;
2554 				}
2555 				break;
2556 			default:
2557 				bgp_fsm(p, EVNT_CON_FATAL);
2558 				break;
2559 			}
2560 			break;
2561 		default:
2562 			break;
2563 		}
2564 		imsg_free(&imsg);
2565 	}
2566 }
2567 
2568 int
2569 la_cmp(struct listen_addr *a, struct listen_addr *b)
2570 {
2571 	struct sockaddr_in	*in_a, *in_b;
2572 	struct sockaddr_in6	*in6_a, *in6_b;
2573 
2574 	if (a->sa.ss_family != b->sa.ss_family)
2575 		return (1);
2576 
2577 	switch (a->sa.ss_family) {
2578 	case AF_INET:
2579 		in_a = (struct sockaddr_in *)&a->sa;
2580 		in_b = (struct sockaddr_in *)&b->sa;
2581 		if (in_a->sin_addr.s_addr != in_b->sin_addr.s_addr)
2582 			return (1);
2583 		if (in_a->sin_port != in_b->sin_port)
2584 			return (1);
2585 		break;
2586 	case AF_INET6:
2587 		in6_a = (struct sockaddr_in6 *)&a->sa;
2588 		in6_b = (struct sockaddr_in6 *)&b->sa;
2589 		if (bcmp(&in6_a->sin6_addr, &in6_b->sin6_addr,
2590 		    sizeof(struct in6_addr)))
2591 			return (1);
2592 		if (in6_a->sin6_port != in6_b->sin6_port)
2593 			return (1);
2594 		break;
2595 	default:
2596 		fatal("king bula sez: unknown address family");
2597 		/* NOTREACHED */
2598 	}
2599 
2600 	return (0);
2601 }
2602 
2603 struct peer *
2604 getpeerbyaddr(struct bgpd_addr *addr)
2605 {
2606 	struct peer *p;
2607 
2608 	/* we might want a more effective way to find peers by IP */
2609 	for (p = peers; p != NULL &&
2610 	    memcmp(&p->conf.remote_addr, addr, sizeof(p->conf.remote_addr));
2611 	    p = p->next)
2612 		;	/* nothing */
2613 
2614 	return (p);
2615 }
2616 
2617 struct peer *
2618 getpeerbydesc(const char *descr)
2619 {
2620 	struct peer	*p, *res = NULL;
2621 	int		 match = 0;
2622 
2623 	for (p = peers; p != NULL; p = p->next)
2624 		if (!strcmp(p->conf.descr, descr)) {
2625 			res = p;
2626 			match++;
2627 		}
2628 
2629 	if (match > 1)
2630 		log_info("neighbor description \"%s\" not unique, request "
2631 		    "aborted", descr);
2632 
2633 	if (match == 1)
2634 		return (res);
2635 	else
2636 		return (NULL);
2637 }
2638 
2639 struct peer *
2640 getpeerbyip(struct sockaddr *ip)
2641 {
2642 	struct bgpd_addr addr;
2643 	struct peer	*p, *newpeer, *loose = NULL;
2644 	u_int32_t	 id;
2645 
2646 	sa2addr(ip, &addr);
2647 
2648 	/* we might want a more effective way to find peers by IP */
2649 	for (p = peers; p != NULL; p = p->next)
2650 		if (!p->conf.template &&
2651 		    !memcmp(&addr, &p->conf.remote_addr, sizeof(addr)))
2652 			return (p);
2653 
2654 	/* try template matching */
2655 	for (p = peers; p != NULL; p = p->next)
2656 		if (p->conf.template &&
2657 		    p->conf.remote_addr.aid == addr.aid &&
2658 		    session_match_mask(p, &addr))
2659 			if (loose == NULL || loose->conf.remote_masklen <
2660 			    p->conf.remote_masklen)
2661 				loose = p;
2662 
2663 	if (loose != NULL) {
2664 		/* clone */
2665 		if ((newpeer = malloc(sizeof(struct peer))) == NULL)
2666 			fatal(NULL);
2667 		memcpy(newpeer, loose, sizeof(struct peer));
2668 		for (id = UINT_MAX; id > UINT_MAX / 2; id--) {
2669 			for (p = peers; p != NULL && p->conf.id != id;
2670 			    p = p->next)
2671 				;	/* nothing */
2672 			if (p == NULL) {	/* we found a free id */
2673 				newpeer->conf.id = id;
2674 				break;
2675 			}
2676 		}
2677 		sa2addr(ip, &newpeer->conf.remote_addr);
2678 		switch (ip->sa_family) {
2679 		case AF_INET:
2680 			newpeer->conf.remote_masklen = 32;
2681 			break;
2682 		case AF_INET6:
2683 			newpeer->conf.remote_masklen = 128;
2684 			break;
2685 		}
2686 		newpeer->conf.template = 0;
2687 		newpeer->conf.cloned = 1;
2688 		newpeer->state = newpeer->prev_state = STATE_NONE;
2689 		newpeer->conf.reconf_action = RECONF_KEEP;
2690 		newpeer->rbuf = NULL;
2691 		init_peer(newpeer);
2692 		bgp_fsm(newpeer, EVNT_START);
2693 		newpeer->next = peers;
2694 		peers = newpeer;
2695 		return (newpeer);
2696 	}
2697 
2698 	return (NULL);
2699 }
2700 
2701 int
2702 session_match_mask(struct peer *p, struct bgpd_addr *a)
2703 {
2704 	in_addr_t	 v4mask;
2705 	struct in6_addr	 masked;
2706 
2707 	switch (p->conf.remote_addr.aid) {
2708 	case AID_INET:
2709 		v4mask = htonl(prefixlen2mask(p->conf.remote_masklen));
2710 		if (p->conf.remote_addr.v4.s_addr == (a->v4.s_addr & v4mask))
2711 			return (1);
2712 		return (0);
2713 	case AID_INET6:
2714 		inet6applymask(&masked, &a->v6, p->conf.remote_masklen);
2715 
2716 		if (!memcmp(&masked, &p->conf.remote_addr.v6, sizeof(masked)))
2717 			return (1);
2718 		return (0);
2719 	}
2720 	return (0);
2721 }
2722 
2723 struct peer *
2724 getpeerbyid(u_int32_t peerid)
2725 {
2726 	struct peer *p;
2727 
2728 	/* we might want a more effective way to find peers by IP */
2729 	for (p = peers; p != NULL &&
2730 	    p->conf.id != peerid; p = p->next)
2731 		;	/* nothing */
2732 
2733 	return (p);
2734 }
2735 
2736 void
2737 session_down(struct peer *peer)
2738 {
2739 	bzero(&peer->capa.neg, sizeof(peer->capa.neg));
2740 	peer->stats.last_updown = time(NULL);
2741 	if (imsg_compose(ibuf_rde, IMSG_SESSION_DOWN, peer->conf.id, 0, -1,
2742 	    NULL, 0) == -1)
2743 		fatalx("imsg_compose error");
2744 }
2745 
2746 void
2747 session_up(struct peer *p)
2748 {
2749 	struct session_up	 sup;
2750 
2751 	if (imsg_compose(ibuf_rde, IMSG_SESSION_ADD, p->conf.id, 0, -1,
2752 	    &p->conf, sizeof(p->conf)) == -1)
2753 		fatalx("imsg_compose error");
2754 
2755 	sa2addr((struct sockaddr *)&p->sa_local, &sup.local_addr);
2756 	sa2addr((struct sockaddr *)&p->sa_remote, &sup.remote_addr);
2757 
2758 	sup.remote_bgpid = p->remote_bgpid;
2759 	sup.short_as = p->short_as;
2760 	memcpy(&sup.capa, &p->capa.neg, sizeof(sup.capa));
2761 	p->stats.last_updown = time(NULL);
2762 	if (imsg_compose(ibuf_rde, IMSG_SESSION_UP, p->conf.id, 0, -1,
2763 	    &sup, sizeof(sup)) == -1)
2764 		fatalx("imsg_compose error");
2765 }
2766 
2767 int
2768 imsg_compose_parent(int type, u_int32_t peerid, pid_t pid, void *data,
2769     u_int16_t datalen)
2770 {
2771 	return (imsg_compose(ibuf_main, type, peerid, pid, -1, data, datalen));
2772 }
2773 
2774 int
2775 imsg_compose_rde(int type, pid_t pid, void *data, u_int16_t datalen)
2776 {
2777 	return (imsg_compose(ibuf_rde, type, 0, pid, -1, data, datalen));
2778 }
2779 
2780 void
2781 session_demote(struct peer *p, int level)
2782 {
2783 	struct demote_msg	msg;
2784 
2785 	strlcpy(msg.demote_group, p->conf.demote_group,
2786 	    sizeof(msg.demote_group));
2787 	msg.level = level;
2788 	if (imsg_compose(ibuf_main, IMSG_DEMOTE, p->conf.id, 0, -1,
2789 	    &msg, sizeof(msg)) == -1)
2790 		fatalx("imsg_compose error");
2791 
2792 	p->demoted += level;
2793 }
2794 
2795 void
2796 session_stop(struct peer *peer, u_int8_t subcode)
2797 {
2798 	switch (peer->state) {
2799 	case STATE_OPENSENT:
2800 	case STATE_OPENCONFIRM:
2801 	case STATE_ESTABLISHED:
2802 		session_notification(peer, ERR_CEASE, subcode, NULL, 0);
2803 		break;
2804 	default:
2805 		/* session not open, no need to send notification */
2806 		break;
2807 	}
2808 	bgp_fsm(peer, EVNT_STOP);
2809 }
2810