1 /*
2  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 #pragma ident	"%Z%%M%	%I%	%E% SMI"
6 /*
7  * lib/krb5/os/sendto_kdc.c
8  *
9  * Copyright 1990,1991,2001,2002 by the Massachusetts Institute of Technology.
10  * All Rights Reserved.
11  *
12  * Export of this software from the United States of America may
13  *   require a specific license from the United States Government.
14  *   It is the responsibility of any person or organization contemplating
15  *   export to obtain such a license before exporting.
16  *
17  * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
18  * distribute this software and its documentation for any purpose and
19  * without fee is hereby granted, provided that the above copyright
20  * notice appear in all copies and that both that copyright notice and
21  * this permission notice appear in supporting documentation, and that
22  * the name of M.I.T. not be used in advertising or publicity pertaining
23  * to distribution of the software without specific, written prior
24  * permission.  Furthermore if you modify this software you must label
25  * your software as modified software and not distribute it in such a
26  * fashion that it might be confused with the original M.I.T. software.
27  * M.I.T. makes no representations about the suitability of
28  * this software for any purpose.  It is provided "as is" without express
29  * or implied warranty.
30  *
31  *
32  * Send packet to KDC for realm; wait for response, retransmitting
33  * as necessary.
34  */
35 
36 #define NEED_SOCKETS
37 #define NEED_LOWLEVEL_IO
38 #include <fake-addrinfo.h>
39 #include <k5-int.h>
40 
41 #ifdef HAVE_SYS_TIME_H
42 #include <sys/time.h>
43 #else
44 #include <time.h>
45 #endif
46 #include "os-proto.h"
47 
48 #ifdef _AIX
49 #include <sys/select.h>
50 #endif
51 
52 /* For FIONBIO.  */
53 #include <sys/ioctl.h>
54 #ifdef HAVE_SYS_FILIO_H
55 #include <sys/filio.h>
56 #endif
57 
58 #define MAX_PASS		    3
59 /* Solaris Kerberos: moved to k5-int.h */
60 /* #define DEFAULT_UDP_PREF_LIMIT	 1465 */
61 #define HARD_UDP_LIMIT		32700 /* could probably do 64K-epsilon ? */
62 
63 krb5_error_code krb5int_sendto(krb5_context, const krb5_data *,
64 			    const struct addrlist *, krb5_data *,
65 			    struct sockaddr_storage *,
66 			    socklen_t *, int *);
67 
68 /* Solaris kerberos: leaving this here because other code depends on this. */
/*
 * Default debug sink: copies the 'len' bytes starting at 'data' to stderr
 * verbatim.  The number of bytes actually written is not checked.
 */
static void
default_debug_handler (const void *data, size_t len)
{
    /* No flush afterwards: stderr is unbuffered. */
    (void) fwrite(data, sizeof (char), len, stderr);
}
74 
75 void (*krb5int_sendtokdc_debug_handler) (const void *, size_t) = default_debug_handler;
76 
77 /*
78  * Solaris Kerberos: only including the debug stuff if DEBUG defined outside
79  * this file.
80  */
81 #ifdef  DEBUG
82 
83 static char global_err_str[NI_MAXHOST + NI_MAXSERV + 1024];
84 
85 /* Solaris kerberos: removed put() since it isn't needed. */
86 
87 static void putstr(const char *str)
88 {
89     /* Solaris kerberos: build the string which will be passed to syslog later */
90     strlcat(global_err_str, str, sizeof (global_err_str));
91 }
92 
93 #define dprint krb5int_debug_fprint
94 #define dperror dprint
95 
96 #include <com_err.h>
97 
98 static void
99 krb5int_debug_fprint (const char *fmt, ...)
100 {
101     va_list args;
102 
103     /* Temporaries for variable arguments, etc.  */
104     krb5_error_code kerr;
105     int err;
106     fd_set *rfds, *wfds, *xfds;
107     int i;
108     int maxfd;
109     struct timeval *tv;
110     struct addrinfo *ai;
111     const krb5_data *d;
112     char addrbuf[NI_MAXHOST], portbuf[NI_MAXSERV];
113     const char *p;
114     char tmpbuf[NI_MAXHOST + NI_MAXSERV + 30];
115 
116     /*
117      * Solaris kerberos: modified this function to create a string to pass to
118      * syslog()
119      */
120     global_err_str[0] = NULL;
121 
122     va_start(args, fmt);
123 
124 #define putf(FMT,X)	(sprintf(tmpbuf,FMT,X),putstr(tmpbuf))
125 
126     for (; *fmt; fmt++) {
127 	if (*fmt != '%') {
128 	    /* Possible optimization: Look for % and print all chars
129 	       up to it in one call.  */
130 	    putf("%c", *fmt);
131 	    continue;
132 	}
133 	/* After this, always processing a '%' sequence.  */
134 	fmt++;
135 	switch (*fmt) {
136 	case 0:
137 	default:
138 	    abort();
139 	case 'E':
140 	    /* %E => krb5_error_code */
141 	    kerr = va_arg(args, krb5_error_code);
142 	    sprintf(tmpbuf, "%lu/", (unsigned long) kerr);
143 	    putstr(tmpbuf);
144 	    p = error_message(kerr);
145 	    putstr(p);
146 	    break;
147 	case 'm':
148 	    /* %m => errno value (int) */
149 	    /* Like syslog's %m except the errno value is passed in
150 	       rather than the current value.  */
151 	    err = va_arg(args, int);
152 	    putf("%d/", err);
153 	    p = strerror(err);
154 	    putstr(p);
155 	    break;
156 	case 'F':
157 	    /* %F => fd_set *, fd_set *, fd_set *, int */
158 	    rfds = va_arg(args, fd_set *);
159 	    wfds = va_arg(args, fd_set *);
160 	    xfds = va_arg(args, fd_set *);
161 	    maxfd = va_arg(args, int);
162 
163 	    for (i = 0; i < maxfd; i++) {
164 		int r = FD_ISSET(i, rfds);
165 		int w = wfds && FD_ISSET(i, wfds);
166 		int x = xfds && FD_ISSET(i, xfds);
167 		if (r || w || x) {
168 		    putf(" %d", i);
169 		    if (r)
170 			putstr("r");
171 		    if (w)
172 			putstr("w");
173 		    if (x)
174 			putstr("x");
175 		}
176 	    }
177 	    putstr(" ");
178 	    break;
179 	case 's':
180 	    /* %s => char * */
181 	    p = va_arg(args, const char *);
182 	    putstr(p);
183 	    break;
184 	case 't':
185 	    /* %t => struct timeval * */
186 	    tv = va_arg(args, struct timeval *);
187 	    if (tv) {
188 		sprintf(tmpbuf, "%ld.%06ld",
189 			(long) tv->tv_sec, (long) tv->tv_usec);
190 		putstr(tmpbuf);
191 	    } else
192 		putstr("never");
193 	    break;
194 	case 'd':
195 	    /* %d => int */
196 	    putf("%d", va_arg(args, int));
197 	    break;
198 	case 'p':
199 	    /* %p => pointer */
200 	    putf("%p", va_arg(args, void*));
201 	    break;
202 	case 'A':
203 	    /* %A => addrinfo */
204 	    ai = va_arg(args, struct addrinfo *);
205 	    if (0 != getnameinfo (ai->ai_addr, ai->ai_addrlen,
206 				  addrbuf, sizeof (addrbuf),
207 				  portbuf, sizeof (portbuf),
208 				  NI_NUMERICHOST | NI_NUMERICSERV))
209 		strcpy (addrbuf, "??"), strcpy (portbuf, "??");
210 	    sprintf(tmpbuf, "%s %s.%s",
211 		    (ai->ai_socktype == SOCK_DGRAM
212 		     ? "udp"
213 		     : ai->ai_socktype == SOCK_STREAM
214 		     ? "tcp"
215 		     : "???"),
216 		    addrbuf, portbuf);
217 	    putstr(tmpbuf);
218 	    break;
219 	case 'D':
220 	    /* %D => krb5_data * */
221 	    d = va_arg(args, krb5_data *);
222 	    p = d->data;
223 	    putstr("0x");
224 	    for (i = 0; i < d->length; i++) {
225 		putf("%.2x", *p++);
226 	    }
227 	    break;
228 	}
229     }
230     va_end(args);
231 
232     /* Solaris kerberos: use syslog() for debug output */
233     syslog(LOG_DEBUG, global_err_str);
234 }
235 
236 #else
237 #define dprint (void)
238 #define dperror(MSG) ((void)(MSG))
239 #endif
240 
241 static int
242 merge_addrlists (struct addrlist *dest, struct addrlist *src)
243 {
244     int err, i;
245 
246 #ifdef DEBUG
247     /*LINTED*/
248     dprint("merging addrlists:\n\tlist1: ");
249     for (i = 0; i < dest->naddrs; i++)
250 	/*LINTED*/
251 	dprint(" %A", dest->addrs[i]);
252     /*LINTED*/
253     dprint("\n\tlist2: ");
254     for (i = 0; i < src->naddrs; i++)
255 	/*LINTED*/
256 	dprint(" %A", src->addrs[i]);
257     /*LINTED*/
258     dprint("\n");
259 #endif
260 
261     err = krb5int_grow_addrlist (dest, src->naddrs);
262     if (err)
263 	return err;
264     for (i = 0; i < src->naddrs; i++) {
265 	dest->addrs[dest->naddrs + i] = src->addrs[i];
266 	src->addrs[i] = 0;
267     }
268     dest->naddrs += i;
269     src->naddrs = 0;
270 
271 #ifdef DEBUG
272     /*LINTED*/
273     dprint("\tout:   ");
274     for (i = 0; i < dest->naddrs; i++)
275 	/*LINTED*/
276 	dprint(" %A", dest->addrs[i]);
277     /*LINTED*/
278     dprint("\n");
279 #endif
280 
281     return 0;
282 }
283 
284 /*
285  * send the formatted request 'message' to a KDC for realm 'realm' and
286  * return the response (if any) in 'reply'.
287  *
288  * If the message is sent and a response is received, 0 is returned,
289  * otherwise an error code is returned.
290  *
291  * The storage for 'reply' is allocated and should be freed by the caller
292  * when finished.
293  */
294 
295 krb5_error_code
296 krb5_sendto_kdc (krb5_context context, const krb5_data *message,
297 		 const krb5_data *realm, krb5_data *reply,
298 		 int *use_master, int tcp_only)
299 {
300     krb5_error_code retval;
301     struct addrlist addrs;
302     int socktype1 = 0, socktype2 = 0, addr_used;
303 
304     /*
305      * find KDC location(s) for realm
306      */
307 
308     /*
309      * BUG: This code won't return "interesting" errors (e.g., out of mem,
310      * bad config file) from locate_kdc.  KRB5_REALM_CANT_RESOLVE can be
311      * ignored from one query of two, but if only one query is done, or
312      * both return that error, it should be returned to the caller.  Also,
313      * "interesting" errors (not KRB5_KDC_UNREACH) from sendto_{udp,tcp}
314      * should probably be returned as well.
315      */
316 
317     /*LINTED*/
318     dprint("krb5_sendto_kdc(%d@%p, \"%D\", use_master=%d, tcp_only=%d)\n",
319     /*LINTED*/
320 	   message->length, message->data, realm, *use_master, tcp_only);
321 
322     if (!tcp_only && context->udp_pref_limit < 0) {
323 	int tmp;
324 	retval = profile_get_integer(context->profile,
325 				     "libdefaults", "udp_preference_limit", 0,
326 				     DEFAULT_UDP_PREF_LIMIT, &tmp);
327 	if (retval)
328 	    return retval;
329 	if (tmp < 0)
330 	    tmp = DEFAULT_UDP_PREF_LIMIT;
331 	else if (tmp > HARD_UDP_LIMIT)
332 	    /* In the unlikely case that a *really* big value is
333 	       given, let 'em use as big as we think we can
334 	       support.  */
335 	    tmp = HARD_UDP_LIMIT;
336 	context->udp_pref_limit = tmp;
337     }
338 
339     retval = (*use_master ? KRB5_KDC_UNREACH : KRB5_REALM_UNKNOWN);
340 
341     if (tcp_only)
342 	socktype1 = SOCK_STREAM, socktype2 = 0;
343     else if (message->length <= context->udp_pref_limit)
344 	socktype1 = SOCK_DGRAM, socktype2 = SOCK_STREAM;
345     else
346 	socktype1 = SOCK_STREAM, socktype2 = SOCK_DGRAM;
347 
348     retval = krb5_locate_kdc(context, realm, &addrs, *use_master, socktype1, 0);
349     if (socktype2) {
350 	struct addrlist addrs2;
351 
352 	retval = krb5_locate_kdc(context, realm, &addrs2, *use_master,
353 				 socktype2, 0);
354 	if (retval == 0) {
355 	    (void) merge_addrlists(&addrs, &addrs2);
356 	    krb5int_free_addrlist(&addrs2);
357 	}
358     }
359     if (addrs.naddrs > 0) {
360         retval = krb5int_sendto (context, message, &addrs, reply, 0, 0,
361 		&addr_used);
362 	if (retval == 0) {
363             /*
364 	     * Set use_master to 1 if we ended up talking to a master when
365 	     * didn't explicitly request to
366 	     */
367 
368 	    if (*use_master == 0) {
369 	        struct addrlist addrs3;
370 		retval = krb5_locate_kdc(context, realm, &addrs3, 1,
371 					addrs.addrs[addr_used]->ai_socktype,
372 					addrs.addrs[addr_used]->ai_family);
373 		if (retval == 0) {
374 		    int i;
375 		    for (i = 0; i < addrs3.naddrs; i++) {
376 			if (addrs.addrs[addr_used]->ai_addrlen ==
377 			    addrs3.addrs[i]->ai_addrlen &&
378 			    memcmp(addrs.addrs[addr_used]->ai_addr,
379 				addrs3.addrs[i]->ai_addr,
380 				addrs.addrs[addr_used]->ai_addrlen) == 0) {
381 				*use_master = 1;
382 				break;
383 			}
384 		    }
385 		    krb5int_free_addrlist (&addrs3);
386 		}
387 	    }
388 	    krb5int_free_addrlist (&addrs);
389 	    return 0;
390 	}
391 	krb5int_free_addrlist (&addrs);
392     }
393     return retval;
394 }
395 
396 
397 /*
398  * Notes:
399  *
400  * Getting "connection refused" on a connected UDP socket causes
401  * select to indicate write capability on UNIX, but only shows up
402  * as an exception on Windows.  (I don't think any UNIX system flags
403  * the error as an exception.)  So we check for both, or make it
404  * system-specific.
405  *
406  * Always watch for responses from *any* of the servers.  Eventually
407  * fix the UDP code to do the same.
408  *
409  * To do:
410  * - TCP NOPUSH/CORK socket options?
411  * - error codes that don't suck
412  * - getsockopt(SO_ERROR) to check connect status
413  * - handle error RESPONSE_TOO_BIG from UDP server and use TCP
414  *   connections already in progress
415  */
416 
417 #include <cm.h>
418 
/*
 * Printable names for enum conn_states, used in debug output.  The array is
 * indexed by the enum value, so both lists must stay in the same order.
 */
static const char *const state_strings[] = {
    "INITIALIZING", "CONNECTING", "WRITING", "READING", "FAILED"
};
/* Progress of one connection attempt; see struct conn_state. */
enum conn_states { INITIALIZING, CONNECTING, WRITING, READING, FAILED };
/* Receive-side bookkeeping for one connection. */
struct incoming_krb5_message {
    /* How many of the 4 length-prefix bytes have been read so far (TCP). */
    size_t bufsizebytes_read;
    /* Size of 'buf' in bytes. */
    size_t bufsize;
    /* Start of the receive buffer. */
    char *buf;
    /* Position just past the data received so far. */
    char *pos;
    /* Raw length prefix as received, most significant byte first (TCP). */
    unsigned char bufsizebytes[4];
    /* Bytes of the message body still to be read (TCP). */
    size_t n_left;
};
/* State of one connection attempt to a single server address. */
struct conn_state {
    /* Socket, or INVALID_SOCKET when none is open. */
    SOCKET fd;
    /* Error recorded when the connection failed. */
    krb5_error_code err;
    enum conn_states state;
    /* 1 for a datagram connection, 0 for a stream connection. */
    unsigned int is_udp : 1;
    /*
     * Handler called when select reports activity on fd; the int argument
     * is a mask of SSF_* flags.  A nonzero return tells the caller to stop
     * servicing descriptors.
     */
    int (*service)(struct conn_state *, struct select_state *, int);
    /* Address being contacted. */
    struct addrinfo *addr;
    struct {
	struct {
	    /*
	     * Data to send.  Stream: [0] is the 4-byte length prefix and
	     * [1] the message.  Datagram: [0] is the message, [1] is empty.
	     */
	    sg_buf sgbuf[2];
	    /* Next buffer still to be written. */
	    sg_buf *sgp;
	    /* Number of buffers still to be written, starting at sgp. */
	    int sg_count;
	} out;
	struct incoming_krb5_message in;
    } x;
};
447 
/*
 * Store the current time of day in *tvp.
 *
 * Returns 0 on success, or the errno value set by gettimeofday() on
 * failure.
 */
static int
getcurtime (struct timeval *tvp)
{
    if (gettimeofday(tvp, 0) != 0) {
        /*
         * Capture errno before logging: with DEBUG defined, dperror ends
         * up calling syslog(), which may overwrite it.
         */
        int saved_errno = errno;

        dperror("gettimeofday");
        return saved_errno;
    }
    return 0;
}
456 
457 /*
458  * Call select and return results.
459  * Input: interesting file descriptors and absolute timeout
460  * Output: select return value (-1 or num fds ready) and fd_sets
461  * Return: 0 (for i/o available or timeout) or error code.
462  */
463 krb5_error_code
464 krb5int_cm_call_select (const struct select_state *in,
465 			struct select_state *out, int *sret)
466 {
467     struct timeval now, *timo;
468     krb5_error_code e;
469 
470     *out = *in;
471     e = getcurtime(&now);
472     if (e)
473 	return e;
474     if (out->end_time.tv_sec == 0)
475 	timo = 0;
476     else {
477 	timo = &out->end_time;
478 	out->end_time.tv_sec -= now.tv_sec;
479 	out->end_time.tv_usec -= now.tv_usec;
480 	if (out->end_time.tv_usec < 0) {
481 	    out->end_time.tv_usec += 1000000;
482 	    out->end_time.tv_sec--;
483 	}
484 	if (out->end_time.tv_sec < 0) {
485 	    *sret = 0;
486 	    return 0;
487 	}
488     }
489     /*LINTED*/
490     dprint("selecting on max=%d sockets [%F] timeout %t\n",
491 	    /*LINTED*/
492 	   out->max, &out->rfds, &out->wfds, &out->xfds, out->max, timo);
493     *sret = select(out->max, &out->rfds, &out->wfds, &out->xfds, timo);
494     e = SOCKET_ERRNO;
495 
496 #ifdef DEBUG
497     /*LINTED*/
498     dprint("select returns %d", *sret);
499     if (*sret < 0)
500 	/*LINTED*/
501 	dprint(", error = %E\n", e);
502     else if (*sret == 0)
503 	/*LINTED*/
504 	dprint(" (timeout)\n");
505     else
506 	/*LINTED*/
507 	dprint(":%F\n", &out->rfds, &out->wfds, &out->xfds, out->max);
508 #endif
509 
510     if (*sret < 0)
511 	return e;
512     return 0;
513 }
514 
515 static int service_tcp_fd (struct conn_state *conn,
516 			   struct select_state *selstate, int ssflags);
517 static int service_udp_fd (struct conn_state *conn,
518 			   struct select_state *selstate, int ssflags);
519 
520 
521 static int
522 setup_connection (struct conn_state *state, struct addrinfo *ai,
523 		  const krb5_data *message, unsigned char *message_len_buf,
524 		  char **udpbufp)
525 {
526     state->state = INITIALIZING;
527     state->err = 0;
528     state->x.out.sgp = state->x.out.sgbuf;
529     state->addr = ai;
530     state->fd = INVALID_SOCKET;
531     SG_SET(&state->x.out.sgbuf[1], 0, 0);
532     if (ai->ai_socktype == SOCK_STREAM) {
533 	SG_SET(&state->x.out.sgbuf[0], message_len_buf, 4);
534 	SG_SET(&state->x.out.sgbuf[1], message->data, message->length);
535 	state->x.out.sg_count = 2;
536 	state->is_udp = 0;
537 	state->service = service_tcp_fd;
538     } else {
539 	SG_SET(&state->x.out.sgbuf[0], message->data, message->length);
540 	SG_SET(&state->x.out.sgbuf[1], 0, 0);
541 	state->x.out.sg_count = 1;
542 	state->is_udp = 1;
543 	state->service = service_udp_fd;
544 
545 	if (*udpbufp == 0) {
546 	    *udpbufp = malloc(krb5_max_dgram_size);
547 	    if (*udpbufp == 0) {
548 		dperror("malloc(krb5_max_dgram_size)");
549 		(void) closesocket(state->fd);
550 		state->fd = INVALID_SOCKET;
551 		state->state = FAILED;
552 		return 1;
553 	    }
554 	}
555 	state->x.in.buf = *udpbufp;
556 	state->x.in.bufsize = krb5_max_dgram_size;
557     }
558     return 0;
559 }
560 
561 static int
562 start_connection (struct conn_state *state, struct select_state *selstate)
563 {
564     int fd, e;
565     struct addrinfo *ai = state->addr;
566 
567     /*LINTED*/
568     dprint("start_connection(@%p)\ngetting %s socket in family %d...", state,
569 	   /*LINTED*/
570 	   ai->ai_socktype == SOCK_STREAM ? "stream" : "dgram", ai->ai_family);
571     fd = socket(ai->ai_family, ai->ai_socktype, 0);
572     if (fd == INVALID_SOCKET) {
573 	state->err = SOCKET_ERRNO;
574 	/*LINTED*/
575 	dprint("socket: %m creating with af %d\n", state->err, ai->ai_family);
576 	return -1;		/* try other hosts */
577     }
578     /* Make it non-blocking.  */
579     if (ai->ai_socktype == SOCK_STREAM) {
580 	static const int one = 1;
581 	static const struct linger lopt = { 0, 0 };
582 
583 	if (ioctlsocket(fd, FIONBIO, (const void *) &one))
584 	    dperror("sendto_kdc: ioctl(FIONBIO)");
585 	if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lopt, sizeof(lopt)))
586 	    dperror("sendto_kdc: setsockopt(SO_LINGER)");
587     }
588 
589     /* Start connecting to KDC.  */
590     /*LINTED*/
591     dprint(" fd %d; connecting to %A...\n", fd, ai);
592     e = connect(fd, ai->ai_addr, ai->ai_addrlen);
593     if (e != 0) {
594 	/*
595 	 * This is the path that should be followed for non-blocking
596 	 * connections.
597 	 */
598 	if (SOCKET_ERRNO == EINPROGRESS || SOCKET_ERRNO == EWOULDBLOCK) {
599 	    state->state = CONNECTING;
600 	} else {
601 	    /*LINTED*/
602 	    dprint("connect failed: %m\n", SOCKET_ERRNO);
603 	    state->err = SOCKET_ERRNO;
604 	    state->state = FAILED;
605 	    return -2;
606 	}
607     } else {
608 	/*
609 	 * Connect returned zero even though we tried to make it
610 	 * non-blocking, which should have caused it to return before
611 	 * finishing the connection.  Oh well.  Someone's network
612 	 * stack is broken, but if they gave us a connection, use it.
613 	 */
614 	state->state = WRITING;
615     }
616     /*LINTED*/
617     dprint("new state = %s\n", state_strings[state->state]);
618 
619     state->fd = fd;
620 
621     if (ai->ai_socktype == SOCK_DGRAM) {
622 	/* Send it now.  */
623 	int ret;
624 	sg_buf *sg = &state->x.out.sgbuf[0];
625 
626 	/*LINTED*/
627 	dprint("sending %d bytes on fd %d\n", SG_LEN(sg), state->fd);
628 	ret = send(state->fd, SG_BUF(sg), SG_LEN(sg), 0);
629 	if (ret != SG_LEN(sg)) {
630 	    dperror("sendto");
631 	    (void) closesocket(state->fd);
632 	    state->fd = INVALID_SOCKET;
633 	    state->state = FAILED;
634 	    return -3;
635 	} else {
636 	    state->state = READING;
637 	}
638     }
639 
640     FD_SET(state->fd, &selstate->rfds);
641     if (state->state == CONNECTING || state->state == WRITING)
642 	FD_SET(state->fd, &selstate->wfds);
643     FD_SET(state->fd, &selstate->xfds);
644     if (selstate->max <= state->fd)
645 	selstate->max = state->fd + 1;
646     selstate->nfds++;
647 
648     /*LINTED*/
649     dprint("new select vectors: %F\n",
650 	   /*LINTED*/
651 	   &selstate->rfds, &selstate->wfds, &selstate->xfds, selstate->max);
652 
653     return 0;
654 }
655 
656 /* Return 0 if we sent something, non-0 otherwise.
657    If 0 is returned, the caller should delay waiting for a response.
658    Otherwise, the caller should immediately move on to process the
659    next connection.  */
660 static int
661 maybe_send (struct conn_state *conn, struct select_state *selstate)
662 {
663     sg_buf *sg;
664 
665     /*LINTED*/
666     dprint("maybe_send(@%p) state=%s type=%s\n", conn,
667 	   /*LINTED*/
668 	   state_strings[conn->state], conn->is_udp ? "udp" : "tcp");
669     if (conn->state == INITIALIZING)
670 	return start_connection(conn, selstate);
671 
672     /* Did we already shut down this channel?  */
673     if (conn->state == FAILED) {
674 	dprint("connection already closed\n");
675 	return -1;
676     }
677 
678     if (conn->addr->ai_socktype == SOCK_STREAM) {
679 	dprint("skipping stream socket\n");
680 	/* The select callback will handle flushing any data we
681 	   haven't written yet, and we only write it once.  */
682 	return -1;
683     }
684 
685     /* UDP - Send message, possibly for the first time, possibly a
686        retransmit if a previous attempt timed out.  */
687     sg = &conn->x.out.sgbuf[0];
688     /*LINTED*/
689     dprint("sending %d bytes on fd %d\n", SG_LEN(sg), conn->fd);
690     if (send(conn->fd, SG_BUF(sg), SG_LEN(sg), 0) != SG_LEN(sg)) {
691 	dperror("send");
692 	/* Keep connection alive, we'll try again next pass.
693 
694 	   Is this likely to catch any errors we didn't get from the
695 	   select callbacks?  */
696 	return -1;
697     }
698     /* Yay, it worked.  */
699     return 0;
700 }
701 
702 static void
703 kill_conn(struct conn_state *conn, struct select_state *selstate, int err)
704 {
705     conn->state = FAILED;
706     shutdown(conn->fd, SHUTDOWN_BOTH);
707     FD_CLR(conn->fd, &selstate->rfds);
708     FD_CLR(conn->fd, &selstate->wfds);
709     FD_CLR(conn->fd, &selstate->xfds);
710     conn->err = err;
711     /*LINTED*/
712     dprint("abandoning connection %d: %m\n", conn->fd, err);
713     /* Fix up max fd for next select call.  */
714     if (selstate->max == 1 + conn->fd) {
715 	while (selstate->max > 0
716 	       && ! FD_ISSET(selstate->max-1, &selstate->rfds)
717 	       && ! FD_ISSET(selstate->max-1, &selstate->wfds)
718 	       && ! FD_ISSET(selstate->max-1, &selstate->xfds))
719 	    selstate->max--;
720 	/*LINTED*/
721 	dprint("new max_fd + 1 is %d\n", selstate->max);
722     }
723     selstate->nfds--;
724 }
725 
726 /* Return nonzero only if we're finished and the caller should exit
727    its loop.  This happens in two cases: We have a complete message,
728    or the socket has closed and no others are open.  */
729 
730 static int
731 service_tcp_fd (struct conn_state *conn, struct select_state *selstate,
732 		int ssflags)
733 {
734     krb5_error_code e = 0;
735     int nwritten, nread;
736 
737     if (!(ssflags & (SSF_READ|SSF_WRITE|SSF_EXCEPTION)))
738 	abort();
739     switch (conn->state) {
740 	SOCKET_WRITEV_TEMP tmp;
741 
742     case CONNECTING:
743 	if (ssflags & SSF_READ) {
744 	    /* Bad -- the KDC shouldn't be sending to us first.  */
745 	    e = EINVAL /* ?? */;
746 	kill_conn:
747 	    kill_conn(conn, selstate, e);
748 	    if (e == EINVAL) {
749 		closesocket(conn->fd);
750 		conn->fd = INVALID_SOCKET;
751 	    }
752 	    return e == 0;
753 	}
754 	if (ssflags & SSF_EXCEPTION) {
755 	handle_exception:
756 	    e = 1;		/* need only be non-zero */
757 	    goto kill_conn;
758 	}
759 
760 	/*
761 	 * Connect finished -- but did it succeed or fail?
762 	 * UNIX sets can_write if failed.
763 	 * Try writing, I guess, and find out.
764 	 */
765 	conn->state = WRITING;
766 	goto try_writing;
767 
768     case WRITING:
769 	if (ssflags & SSF_READ) {
770 	    e = E2BIG;
771 	    /* Bad -- the KDC shouldn't be sending anything yet.  */
772 	    goto kill_conn;
773 	}
774 	if (ssflags & SSF_EXCEPTION)
775 	    goto handle_exception;
776 
777     try_writing:
778 	/*LINTED*/
779 	dprint("trying to writev %d (%d bytes) to fd %d\n",
780 		/*LINTED*/
781 	       conn->x.out.sg_count,
782 	       ((conn->x.out.sg_count == 2 ? SG_LEN(&conn->x.out.sgp[1]) : 0)
783 		/*LINTED*/
784 		+ SG_LEN(&conn->x.out.sgp[0])),
785 	       conn->fd);
786 	nwritten = SOCKET_WRITEV(conn->fd, conn->x.out.sgp,
787 				 conn->x.out.sg_count, tmp);
788 	if (nwritten < 0) {
789 	    e = SOCKET_ERRNO;
790 	    /*LINTED*/
791 	    dprint("failed: %m\n", e);
792 	    goto kill_conn;
793 	}
794 	/*LINTED*/
795 	dprint("wrote %d bytes\n", nwritten);
796 	while (nwritten) {
797 	    sg_buf *sgp = conn->x.out.sgp;
798 	    if (nwritten < SG_LEN(sgp)) {
799 		/*LINTED*/
800 		SG_ADVANCE(sgp, nwritten);
801 		nwritten = 0;
802 	    } else {
803 		nwritten -= SG_LEN(conn->x.out.sgp);
804 		conn->x.out.sgp++;
805 		conn->x.out.sg_count--;
806 		if (conn->x.out.sg_count == 0 && nwritten != 0)
807 		    /* Wrote more than we wanted to?  */
808 		    abort();
809 	    }
810 	}
811 	if (conn->x.out.sg_count == 0) {
812 	    /* Done writing, switch to reading.  */
813 	    /* Don't call shutdown at this point because
814 	     * some implementations cannot deal with half-closed connections.*/
815 	    FD_CLR(conn->fd, &selstate->wfds);
816 	    /* Q: How do we detect failures to send the remaining data
817 	       to the remote side, since we're in non-blocking mode?
818 	       Will we always get errors on the reading side?  */
819 	    /*LINTED*/
820 	    dprint("switching fd %d to READING\n", conn->fd);
821 	    conn->state = READING;
822 	    conn->x.in.bufsizebytes_read = 0;
823 	    conn->x.in.bufsize = 0;
824 	    conn->x.in.buf = 0;
825 	    conn->x.in.pos = 0;
826 	    conn->x.in.n_left = 0;
827 	}
828 	return 0;
829 
830     case READING:
831 	if (ssflags & SSF_EXCEPTION) {
832 	    if (conn->x.in.buf) {
833 		free(conn->x.in.buf);
834 		conn->x.in.buf = 0;
835 	    }
836 	    goto handle_exception;
837 	}
838 
839 	if (conn->x.in.bufsizebytes_read == 4) {
840 	    /* Reading data.  */
841 	    /*LINTED*/
842 	    dprint("reading %d bytes of data from fd %d\n",
843 		   (int) conn->x.in.n_left, conn->fd);
844 	    nread = SOCKET_READ(conn->fd, conn->x.in.pos, conn->x.in.n_left);
845 	    if (nread <= 0) {
846 		e = nread ? SOCKET_ERRNO : ECONNRESET;
847 		free(conn->x.in.buf);
848 		conn->x.in.buf = 0;
849 		goto kill_conn;
850 	    }
851 	    conn->x.in.n_left -= nread;
852 	    conn->x.in.pos += nread;
853 	    if ((long)conn->x.in.n_left <= 0) {
854 		/* We win!  */
855 		return 1;
856 	    }
857 	} else {
858 	    /* Reading length.  */
859 	    nread = SOCKET_READ(conn->fd,
860 				conn->x.in.bufsizebytes + conn->x.in.bufsizebytes_read,
861 				4 - conn->x.in.bufsizebytes_read);
862 	    if (nread < 0) {
863 		e = SOCKET_ERRNO;
864 		goto kill_conn;
865 	    }
866 	    conn->x.in.bufsizebytes_read += nread;
867 	    if (conn->x.in.bufsizebytes_read == 4) {
868 		unsigned long len;
869 		len = conn->x.in.bufsizebytes[0];
870 		len = (len << 8) + conn->x.in.bufsizebytes[1];
871 		len = (len << 8) + conn->x.in.bufsizebytes[2];
872 		len = (len << 8) + conn->x.in.bufsizebytes[3];
873 		/*LINTED*/
874 		dprint("received length on fd %d is %d\n", conn->fd, (int)len);
875 		/* Arbitrary 1M cap.  */
876 		if (len > 1 * 1024 * 1024) {
877 		    e = E2BIG;
878 		    goto kill_conn;
879 		}
880 		conn->x.in.bufsize = conn->x.in.n_left = len;
881 		conn->x.in.buf = conn->x.in.pos = malloc(len);
882 		/*LINTED*/
883 		dprint("allocated %d byte buffer at %p\n", (int) len,
884 		       conn->x.in.buf);
885 		if (conn->x.in.buf == 0) {
886 		    /* allocation failure */
887 		    e = errno;
888 		    goto kill_conn;
889 		}
890 	    }
891 	}
892 	break;
893 
894     default:
895 	abort();
896     }
897     return 0;
898 }
899 
900 static int
901 service_udp_fd(struct conn_state *conn, struct select_state *selstate,
902 	       int ssflags)
903 {
904     int nread;
905 
906     if (!(ssflags & (SSF_READ|SSF_EXCEPTION)))
907 	abort();
908     if (conn->state != READING)
909 	abort();
910 
911     nread = recv(conn->fd, conn->x.in.buf, conn->x.in.bufsize, 0);
912     if (nread < 0) {
913 	kill_conn(conn, selstate, SOCKET_ERRNO);
914 	return 0;
915     }
916     conn->x.in.pos = conn->x.in.buf + nread;
917     return 1;
918 }
919 
920 static int
921 service_fds (struct select_state *selstate,
922 	     struct conn_state *conns, size_t n_conns, int *winning_conn)
923 {
924     int e, selret;
925     struct select_state sel_results;
926 
927     e = 0;
928     while (selstate->nfds > 0
929 	   && (e = krb5int_cm_call_select(selstate, &sel_results, &selret)) == 0) {
930 	int i;
931 
932 	/*LINTED*/
933 	dprint("service_fds examining results, selret=%d\n", selret);
934 
935 	if (selret == 0)
936 	    /* Timeout, return to caller.  */
937 	    return 0;
938 
939 	/* Got something on a socket, process it.  */
940 	for (i = 0; i <= selstate->max && selret > 0 && i < n_conns; i++) {
941 	    int ssflags;
942 
943 	    if (conns[i].fd == INVALID_SOCKET)
944 		continue;
945 	    ssflags = 0;
946 	    if (FD_ISSET(conns[i].fd, &sel_results.rfds))
947 		ssflags |= SSF_READ, selret--;
948 	    if (FD_ISSET(conns[i].fd, &sel_results.wfds))
949 		ssflags |= SSF_WRITE, selret--;
950 	    if (FD_ISSET(conns[i].fd, &sel_results.xfds))
951 		ssflags |= SSF_EXCEPTION, selret--;
952 	    if (!ssflags)
953 		continue;
954 
955 	    /*LINTED*/
956 	    dprint("handling flags '%s%s%s' on fd %d (%A) in state %s\n",
957 		    /*LINTED*/
958 		   (ssflags & SSF_READ) ? "r" : "",
959 		    /*LINTED*/
960 		   (ssflags & SSF_WRITE) ? "w" : "",
961 		    /*LINTED*/
962 		   (ssflags & SSF_EXCEPTION) ? "x" : "",
963 		    /*LINTED*/
964 		   conns[i].fd, conns[i].addr,
965 		   state_strings[(int) conns[i].state]);
966 
967 	    if (conns[i].service (&conns[i], selstate, ssflags)) {
968 		dprint("fd service routine says we're done\n");
969 		*winning_conn = i;
970 		return 1;
971 	    }
972 	}
973     }
974     if (e != 0) {
975 	/*LINTED*/
976 	dprint("select returned %m\n", e);
977 	*winning_conn = -1;
978 	return 1;
979     }
980     return 0;
981 }
982 
983 /*
984  * Current worst-case timeout behavior:
985  *
986  * First pass, 1s per udp or tcp server, plus 2s at end.
987  * Second pass, 1s per udp server, plus 4s.
988  * Third pass, 1s per udp server, plus 8s.
989  * Fourth => 16s, etc.
990  *
991  * Restated:
992  * Per UDP server, 1s per pass.
993  * Per TCP server, 1s.
994  * Backoff delay, 2**(P+1) - 2, where P is total number of passes.
995  *
996  * Total = 2**(P+1) + U*P + T - 2.
997  *
998  * If P=3, Total = 3*U + T + 14.
999  * If P=4, Total = 4*U + T + 30.
1000  *
1001  * Note that if you try to reach two ports (e.g., both 88 and 750) on
1002  * one server, it counts as two.
1003  */
1004 
1005 krb5_error_code
1006 /*ARGSUSED*/
1007 krb5int_sendto (krb5_context context, const krb5_data *message,
1008 		const struct addrlist *addrs, krb5_data *reply,
1009 		struct sockaddr_storage *localaddr, socklen_t *localaddrlen,
1010 		int *addr_used)
1011 {
1012     int i, pass;
1013     int delay_this_pass = 2;
1014     krb5_error_code retval;
1015     struct conn_state *conns;
1016     size_t n_conns, host;
1017     struct select_state select_state;
1018     struct timeval now;
1019     int winning_conn = -1, e = 0;
1020     unsigned char message_len_buf[4];
1021     char *udpbuf = 0;
1022 
1023     /*LINTED*/
1024     dprint("krb5int_sendto(message=%d@%p)\n", message->length, message->data);
1025 
1026     reply->data = 0;
1027     reply->length = 0;
1028 
1029     n_conns = addrs->naddrs;
1030     conns = malloc(n_conns * sizeof(struct conn_state));
1031     if (conns == NULL) {
1032 	return ENOMEM;
1033     }
1034     memset(conns, 0, n_conns * sizeof(conns[i]));
1035     for (i = 0; i < n_conns; i++) {
1036 	conns[i].fd = INVALID_SOCKET;
1037     }
1038 
1039     select_state.max = 0;
1040     select_state.nfds = 0;
1041     FD_ZERO(&select_state.rfds);
1042     FD_ZERO(&select_state.wfds);
1043     FD_ZERO(&select_state.xfds);
1044 
1045     message_len_buf[0] = (message->length >> 24) & 0xff;
1046     message_len_buf[1] = (message->length >> 16) & 0xff;
1047     message_len_buf[2] = (message->length >>  8) & 0xff;
1048     message_len_buf[3] =  message->length        & 0xff;
1049 
1050     /* Set up connections.  */
1051     for (host = 0; host < n_conns; host++) {
1052 	retval = setup_connection (&conns[host], addrs->addrs[host],
1053 				   message, message_len_buf, &udpbuf);
1054 	if (retval)
1055 	    continue;
1056     }
1057     for (pass = 0; pass < MAX_PASS; pass++) {
1058 	/* Possible optimization: Make only one pass if TCP only.
1059 	   Stop making passes if all UDP ports are closed down.  */
1060 	/*LINTED*/
1061 	dprint("pass %d delay=%d\n", pass, delay_this_pass);
1062 	for (host = 0; host < n_conns; host++) {
1063 	    /*LINTED*/
1064 	    dprint("host %d\n", host);
1065 
1066 	    /* Send to the host, wait for a response, then move on. */
1067 	    if (maybe_send(&conns[host], &select_state))
1068 		continue;
1069 
1070 	    retval = getcurtime(&now);
1071 	    if (retval)
1072 		goto egress;
1073 	    select_state.end_time = now;
1074 	    select_state.end_time.tv_sec += 1;
1075 	    e = service_fds(&select_state, conns, host+1, &winning_conn);
1076 	    if (e)
1077 		break;
1078 	    if (pass > 0 && select_state.nfds == 0)
1079 		/*
1080 		 * After the first pass, if we close all fds, break
1081 		 * out right away.  During the first pass, it's okay,
1082 		 * we're probably about to open another connection.
1083 		 */
1084 		break;
1085 	}
1086 	if (e)
1087 	    break;
1088 	retval = getcurtime(&now);
1089 	if (retval)
1090 	    goto egress;
1091 	/* Possible optimization: Find a way to integrate this select
1092 	   call with the last one from the above loop, if the loop
1093 	   actually calls select.  */
1094 	select_state.end_time.tv_sec += delay_this_pass;
1095 	e = service_fds(&select_state, conns, host+1, &winning_conn);
1096 	if (e)
1097 	    break;
1098 	if (select_state.nfds == 0)
1099 	    break;
1100 	delay_this_pass *= 2;
1101     }
1102 
1103     if (select_state.nfds == 0) {
1104 	/* No addresses?  */
1105 	retval = KRB5_KDC_UNREACH;
1106 	goto egress;
1107     }
1108     if (e == 0 || winning_conn < 0) {
1109 	retval = KRB5_KDC_UNREACH;
1110 	goto egress;
1111     }
1112     /* Success!  */
1113     reply->data = conns[winning_conn].x.in.buf;
1114     reply->length = (conns[winning_conn].x.in.pos
1115 		     - conns[winning_conn].x.in.buf);
1116     /*LINTED*/
1117     dprint("returning %d bytes in buffer %p (winning_conn=%d)\n",
1118 	(int) reply->length, reply->data, winning_conn);
1119     retval = 0;
1120     conns[winning_conn].x.in.buf = 0;
1121     if (addr_used)
1122 	    *addr_used = winning_conn;
1123     if (localaddr != 0 && localaddrlen != 0 && *localaddrlen > 0)
1124 	(void) getsockname(conns[winning_conn].fd, (struct sockaddr *)localaddr,
1125 			   localaddrlen);
1126 egress:
1127     for (i = 0; i < n_conns; i++) {
1128 	if (conns[i].fd != INVALID_SOCKET)
1129 	    close(conns[i].fd);
1130 	if (conns[i].state == READING
1131 	    && conns[i].x.in.buf != 0
1132 	    && conns[i].x.in.buf != udpbuf)
1133 	    free(conns[i].x.in.buf);
1134     }
1135     free(conns);
1136     if (reply->data != udpbuf)
1137 	free(udpbuf);
1138     return retval;
1139 }
1140