xref: /illumos-gate/usr/src/stand/lib/inet/ipv4.c (revision 3db86aab)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * ipv4.c, Code implementing the IPv4 internet protocol.
27  */
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 #include <sys/types.h>
32 #include <socket_impl.h>
33 #include <socket_inet.h>
34 #include <sys/sysmacros.h>
35 #include <sys/socket.h>
36 #include <netinet/in_systm.h>
37 #include <netinet/in.h>
38 #include <netinet/ip.h>
39 #include <netinet/udp.h>
40 #include <net/if_arp.h>
41 #include <sys/promif.h>
42 #include <sys/bootconf.h>
43 #include <sys/fcntl.h>
44 #include <sys/salib.h>
45 
46 #include "icmp4.h"
47 #include "ipv4.h"
48 #include "ipv4_impl.h"
49 #include "mac.h"
50 #include "mac_impl.h"
51 #include "v4_sum_impl.h"
52 #include <sys/bootdebug.h>
53 
54 static struct ip_frag	fragment[FRAG_MAX];	/* ip fragment buffers */
55 static int		fragments;		/* Number of fragments */
56 static uint8_t		ttl = MAXTTL;		/* IP ttl */
57 static struct in_addr	myip;			/* our network-order IP addr */
58 static struct in_addr	mynet;			/* net-order netaddr */
59 static struct in_addr	netmask =
60 	{ 0xff, 0xff, 0xff, 0xff };		/* our network-order netmask */
61 static boolean_t	netmask_set = B_FALSE;	/* has anyone set netmask? */
62 static struct in_addr	defaultrouter;		/* net-order defaultrouter */
63 static int		promiscuous;		/* promiscuous mode */
64 static struct routing table[IPV4_ROUTE_TABLE_SIZE];
65 
66 static uint16_t	g_ip_id;
67 
68 #ifdef	DEBUG
69 #define	FRAG_DEBUG
70 #endif	/* DEBUG */
71 
#ifdef FRAG_DEBUG
/*
 * Debugging aid: print one row per occupied slot of the fragment table,
 * then the sum of the payload lengths (iplen - iphlen) next to the
 * expected total passed in as 'size'.
 */
static void
frag_disp(uint16_t size)
{
	struct ip_frag	*fp;
	uint_t		payload_sum = 0;

	printf("Dumping fragment info: (%d)\n\n", fragments);
	printf("More:\tOffset:\tDatap:\t\tIPid:\t\tIPlen:\tIPhlen:\n");
	for (fp = fragment; fp < &fragment[FRAG_MAX]; fp++) {
		if (fp->mp != NULL) {
			printf("%d\t%d\t0x%x\t%d\t\t%d\t%d\n", fp->more,
			    fp->offset, fp->mp->b_rptr,
			    fp->ipid, fp->iplen, fp->iphlen);
			payload_sum += (fp->iplen - fp->iphlen);
		}
	}
	printf("Total length is: %d. It should be: %d\n\n", payload_sum, size);
}
#endif /* FRAG_DEBUG */
95 
96 /*
97  * This function returns index of fragment 0 of the current fragmented DGRAM
98  * (which would contain the transport header). Return the fragment number
99  * for success, -1 if we don't yet have the first fragment.
100  */
101 static int
102 frag_first(void)
103 {
104 	int		i;
105 
106 	if (fragments == 0)
107 		return (-1);
108 
109 	for (i = 0; i < FRAG_MAX; i++) {
110 		if (fragment[i].mp != NULL && fragment[i].offset == 0)
111 			return (i);
112 	}
113 	return (-1);
114 }
115 
116 /*
117  * This function returns index of the last fragment of the current DGRAM.
118  * Returns the fragment number for success, -1 if we don't yet have the
119  * last fragment.
120  */
121 static int
122 frag_last(void)
123 {
124 	int		i;
125 
126 	if (fragments == 0)
127 		return (-1);
128 
129 	for (i = 0; i < FRAG_MAX; i++) {
130 		if (fragment[i].mp != NULL && !fragment[i].more)
131 			return (i);
132 	}
133 	return (-1);
134 }
135 
136 /*
137  * This function adds a fragment to the current pkt fragment list. Returns
138  * FRAG_NOSLOTS if there are no more slots, FRAG_DUP if the fragment is
139  * a duplicate, or FRAG_SUCCESS if it is successful.
140  */
141 static int
142 frag_add(int16_t offset, mblk_t *mp, uint16_t ipid,
143     int16_t iplen, int16_t iphlen, uint8_t ipp)
144 {
145 	int	i;
146 	int16_t	true_offset = IPV4_OFFSET(offset);
147 
148 	/* first pass - look for duplicates */
149 	for (i = 0; i < FRAG_MAX; i++) {
150 		if (fragment[i].mp != NULL &&
151 		    fragment[i].offset == true_offset)
152 			return (FRAG_DUP);
153 	}
154 
155 	/* second pass - fill in empty slot */
156 	for (i = 0; i < FRAG_MAX; i++) {
157 		if (fragment[i].mp == NULL) {
158 			fragment[i].more = (offset & IP_MF);
159 			fragment[i].offset = true_offset;
160 			fragment[i].mp = mp;
161 			fragment[i].ipid = ipid;
162 			fragment[i].iplen = iplen;
163 			fragment[i].iphlen = iphlen;
164 			fragment[i].ipp = ipp;
165 			fragments++;
166 			return (FRAG_SUCCESS);
167 		}
168 	}
169 	return (FRAG_NOSLOTS);
170 }
171 
172 /*
173  * Nuke a fragment.
174  */
175 static void
176 frag_free(int index)
177 {
178 	if (fragment[index].mp != NULL) {
179 		freeb(fragment[index].mp);
180 		fragments--;
181 	}
182 	bzero((caddr_t)&fragment[index], sizeof (struct ip_frag));
183 }
184 
185 /*
186  * zero the frag list.
187  */
188 static void
189 frag_flush(void)
190 {
191 	int i;
192 
193 	for (i = 0; i < FRAG_MAX; i++)
194 		frag_free(i);
195 
196 	fragments = 0;
197 }
198 
199 /*
200  * Analyze the fragment list - see if we captured all our fragments.
201  *
202  * Returns TRUE if we've got all the fragments, and FALSE if we don't.
203  */
204 static int
205 frag_chk(void)
206 {
207 	int		i, first_frag, last_frag;
208 	int16_t		actual, total;
209 	uint16_t	ip_id;
210 	uint8_t		ipp;
211 
212 	if (fragments == 0 || (first_frag = frag_first()) < 0 ||
213 	    (last_frag = frag_last()) < 0)
214 		return (FALSE);
215 
216 	/*
217 	 * Validate the ipid's of our fragments - nuke those that don't
218 	 * match the id of the first fragment or don't match the IP
219 	 * protocol of the first fragment.
220 	 */
221 	ip_id = fragment[first_frag].ipid;
222 	ipp = fragment[first_frag].ipp;
223 	for (i = 0; i < FRAG_MAX; i++) {
224 		if (fragment[i].mp != NULL && ip_id != fragment[i].ipid &&
225 			fragment[i].ipp != ipp) {
226 #ifdef FRAG_DEBUG
227 			printf("ipv4: Frag id mismatch: %x != %x\n",
228 			    fragment[i].ipid, ip_id);
229 #endif /* FRAG_DEBUG */
230 			frag_free(i);
231 		}
232 	}
233 
234 	if (frag_last() < 0)
235 		return (FALSE);
236 
237 	total = fragment[last_frag].offset + fragment[last_frag].iplen -
238 	    fragment[last_frag].iphlen;
239 
240 	for (i = 0, actual = 0; i < FRAG_MAX; i++)
241 		actual += (fragment[i].iplen - fragment[i].iphlen);
242 
243 #ifdef FRAG_DEBUG
244 	frag_disp(total);
245 #endif /* FRAG_DEBUG */
246 
247 	return (total == actual);
248 }
249 
250 /*
251  * Load the assembled fragments into igp. Returns 0 for success, nonzero
252  * otherwise.
253  */
254 static int
255 frag_load(struct inetgram *igp)
256 {
257 	int	i;
258 	int16_t	len;
259 	uint_t	total_len;
260 	boolean_t first_frag = B_FALSE;
261 	mblk_t *mp;
262 	struct ip *iph;
263 	int first_iph_len;
264 
265 	if (fragments == 0)
266 		return (ENOENT);
267 
268 	mp = igp->igm_mp;
269 	/* Get the IP header length of the first fragment. */
270 	i = frag_first();
271 	assert(i >= 0);
272 	first_iph_len = fragment[i].iphlen;
273 	for (i = 0, len = 0, total_len = 0; i < FRAG_MAX; i++) {
274 		if (fragment[i].mp != NULL) {
275 			/*
276 			 * Copy just the data (omit the ip header of all
277 			 * fragments except the first one which contains
278 			 * all the info...)
279 			 */
280 			if (fragment[i].offset == 0) {
281 				len = fragment[i].iplen;
282 				first_frag = B_TRUE;
283 			} else {
284 				len = fragment[i].iplen - fragment[i].iphlen;
285 			}
286 			total_len += len;
287 			if (total_len > mp->b_size)
288 				return (E2BIG);
289 			if (first_frag) {
290 				bcopy((caddr_t)(fragment[i].mp->b_rptr),
291 				    (caddr_t)mp->b_rptr, len);
292 				first_frag = B_FALSE;
293 			} else {
294 				bcopy((caddr_t)(fragment[i].mp->b_rptr +
295 				    fragment[i].iphlen),
296 				    (caddr_t)(mp->b_rptr + first_iph_len +
297 				    fragment[i].offset), len);
298 			}
299 			mp->b_wptr += len;
300 		}
301 	}
302 	/* Fix the total length in the IP header. */
303 	iph = (struct ip *)mp->b_rptr;
304 	iph->ip_len = htons(total_len);
305 	return (0);
306 }
307 
308 /*
309  * Locate a routing table entry based upon arguments. IP addresses expected
310  * in network order. Returns index for success, -1 if entry not found.
311  */
312 static int
313 find_route(uint8_t *flagp, struct in_addr *destp, struct in_addr *gatewayp)
314 {
315 	int i, table_entry = -1;
316 
317 	for (i = 0; table_entry == -1 && i < IPV4_ROUTE_TABLE_SIZE; i++) {
318 		if (flagp != NULL) {
319 			if (*flagp & table[i].flag)
320 				table_entry = i;
321 		}
322 		if (destp != NULL) {
323 			if (destp->s_addr == table[i].dest.s_addr)
324 				table_entry = i;
325 			else
326 				table_entry = -1;
327 		}
328 		if (gatewayp != NULL) {
329 			if (gatewayp->s_addr == table[i].gateway.s_addr)
330 				table_entry = i;
331 			else
332 				table_entry = -1;
333 		}
334 	}
335 	return (table_entry);
336 }
337 
338 /*
339  * ADD or DEL a routing table entry. Returns 0 for success, -1 and errno
340  * otherwise. IP addresses are expected in network order.
341  */
342 int
343 ipv4_route(int cmd, uint8_t flag, struct in_addr *destp,
344     struct in_addr *gatewayp)
345 {
346 	static	int	routing_table_initialized;
347 	int		index;
348 	uint8_t 	tmp_flag;
349 
350 	if (gatewayp == NULL) {
351 		errno = EINVAL;
352 		return (-1);
353 	}
354 
355 	/* initialize routing table */
356 	if (routing_table_initialized == 0) {
357 		for (index = 0; index < IPV4_ROUTE_TABLE_SIZE; index++)
358 			table[index].flag = RT_UNUSED;
359 		routing_table_initialized = 1;
360 	}
361 
362 	switch (cmd) {
363 	case IPV4_ADD_ROUTE:
364 		tmp_flag = (uint8_t)RT_UNUSED;
365 		if ((index = find_route(&tmp_flag, NULL, NULL)) == -1) {
366 			dprintf("ipv4_route: routing table full.\n");
367 			errno = ENOSPC;
368 			return (-1);
369 		}
370 		table[index].flag = flag;
371 		if (destp != NULL)
372 			table[index].dest.s_addr = destp->s_addr;
373 		else
374 			table[index].dest.s_addr = htonl(INADDR_ANY);
375 		table[index].gateway.s_addr = gatewayp->s_addr;
376 		break;
377 	case IPV4_BAD_ROUTE:
378 		/* FALLTHRU */
379 	case IPV4_DEL_ROUTE:
380 		if ((index = find_route(&flag, destp, gatewayp)) == -1) {
381 			dprintf("ipv4_route: No such routing entry.\n");
382 			errno = ENOENT;
383 			return (-1);
384 		}
385 		if (cmd == IPV4_DEL_ROUTE) {
386 			table[index].flag = RT_UNUSED;
387 			table[index].dest.s_addr = htonl(INADDR_ANY);
388 			table[index].gateway.s_addr = htonl(INADDR_ANY);
389 		} else
390 			table[index].flag = RT_NG;
391 	default:
392 		errno = EINVAL;
393 		return (-1);
394 	}
395 	return (0);
396 }
397 
398 /*
399  * Return gateway to destination. Returns gateway IP address in network order
400  * for success, NULL if no route to destination exists.
401  */
402 struct in_addr *
403 ipv4_get_route(uint8_t flag, struct in_addr *destp, struct in_addr *gatewayp)
404 {
405 	int index;
406 	if ((index = find_route(&flag, destp, gatewayp)) == -1)
407 		return (NULL);
408 	return (&table[index].gateway);
409 }
410 
411 /*
412  * Initialize the IPv4 generic parts of the socket, as well as the routing
413  * table.
414  */
415 void
416 ipv4_socket_init(struct inetboot_socket *isp)
417 {
418 	isp->input[NETWORK_LVL] = ipv4_input;
419 	isp->output[NETWORK_LVL] = ipv4_output;
420 	isp->close[NETWORK_LVL] = NULL;
421 	isp->headerlen[NETWORK_LVL] = ipv4_header_len;
422 }
423 
424 /*
425  * Initialize a raw ipv4 socket.
426  */
427 void
428 ipv4_raw_socket(struct inetboot_socket *isp, uint8_t proto)
429 {
430 	isp->type = INETBOOT_RAW;
431 	if (proto == 0)
432 		isp->proto = IPPROTO_IP;
433 	else
434 		isp->proto = proto;
435 	isp->input[TRANSPORT_LVL] = NULL;
436 	isp->output[TRANSPORT_LVL] = NULL;
437 	isp->headerlen[TRANSPORT_LVL] = NULL;
438 	isp->ports = NULL;
439 }
440 
441 /*
442  * Return the size of an IPv4 header (no options)
443  */
444 /* ARGSUSED */
445 int
446 ipv4_header_len(struct inetgram *igm)
447 {
448 	return (sizeof (struct ip));
449 }
450 
451 /*
452  * Set our source address.
453  * Argument is assumed to be host order.
454  */
455 void
456 ipv4_setipaddr(struct in_addr *ip)
457 {
458 	myip.s_addr = htonl(ip->s_addr);
459 }
460 
461 /*
462  * Returns our current source address in host order.
463  */
464 void
465 ipv4_getipaddr(struct in_addr *ip)
466 {
467 	ip->s_addr = ntohl(myip.s_addr);
468 }
469 
470 /*
471  * Set our netmask.
472  * Argument is assumed to be host order.
473  */
474 void
475 ipv4_setnetmask(struct in_addr *ip)
476 {
477 	netmask_set = B_TRUE;
478 	netmask.s_addr = htonl(ip->s_addr);
479 	mynet.s_addr = netmask.s_addr & myip.s_addr; /* implicit */
480 }
481 
482 void
483 ipv4_getnetid(struct in_addr *my_netid)
484 {
485 	struct in_addr my_netmask;
486 	if (mynet.s_addr != 0)
487 		my_netid->s_addr = ntohl(mynet.s_addr);
488 	else {
489 		ipv4_getnetmask(&my_netmask);
490 		my_netid->s_addr = my_netmask.s_addr & ntohl(myip.s_addr);
491 	}
492 }
493 
494 /*
495  * Returns our current netmask in host order.
496  * Neither OBP nor the standalone DHCP client mandate
497  * that the netmask be specified, so in the absence of
498  * a netmask, we attempt to derive it using class-based
499  * heuristics.
500  */
501 void
502 ipv4_getnetmask(struct in_addr *ip)
503 {
504 	if (netmask_set || (myip.s_addr == 0))
505 		ip->s_addr = ntohl(netmask.s_addr);
506 	else {
507 		/* base the netmask on our IP address */
508 		if (IN_CLASSA(ntohl(myip.s_addr)))
509 			ip->s_addr = ntohl(IN_CLASSA_NET);
510 		else if (IN_CLASSB(ntohl(myip.s_addr)))
511 			ip->s_addr = ntohl(IN_CLASSB_NET);
512 		else
513 			ip->s_addr = ntohl(IN_CLASSC_NET);
514 	}
515 }
516 
517 /*
518  * Set our default router.
519  * Argument is assumed to be host order, and *MUST* be on the same network
520  * as our source IP address.
521  */
522 void
523 ipv4_setdefaultrouter(struct in_addr *ip)
524 {
525 	defaultrouter.s_addr = htonl(ip->s_addr);
526 }
527 
528 /*
529  * Returns our current default router in host order.
530  */
531 void
532 ipv4_getdefaultrouter(struct in_addr *ip)
533 {
534 	ip->s_addr = ntohl(defaultrouter.s_addr);
535 }
536 
537 /*
538  * Toggle promiscuous flag. If set, client disregards destination IP
539  * address. Otherwise, only limited broadcast, network broadcast, and
540  * unicast traffic get through. Returns previous setting.
541  */
542 int
543 ipv4_setpromiscuous(int toggle)
544 {
545 	int old = promiscuous;
546 
547 	promiscuous = toggle;
548 
549 	return (old);
550 }
551 
552 /*
553  * Set IP TTL.
554  */
555 void
556 ipv4_setmaxttl(uint8_t cttl)
557 {
558 	ttl = cttl;
559 }
560 
/*
 * Format an IPv4 address as a dotted-decimal string. The four bytes of
 * the address are printed in memory order.
 *
 * Returns a pointer to a static buffer, which is overwritten by the next
 * call.
 */
char *
inet_ntoa(struct in_addr ip)
{
	static char	dotted[16];
	uint8_t		*octet = (uint8_t *)&ip.s_addr;

	(void) sprintf(dotted, "%u.%u.%u.%u",
	    octet[0], octet[1], octet[2], octet[3]);
	return (dotted);
}
575 
576 /*
577  * Construct a transport datagram from a series of IP fragments (igp == NULL)
578  * or from a single IP datagram (igp != NULL). Return the address of the
579  * contructed transport datagram.
580  */
581 struct inetgram *
582 make_trans_datagram(int index, struct inetgram *igp, struct in_addr ipsrc,
583     struct in_addr ipdst, uint16_t iphlen)
584 {
585 	uint16_t	trans_len, *transp, new_len;
586 	int		first_frag, last_frag;
587 	boolean_t	fragmented;
588 	struct inetgram	*ngp;
589 	struct ip	*iph;
590 
591 	fragmented = (igp == NULL);
592 
593 	ngp = (struct inetgram *)bkmem_zalloc(sizeof (struct inetgram));
594 	if (ngp == NULL) {
595 		errno = ENOMEM;
596 		if (fragmented)
597 			frag_flush();
598 		return (NULL);
599 	}
600 
601 	if (fragmented) {
602 		last_frag = frag_last();
603 		trans_len = fragment[last_frag].offset +
604 		    fragment[last_frag].iplen - fragment[last_frag].iphlen;
605 		first_frag = frag_first();
606 		/*
607 		 * The returned buffer contains the IP header of the
608 		 * first fragment.
609 		 */
610 		trans_len += fragment[first_frag].iphlen;
611 		transp = (uint16_t *)(fragment[first_frag].mp->b_rptr +
612 		    fragment[first_frag].iphlen);
613 	} else {
614 		/*
615 		 * Note that igm_len may not be the real length of an
616 		 * IP packet because some network interface, such as
617 		 * Ethernet, as a minimum frame size.  So we should not
618 		 * use the interface frame size to determine the
619 		 * length of an IP packet.  We should use the IP
620 		 * length field in the IP header.
621 		 */
622 		iph = (struct ip *)igp->igm_mp->b_rptr;
623 		trans_len = ntohs(iph->ip_len);
624 		transp = (uint16_t *)(igp->igm_mp->b_rptr + iphlen);
625 	}
626 
627 	ngp->igm_saddr.sin_addr.s_addr = ipsrc.s_addr;
628 	ngp->igm_saddr.sin_port = sockets[index].ports(transp, SOURCE);
629 	ngp->igm_target.s_addr = ipdst.s_addr;
630 	ngp->igm_level = TRANSPORT_LVL;
631 
632 	/*
633 	 * Align to 16bit value.  Checksum code may require an extra byte
634 	 * for padding.
635 	 */
636 	new_len = ((trans_len + sizeof (int16_t) - 1) &
637 	    ~(sizeof (int16_t) - 1));
638 	if ((ngp->igm_mp = allocb(new_len, 0)) == NULL) {
639 		errno = ENOMEM;
640 		bkmem_free((caddr_t)ngp, sizeof (struct inetgram));
641 		if (fragmented)
642 			frag_flush();
643 		return (NULL);
644 	}
645 
646 	if (fragmented) {
647 		if (frag_load(ngp) != 0) {
648 			freeb(ngp->igm_mp);
649 			bkmem_free((caddr_t)ngp, sizeof (struct inetgram));
650 			frag_flush();
651 			return (NULL);
652 		}
653 		frag_flush();
654 	} else {
655 		bcopy((caddr_t)(igp->igm_mp->b_rptr),
656 		    (caddr_t)ngp->igm_mp->b_rptr, trans_len);
657 		ngp->igm_mp->b_wptr += trans_len;
658 	}
659 	return (ngp);
660 }
661 
662 /*
663  * ipv4_input: Pull in IPv4 datagrams addressed to us. Handle IP fragmentation
664  * (fragments received in any order) and ICMP at this level.
665  *
666  * Note that because our network is serviced by polling when we expect
667  * something (upon a referenced socket), we don't go through the work of
668  * locating the appropriate socket a datagram is destined for. We'll only
669  * accept data for the referenced socket. This means we don't have
670  * asynchronous networking, but since we can't service the net using an
671  * interrupt handler, it doesn't do us any good to try to service datagrams
672  * destined for sockets other than the referenced one. Data is handled in
673  * a fifo manner.
674  *
675  * The mac layer will grab all frames for us. If we find we don't have all
676  * the necessary fragments to reassemble the datagram, we'll call the mac
677  * layer again for FRAG_ATTEMPTS to see if it has any more frames.
678  *
679  * Supported protocols: IPPROTO_IP, IPPROTO_ICMP, IPPROTO_UDP.
680  *
681  * Returns: number of NETWORK_LVL datagrams placed on socket , -1 if error
682  * occurred.
683  *
684  * Note: errno is set to ETIMEDOUT if fragment reassembly fails.
685  */
686 int
687 ipv4_input(int index)
688 {
689 	int			datagrams = 0;
690 	int			frag_stat, input_attempts = 0;
691 	uint16_t		iphlen, iplen, ip_id;
692 	int16_t			curr_off;
693 	struct ip		*iphp;
694 	struct inetgram		*igp, *newgp = NULL, *ipv4_listp = NULL;
695 	struct in_addr		ipdst, ipsrc;
696 	mblk_t			*mp;
697 	enum SockType		type;
698 
699 #ifdef	DEBUG
700 	printf("ipv4_input(%d): start ######################################\n",
701 	    index);
702 #endif	/* DEBUG */
703 
704 	frag_flush();
705 
706 ipv4_try_again:
707 
708 	while ((igp = sockets[index].inq) != NULL) {
709 		if (igp->igm_level != NETWORK_LVL) {
710 #ifdef	DEBUG
711 			printf("ipv4_input(%d): unexpected frame type: %d\n",
712 			    index, igp->igm_level);
713 #endif	/* DEBUG */
714 			del_gram(&sockets[index].inq, igp, TRUE);
715 			continue;
716 		}
717 		iphp = (struct ip *)igp->igm_mp->b_rptr;
718 		if (iphp->ip_v != IPVERSION) {
719 			dprintf("ipv4_input(%d): IPv%d datagram discarded\n",
720 			index, iphp->ip_v);
721 			del_gram(&sockets[index].inq, igp, TRUE);
722 			continue;
723 		}
724 		iphlen = iphp->ip_hl << 2;
725 		if (iphlen < sizeof (struct ip)) {
726 			dprintf("ipv4_input(%d): IP msg too short (%d < %u)\n",
727 			    index, iphlen, (uint_t)sizeof (struct ip));
728 			del_gram(&sockets[index].inq, igp, TRUE);
729 			continue;
730 		}
731 		iplen = ntohs(iphp->ip_len);
732 		if (iplen > msgdsize(igp->igm_mp)) {
733 			dprintf("ipv4_input(%d): IP len/buffer mismatch "
734 			    "(%d > %lu)\n", index, iplen, igp->igm_mp->b_size);
735 			del_gram(&sockets[index].inq, igp, TRUE);
736 			continue;
737 		}
738 
739 		bcopy((caddr_t)&(iphp->ip_dst), (caddr_t)&ipdst,
740 		    sizeof (ipdst));
741 		bcopy((caddr_t)&(iphp->ip_src), (caddr_t)&ipsrc,
742 		    sizeof (ipsrc));
743 
744 		/* igp->igm_mp->b_datap is guaranteed to be 64 bit aligned] */
745 		if (ipv4cksum((uint16_t *)iphp, iphlen) != 0) {
746 			dprintf("ipv4_input(%d): Bad IP header checksum "
747 			    "(to %s)\n", index, inet_ntoa(ipdst));
748 			del_gram(&sockets[index].inq, igp, TRUE);
749 			continue;
750 		}
751 
752 		if (!promiscuous) {
753 			/* validate destination address */
754 			if (ipdst.s_addr != htonl(INADDR_BROADCAST) &&
755 			    ipdst.s_addr != (mynet.s_addr | ~netmask.s_addr) &&
756 			    ipdst.s_addr != myip.s_addr) {
757 #ifdef	DEBUG
758 				printf("ipv4_input(%d): msg to %s discarded.\n",
759 				    index, inet_ntoa(ipdst));
760 #endif	/* DEBUG */
761 				/* not ours */
762 				del_gram(&sockets[index].inq, igp, TRUE);
763 				continue;
764 			}
765 		}
766 
767 		/* Intercept ICMP first */
768 		if (!promiscuous && (iphp->ip_p == IPPROTO_ICMP)) {
769 			icmp4(igp, iphp, iphlen, ipsrc);
770 			del_gram(&sockets[index].inq, igp, TRUE);
771 			continue;
772 		}
773 
774 #ifdef	DEBUG
775 		printf("ipv4_input(%d): processing ID: 0x%x protocol %d "
776 		    "(0x%x) (0x%x,%d)\n",
777 		    index, ntohs(iphp->ip_id), iphp->ip_p, igp, igp->igm_mp,
778 		    igp->igm_mp->b_size);
779 #endif	/* DEBUG */
780 		type = sockets[index].type;
781 		if (type == INETBOOT_RAW) {
782 			/* No fragmentation - Just the raw packet. */
783 #ifdef	DEBUG
784 			printf("ipv4_input(%d): Raw packet.\n", index);
785 #endif	/* DEBUG */
786 			del_gram(&sockets[index].inq, igp, FALSE);
787 			add_grams(&ipv4_listp, igp);
788 			igp->igm_mp->b_rptr += iphlen;
789 			igp->igm_mp->b_wptr = igp->igm_mp->b_rptr + iplen;
790 			datagrams++;
791 			continue;
792 		}
793 
794 		if ((type == INETBOOT_DGRAM && iphp->ip_p != IPPROTO_UDP) ||
795 		    (type == INETBOOT_STREAM && iphp->ip_p != IPPROTO_TCP)) {
796 			/* Wrong protocol. */
797 			dprintf("ipv4_input(%d): unexpected protocol: "
798 			    "%d for socket type %d\n", index, iphp->ip_p, type);
799 			del_gram(&sockets[index].inq, igp, TRUE);
800 			continue;
801 		}
802 
803 		/*
804 		 * The following code is common to both STREAM and DATAGRAM
805 		 * sockets.
806 		 */
807 
808 		/*
809 		 * Once we process the first fragment, we won't have
810 		 * the transport header, so we'll have to  match on
811 		 * IP id.
812 		 */
813 		curr_off = ntohs(iphp->ip_off);
814 		if ((curr_off & ~(IP_DF | IP_MF)) == 0) {
815 			uint16_t	*transp;
816 
817 			/* Validate transport header. */
818 			mp = igp->igm_mp;
819 			if ((mp->b_wptr - mp->b_rptr - iphlen) <
820 			    sockets[index].headerlen[TRANSPORT_LVL](igp)) {
821 				dprintf("ipv4_input(%d): datagram 0 "
822 				    "too small to hold transport header "
823 				    "(from %s)\n", index, inet_ntoa(ipsrc));
824 				del_gram(&sockets[index].inq, igp, TRUE);
825 				continue;
826 			}
827 
828 			/*
829 			 * check alignment - transport elements are 16
830 			 * bit aligned..
831 			 */
832 			transp = (uint16_t *)(mp->b_rptr + iphlen);
833 			if ((uintptr_t)transp % sizeof (uint16_t)) {
834 				dprintf("ipv4_input(%d): Transport "
835 				    "header is not 16-bit aligned "
836 				    "(0x%lx, from %s)\n", index, (long)transp,
837 				    inet_ntoa(ipsrc));
838 				del_gram(&sockets[index].inq, igp, TRUE);
839 				continue;
840 			}
841 
842 			if (curr_off & IP_MF) {
843 				/* fragment 0 of fragmented datagram */
844 				ip_id = ntohs(iphp->ip_id);
845 				frag_stat = frag_add(curr_off, igp->igm_mp,
846 				    ip_id, iplen, iphlen, iphp->ip_p);
847 				if (frag_stat != FRAG_SUCCESS) {
848 #ifdef	FRAG_DEBUG
849 					if (frag_stat == FRAG_DUP) {
850 						printf("ipv4_input"
851 						    "(%d): Frag dup.\n", index);
852 					} else {
853 						printf("ipv4_input"
854 						    "(%d): too many "
855 						    "frags\n", index);
856 					}
857 #endif	/* FRAG_DEBUG */
858 					del_gram(&sockets[index].inq,
859 					    igp, TRUE);
860 					continue;
861 				}
862 
863 				del_gram(&sockets[index].inq, igp, FALSE);
864 				/* keep the data, lose the inetgram */
865 				bkmem_free((caddr_t)igp,
866 				    sizeof (struct inetgram));
867 #ifdef	FRAG_DEBUG
868 				printf("ipv4_input(%d): Frag/Off/Id "
869 				    "(%d/%d/%x)\n", index, fragments,
870 				    IPV4_OFFSET(curr_off), ip_id);
871 #endif	/* FRAG_DEBUG */
872 			} else {
873 				/* Single, unfragmented datagram */
874 				newgp = make_trans_datagram(index, igp,
875 				    ipsrc, ipdst, iphlen);
876 				if (newgp != NULL) {
877 					add_grams(&ipv4_listp, newgp);
878 					datagrams++;
879 				}
880 				del_gram(&sockets[index].inq, igp,
881 				    TRUE);
882 				continue;
883 			}
884 		} else {
885 			/* fragments other than 0 */
886 			frag_stat = frag_add(curr_off, igp->igm_mp,
887 			    ntohs(iphp->ip_id), iplen, iphlen, iphp->ip_p);
888 
889 			if (frag_stat == FRAG_SUCCESS) {
890 #ifdef	FRAG_DEBUG
891 				printf("ipv4_input(%d): Frag(%d) "
892 				    "off(%d) id(%x)\n", index,
893 				    fragments, IPV4_OFFSET(curr_off),
894 				    ntohs(iphp->ip_id));
895 #endif	/* FRAG_DEBUG */
896 				del_gram(&sockets[index].inq, igp, FALSE);
897 				/* keep the data, lose the inetgram */
898 				bkmem_free((caddr_t)igp,
899 				    sizeof (struct inetgram));
900 			} else {
901 #ifdef	FRAG_DEBUG
902 				if (frag_stat == FRAG_DUP)
903 					printf("ipv4_input(%d): Frag "
904 					    "dup.\n", index);
905 				else {
906 					printf("ipv4_input(%d): too "
907 					    "many frags\n", index);
908 				}
909 #endif	/* FRAG_DEBUG */
910 				del_gram(&sockets[index].inq, igp, TRUE);
911 				continue;
912 			}
913 		}
914 
915 		/*
916 		 * Determine if we have all of the fragments.
917 		 *
918 		 * NOTE: at this point, we've placed the data in the
919 		 * fragment table, and the inetgram (igp) has been
920 		 * deleted.
921 		 */
922 		if (!frag_chk())
923 			continue;
924 
925 		newgp = make_trans_datagram(index, NULL, ipsrc, ipdst, iphlen);
926 		if (newgp == NULL)
927 			continue;
928 		add_grams(&ipv4_listp, newgp);
929 		datagrams++;
930 	}
931 	if (ipv4_listp == NULL && fragments != 0) {
932 		if (++input_attempts > FRAG_ATTEMPTS) {
933 			dprintf("ipv4_input(%d): reassembly(%d) timed out in "
934 			    "%d msecs.\n", index, fragments,
935 			    sockets[index].in_timeout * input_attempts);
936 			frag_flush();
937 			errno = ETIMEDOUT;
938 			return (-1);
939 		} else {
940 			/*
941 			 * Call the media layer again... there may be more
942 			 * packets waiting.
943 			 */
944 			if (sockets[index].input[MEDIA_LVL](index) < 0) {
945 				/* errno will be set appropriately */
946 				frag_flush();
947 				return (-1);
948 			}
949 			goto ipv4_try_again;
950 		}
951 	}
952 
953 	add_grams(&sockets[index].inq, ipv4_listp);
954 
955 	return (datagrams);
956 }
957 
958 /*
959  * ipv4_output: Generate IPv4 datagram(s) for the payload and deliver them.
960  * Routing is handled here as well, by reusing the saddr field to hold the
961  * router's IP address.
962  *
963  * We don't deal with fragmentation on the outgoing side.
964  *
965  * Arguments: index to socket, inetgram to send.
966  *
967  * Returns: 0 for success, -1 if error occurred.
968  */
969 int
970 ipv4_output(int index, struct inetgram *ogp)
971 {
972 	struct ip	*iphp;
973 	uint64_t	iphbuffer[sizeof (struct ip)];
974 
975 #ifdef	DEBUG
976 	printf("ipv4_output(%d): size %d\n", index,
977 	    ogp->igm_mp->b_wptr - ogp->igm_mp->b_rptr);
978 #endif	/* DEBUG */
979 
980 	/* we don't deal (yet) with fragmentation. Maybe never will */
981 	if ((ogp->igm_mp->b_wptr - ogp->igm_mp->b_rptr) > mac_get_mtu()) {
982 		dprintf("ipv4: datagram too big for MAC layer.\n");
983 		errno = E2BIG;
984 		return (-1);
985 	}
986 
987 	if (ogp->igm_level != NETWORK_LVL) {
988 #ifdef	DEBUG
989 		printf("ipv4_output(%d): unexpected frame type: %d\n", index,
990 		    ogp->igm_level);
991 #endif	/* DEBUG */
992 		errno = EINVAL;
993 		return (-1);
994 	}
995 
996 	if (sockets[index].out_flags & SO_DONTROUTE)
997 		ogp->igm_oflags |= MSG_DONTROUTE;
998 
999 	iphp = (struct ip *)&iphbuffer;
1000 	iphp->ip_v = IPVERSION;
1001 	iphp->ip_hl = sizeof (struct ip) / 4;
1002 	iphp->ip_tos = 0;
1003 	iphp->ip_len = htons(ogp->igm_mp->b_wptr - ogp->igm_mp->b_rptr +
1004 	    sizeof (struct ip));
1005 	iphp->ip_id = htons(++g_ip_id);
1006 	iphp->ip_off = htons(IP_DF);
1007 	iphp->ip_p = sockets[index].proto;
1008 	iphp->ip_sum = htons(0);
1009 	iphp->ip_ttl = ttl;
1010 
1011 	/* struct copies */
1012 	iphp->ip_src = myip;
1013 	iphp->ip_dst = ogp->igm_saddr.sin_addr;
1014 
1015 	/*
1016 	 * On local / limited broadcasts, don't route. From a purist's
1017 	 * perspective, we should be setting the TTL to 1. But
1018 	 * operational experience has shown that some BOOTP relay agents
1019 	 * (ciscos) discard our packets. Furthermore, these devices also
1020 	 * *don't* reset the TTL to MAXTTL on the unicast side of the
1021 	 * BOOTP relay agent! Sigh. Thus to work correctly in these
1022 	 * environments, we leave the TTL as it has been been set by
1023 	 * the application layer, and simply don't check for a route.
1024 	 */
1025 	if (iphp->ip_dst.s_addr == htonl(INADDR_BROADCAST) ||
1026 	    (netmask.s_addr != htonl(INADDR_BROADCAST) &&
1027 	    iphp->ip_dst.s_addr == (mynet.s_addr | ~netmask.s_addr))) {
1028 		ogp->igm_oflags |= MSG_DONTROUTE;
1029 	}
1030 
1031 	/* Routing necessary? */
1032 	if ((ogp->igm_oflags & MSG_DONTROUTE) == 0 &&
1033 	    ((iphp->ip_dst.s_addr & netmask.s_addr) != mynet.s_addr)) {
1034 		struct in_addr *rip;
1035 		if ((rip = ipv4_get_route(RT_HOST, &iphp->ip_dst,
1036 		    NULL)) == NULL) {
1037 			rip = ipv4_get_route(RT_DEFAULT, NULL, NULL);
1038 		}
1039 		if (rip == NULL) {
1040 			dprintf("ipv4(%d): No route to %s.\n",
1041 			    index, inet_ntoa(iphp->ip_dst));
1042 			errno = EHOSTUNREACH;
1043 			return (-1);
1044 		}
1045 		ogp->igm_router.s_addr = rip->s_addr;
1046 	} else
1047 		ogp->igm_router.s_addr = htonl(INADDR_ANY);
1048 
1049 	iphp->ip_sum = ipv4cksum((uint16_t *)iphp, sizeof (struct ip));
1050 	ogp->igm_mp->b_rptr -= sizeof (struct ip);
1051 	bcopy((caddr_t)iphp, (caddr_t)(ogp->igm_mp->b_rptr),
1052 	    sizeof (struct ip));
1053 
1054 	ogp->igm_level = MEDIA_LVL;
1055 
1056 	return (0);
1057 }
1058 
1059 /*
1060  * Function to be called by TCP to send out a packet.  This is used
1061  * when TCP wants to send out packets which it has already filled in
1062  * most of the header fields.
1063  */
1064 int
1065 ipv4_tcp_output(int sock_id, mblk_t *pkt)
1066 {
1067 	struct ip *iph;
1068 	struct in_addr *rip = NULL;
1069 	struct inetgram datagram;
1070 
1071 	iph = (struct ip *)pkt->b_rptr;
1072 
1073 	bzero(&datagram, sizeof (struct inetgram));
1074 
1075 	/*
1076 	 * Bootparams doesn't know about subnet masks, so we need to
1077 	 * explicitly check for this flag.
1078 	 */
1079 	if (sockets[sock_id].out_flags & SO_DONTROUTE)
1080 		datagram.igm_oflags |= MSG_DONTROUTE;
1081 
1082 	/* Routing necessary? */
1083 	if (((datagram.igm_oflags & MSG_DONTROUTE) == 0) &&
1084 		((iph->ip_dst.s_addr & netmask.s_addr) != mynet.s_addr)) {
1085 		if ((rip = ipv4_get_route(RT_HOST, &iph->ip_dst,
1086 		    NULL)) == NULL) {
1087 			rip = ipv4_get_route(RT_DEFAULT, NULL, NULL);
1088 		}
1089 		if (rip == NULL) {
1090 			dprintf("ipv4(%d): No route to %s.\n",
1091 			    sock_id, inet_ntoa(iph->ip_dst));
1092 			errno = EHOSTUNREACH;
1093 			return (-1);
1094 		}
1095 	}
1096 
1097 	iph->ip_id = htons(++g_ip_id);
1098 	iph->ip_sum = ipv4cksum((uint16_t *)iph, sizeof (struct ip));
1099 #if DEBUG > 1
1100 	printf("ipv4_tcp_output: dump IP packet(%d)\n", iph->ip_len);
1101 	hexdump((char *)pkt->b_rptr, iph->ip_len);
1102 #endif
1103 	/* Call the MAC layer output routine to send it out. */
1104 	datagram.igm_mp = pkt;
1105 	datagram.igm_level = MEDIA_LVL;
1106 	if (rip != NULL)
1107 		datagram.igm_router.s_addr = rip->s_addr;
1108 	else
1109 		datagram.igm_router.s_addr = 0;
1110 	return (mac_state.mac_output(sock_id, &datagram));
1111 }
1112 
/*
 * Internet address interpretation routine.
 * All the network library routines call this
 * routine to interpret entries in the data bases
 * which are expected to be an address.
 *
 * Accepted forms (each number may be decimal, 0-prefixed octal or
 * 0x-prefixed hex, as in C):
 *	a.b.c.d	(8.8.8.8 bits)
 *	a.b.c	(with c treated as 16 bits)
 *	a.b	(with b treated as 24 bits)
 *	a	(a plain 32 bit value)
 * The number may be followed by white space; any other trailing
 * character is an error.
 *
 * The value returned is in network order.  On any parse error,
 * including an empty string, a part that is out of range, or a number
 * that does not fit in 32 bits, (uint32_t)-1 is returned.  Note that
 * this is indistinguishable from a successfully parsed
 * 255.255.255.255.
 */
in_addr_t
inet_addr(const char *cp)
{
	const uint32_t	invalid = (uint32_t)-1;	/* also the largest uint32_t */
	uint32_t	val, base, digit, n;
	unsigned char	c;
	uint32_t	parts[4], *pp = parts;

	if (*cp == '\0')
		return (invalid);	/* disallow null string in cp */
again:
	/*
	 * Collect number up to ``.''.
	 * Values are specified as for C:
	 * 0x=hex, 0=octal, other=decimal.
	 */
	val = 0;
	base = 10;
	if (*cp == '0') {
		if (*++cp == 'x' || *cp == 'X') {
			base = 16;
			cp++;
		} else {
			base = 8;
		}
	}
	/*
	 * The character is held as an unsigned char so that bytes with
	 * the high bit set are never handed to the ctype classifiers as
	 * negative values.
	 */
	while ((c = (unsigned char)*cp) != '\0') {
		if (isdigit(c)) {
			digit = c - '0';
			if (digit >= base)
				break;		/* e.g. '8' in an octal number */
		} else if (base == 16 && isxdigit(c)) {
			digit = c + 10 - (islower(c) ? 'a' : 'A');
		} else {
			break;
		}
		/* Reject rather than silently wrap on 32-bit overflow. */
		if (val > (invalid - digit) / base)
			return (invalid);
		val = (val * base) + digit;
		cp++;
	}
	if (*cp == '.') {
		/*
		 * A part followed by a dot must fit in 8 bits, and at
		 * most three such parts may precede the final one.
		 */
		if ((pp >= parts + 3) || (val > 0xff)) {
			return (invalid);
		}
		*pp++ = val;
		cp++;
		goto again;
	}
	/*
	 * Check for trailing characters.
	 */
	if (*cp != '\0' && !isspace((unsigned char)*cp)) {
		return (invalid);
	}
	*pp++ = val;
	/*
	 * Concoct the address according to
	 * the number of parts specified.
	 */
	n = pp - parts;
	switch (n) {

	case 1:				/* a -- 32 bits */
		val = parts[0];
		break;

	case 2:				/* a.b -- 8.24 bits */
		if (parts[1] > 0xffffff)
			return (invalid);
		val = (parts[0] << 24) | (parts[1] & 0xffffff);
		break;

	case 3:				/* a.b.c -- 8.8.16 bits */
		if (parts[2] > 0xffff)
			return (invalid);
		val = (parts[0] << 24) | ((parts[1] & 0xff) << 16) |
		    (parts[2] & 0xffff);
		break;

	case 4:				/* a.b.c.d -- 8.8.8.8 bits */
		if (parts[3] > 0xff)
			return (invalid);
		val = (parts[0] << 24) | ((parts[1] & 0xff) << 16) |
		    ((parts[2] & 0xff) << 8) | (parts[3] & 0xff);
		break;

	default:
		return (invalid);
	}
	val = htonl(val);
	return (val);
}
1214 
/*
 * Print a hex and ASCII dump of the datalen bytes starting at data.
 *
 * Output is a blank line, then one line per 16 bytes of input, then a
 * blank line.  Each line shows the decimal offset of its first byte,
 * the bytes grouped in pairs as 4-digit hex values (first byte in the
 * high-order position), and the same bytes as characters, with
 * non-printable bytes shown as '.'.  A short final line is padded so
 * that the character column stays aligned.
 *
 * The buffer is read one byte at a time, so data need not be aligned.
 * Nothing but the two blank lines is printed when datalen is <= 0.
 */
void
hexdump(char *data, int datalen)
{
	const unsigned char	*bytes = (const unsigned char *)data;
	const int		chunk = 16;	/* 16 bytes per line */
	int			off, i, left, len;

	printf("\n");

	for (off = 0; off < datalen; off += chunk) {
		const unsigned char *row = bytes + off;

		printf("\t%4d: ", off);
		left = datalen - off;
		len = (left < chunk) ? left : chunk;

		/* Complete byte pairs, first byte most significant. */
		for (i = 0; i + 1 < len; i += 2) {
			printf("%04x ",
			    ((unsigned int)row[i] << 8) | row[i + 1]);
		}
		/* A trailing unpaired byte occupies a full pair column. */
		if (len % 2) {
			printf("%02x   ", row[len - 1]);
		}
		/* Pad out the pair columns a short line did not use. */
		for (i = 0; i < (chunk - left) / 2; i++)
			printf("     ");

		printf("   ");
		for (i = 0; i < len; i++)
			printf("%c", isprint(row[i]) ? row[i] : '.');
		printf("\n");
	}

	printf("\n");
}
1246