1 /*
2  * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>.
3  * Copyright (C) 2006 Nikhil Chandru Rao
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of the
8  * License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18  * 02110-1301, USA.
19  *
20  * You can also choose to distribute this program under the terms of
21  * the Unmodified Binary Distribution Licence (as given in the file
22  * COPYING.UBDL), provided that you have satisfied its requirements.
23  */
24 
25 #include <string.h>
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <errno.h>
30 #include <byteswap.h>
31 #include <ipxe/list.h>
32 #include <ipxe/in.h>
33 #include <ipxe/arp.h>
34 #include <ipxe/if_ether.h>
35 #include <ipxe/iobuf.h>
36 #include <ipxe/netdevice.h>
37 #include <ipxe/ip.h>
38 #include <ipxe/tcpip.h>
39 #include <ipxe/dhcp.h>
40 #include <ipxe/settings.h>
41 #include <ipxe/fragment.h>
42 #include <ipxe/ipstat.h>
43 #include <ipxe/profile.h>
44 
45 /** @file
46  *
47  * IPv4 protocol
48  *
49  */
50 
51 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
52 
53 /* Unique IP datagram identification number (high byte) */
54 static uint8_t next_ident_high = 0;
55 
56 /** List of IPv4 miniroutes */
57 struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes );
58 
59 /** IPv4 statistics */
60 static struct ip_statistics ipv4_stats;
61 
62 /** IPv4 statistics family */
63 struct ip_statistics_family
64 ipv4_stats_family __ip_statistics_family ( IP_STATISTICS_IPV4 ) = {
65 	.version = 4,
66 	.stats = &ipv4_stats,
67 };
68 
69 /** Transmit profiler */
70 static struct profiler ipv4_tx_profiler __profiler = { .name = "ipv4.tx" };
71 
72 /** Receive profiler */
73 static struct profiler ipv4_rx_profiler __profiler = { .name = "ipv4.rx" };
74 
75 /**
76  * Add IPv4 minirouting table entry
77  *
78  * @v netdev		Network device
79  * @v address		IPv4 address
80  * @v netmask		Subnet mask
81  * @v gateway		Gateway address (if any)
82  * @ret rc		Return status code
83  */
add_ipv4_miniroute(struct net_device * netdev,struct in_addr address,struct in_addr netmask,struct in_addr gateway)84 static int add_ipv4_miniroute ( struct net_device *netdev,
85 				struct in_addr address, struct in_addr netmask,
86 				struct in_addr gateway ) {
87 	struct ipv4_miniroute *miniroute;
88 
89 	DBGC ( netdev, "IPv4 add %s", inet_ntoa ( address ) );
90 	DBGC ( netdev, "/%s ", inet_ntoa ( netmask ) );
91 	if ( gateway.s_addr )
92 		DBGC ( netdev, "gw %s ", inet_ntoa ( gateway ) );
93 	DBGC ( netdev, "via %s\n", netdev->name );
94 
95 	/* Allocate and populate miniroute structure */
96 	miniroute = malloc ( sizeof ( *miniroute ) );
97 	if ( ! miniroute ) {
98 		DBGC ( netdev, "IPv4 could not add miniroute\n" );
99 		return -ENOMEM;
100 	}
101 
102 	/* Record routing information */
103 	miniroute->netdev = netdev_get ( netdev );
104 	miniroute->address = address;
105 	miniroute->netmask = netmask;
106 	miniroute->gateway = gateway;
107 
108 	/* Add to end of list if we have a gateway, otherwise
109 	 * to start of list.
110 	 */
111 	if ( gateway.s_addr ) {
112 		list_add_tail ( &miniroute->list, &ipv4_miniroutes );
113 	} else {
114 		list_add ( &miniroute->list, &ipv4_miniroutes );
115 	}
116 
117 	return 0;
118 }
119 
120 /**
121  * Delete IPv4 minirouting table entry
122  *
123  * @v miniroute		Routing table entry
124  */
del_ipv4_miniroute(struct ipv4_miniroute * miniroute)125 static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) {
126 	struct net_device *netdev = miniroute->netdev;
127 
128 	DBGC ( netdev, "IPv4 del %s", inet_ntoa ( miniroute->address ) );
129 	DBGC ( netdev, "/%s ", inet_ntoa ( miniroute->netmask ) );
130 	if ( miniroute->gateway.s_addr )
131 		DBGC ( netdev, "gw %s ", inet_ntoa ( miniroute->gateway ) );
132 	DBGC ( netdev, "via %s\n", miniroute->netdev->name );
133 
134 	netdev_put ( miniroute->netdev );
135 	list_del ( &miniroute->list );
136 	free ( miniroute );
137 }
138 
139 /**
140  * Perform IPv4 routing
141  *
142  * @v scope_id		Destination address scope ID
143  * @v dest		Final destination address
144  * @ret dest		Next hop destination address
145  * @ret miniroute	Routing table entry to use, or NULL if no route
146  *
147  * If the route requires use of a gateway, the next hop destination
148  * address will be overwritten with the gateway address.
149  */
ipv4_route(unsigned int scope_id,struct in_addr * dest)150 static struct ipv4_miniroute * ipv4_route ( unsigned int scope_id,
151 					    struct in_addr *dest ) {
152 	struct ipv4_miniroute *miniroute;
153 
154 	/* Find first usable route in routing table */
155 	list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
156 
157 		/* Skip closed network devices */
158 		if ( ! netdev_is_open ( miniroute->netdev ) )
159 			continue;
160 
161 		if ( IN_IS_MULTICAST ( dest->s_addr ) ) {
162 
163 			/* If destination is non-global, and the scope ID
164 			 * matches this network device, then use this route.
165 			 */
166 			if ( miniroute->netdev->index == scope_id )
167 				return miniroute;
168 
169 		} else {
170 
171 			/* If destination is an on-link global
172 			 * address, then use this route.
173 			 */
174 			if ( ( ( dest->s_addr ^ miniroute->address.s_addr )
175 			       & miniroute->netmask.s_addr ) == 0 )
176 				return miniroute;
177 
178 			/* If destination is an off-link global
179 			 * address, and we have a default gateway,
180 			 * then use this route.
181 			 */
182 			if ( miniroute->gateway.s_addr ) {
183 				*dest = miniroute->gateway;
184 				return miniroute;
185 			}
186 		}
187 	}
188 
189 	return NULL;
190 }
191 
192 /**
193  * Determine transmitting network device
194  *
195  * @v st_dest		Destination network-layer address
196  * @ret netdev		Transmitting network device, or NULL
197  */
ipv4_netdev(struct sockaddr_tcpip * st_dest)198 static struct net_device * ipv4_netdev ( struct sockaddr_tcpip *st_dest ) {
199 	struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
200 	struct in_addr dest = sin_dest->sin_addr;
201 	struct ipv4_miniroute *miniroute;
202 
203 	/* Find routing table entry */
204 	miniroute = ipv4_route ( sin_dest->sin_scope_id, &dest );
205 	if ( ! miniroute )
206 		return NULL;
207 
208 	return miniroute->netdev;
209 }
210 
211 /**
212  * Check if IPv4 fragment matches fragment reassembly buffer
213  *
214  * @v fragment		Fragment reassembly buffer
215  * @v iobuf		I/O buffer
216  * @v hdrlen		Length of non-fragmentable potion of I/O buffer
217  * @ret is_fragment	Fragment matches this reassembly buffer
218  */
ipv4_is_fragment(struct fragment * fragment,struct io_buffer * iobuf,size_t hdrlen __unused)219 static int ipv4_is_fragment ( struct fragment *fragment,
220 			      struct io_buffer *iobuf,
221 			      size_t hdrlen __unused ) {
222 	struct iphdr *frag_iphdr = fragment->iobuf->data;
223 	struct iphdr *iphdr = iobuf->data;
224 
225 	return ( ( iphdr->src.s_addr == frag_iphdr->src.s_addr ) &&
226 		 ( iphdr->ident == frag_iphdr->ident ) );
227 }
228 
229 /**
230  * Get IPv4 fragment offset
231  *
232  * @v iobuf		I/O buffer
233  * @v hdrlen		Length of non-fragmentable potion of I/O buffer
234  * @ret offset		Offset
235  */
ipv4_fragment_offset(struct io_buffer * iobuf,size_t hdrlen __unused)236 static size_t ipv4_fragment_offset ( struct io_buffer *iobuf,
237 				     size_t hdrlen __unused ) {
238 	struct iphdr *iphdr = iobuf->data;
239 
240 	return ( ( ntohs ( iphdr->frags ) & IP_MASK_OFFSET ) << 3 );
241 }
242 
243 /**
244  * Check if more fragments exist
245  *
246  * @v iobuf		I/O buffer
247  * @v hdrlen		Length of non-fragmentable potion of I/O buffer
248  * @ret more_frags	More fragments exist
249  */
ipv4_more_fragments(struct io_buffer * iobuf,size_t hdrlen __unused)250 static int ipv4_more_fragments ( struct io_buffer *iobuf,
251 				 size_t hdrlen __unused ) {
252 	struct iphdr *iphdr = iobuf->data;
253 
254 	return ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) );
255 }
256 
257 /** IPv4 fragment reassembler */
258 static struct fragment_reassembler ipv4_reassembler = {
259 	.list = LIST_HEAD_INIT ( ipv4_reassembler.list ),
260 	.is_fragment = ipv4_is_fragment,
261 	.fragment_offset = ipv4_fragment_offset,
262 	.more_fragments = ipv4_more_fragments,
263 	.stats = &ipv4_stats,
264 };
265 
266 /**
267  * Add IPv4 pseudo-header checksum to existing checksum
268  *
269  * @v iobuf		I/O buffer
270  * @v csum		Existing checksum
271  * @ret csum		Updated checksum
272  */
ipv4_pshdr_chksum(struct io_buffer * iobuf,uint16_t csum)273 static uint16_t ipv4_pshdr_chksum ( struct io_buffer *iobuf, uint16_t csum ) {
274 	struct ipv4_pseudo_header pshdr;
275 	struct iphdr *iphdr = iobuf->data;
276 	size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
277 
278 	/* Build pseudo-header */
279 	pshdr.src = iphdr->src;
280 	pshdr.dest = iphdr->dest;
281 	pshdr.zero_padding = 0x00;
282 	pshdr.protocol = iphdr->protocol;
283 	pshdr.len = htons ( iob_len ( iobuf ) - hdrlen );
284 
285 	/* Update the checksum value */
286 	return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) );
287 }
288 
289 /**
290  * Transmit IP packet
291  *
292  * @v iobuf		I/O buffer
293  * @v tcpip		Transport-layer protocol
294  * @v st_src		Source network-layer address
295  * @v st_dest		Destination network-layer address
296  * @v netdev		Network device to use if no route found, or NULL
297  * @v trans_csum	Transport-layer checksum to complete, or NULL
298  * @ret rc		Status
299  *
300  * This function expects a transport-layer segment and prepends the IP header
301  */
ipv4_tx(struct io_buffer * iobuf,struct tcpip_protocol * tcpip_protocol,struct sockaddr_tcpip * st_src,struct sockaddr_tcpip * st_dest,struct net_device * netdev,uint16_t * trans_csum)302 static int ipv4_tx ( struct io_buffer *iobuf,
303 		     struct tcpip_protocol *tcpip_protocol,
304 		     struct sockaddr_tcpip *st_src,
305 		     struct sockaddr_tcpip *st_dest,
306 		     struct net_device *netdev,
307 		     uint16_t *trans_csum ) {
308 	struct iphdr *iphdr = iob_push ( iobuf, sizeof ( *iphdr ) );
309 	struct sockaddr_in *sin_src = ( ( struct sockaddr_in * ) st_src );
310 	struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
311 	struct ipv4_miniroute *miniroute;
312 	struct in_addr next_hop;
313 	struct in_addr netmask = { .s_addr = 0 };
314 	uint8_t ll_dest_buf[MAX_LL_ADDR_LEN];
315 	const void *ll_dest;
316 	int rc;
317 
318 	/* Start profiling */
319 	profile_start ( &ipv4_tx_profiler );
320 
321 	/* Update statistics */
322 	ipv4_stats.out_requests++;
323 
324 	/* Fill up the IP header, except source address */
325 	memset ( iphdr, 0, sizeof ( *iphdr ) );
326 	iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) );
327 	iphdr->service = IP_TOS;
328 	iphdr->len = htons ( iob_len ( iobuf ) );
329 	iphdr->ttl = IP_TTL;
330 	iphdr->protocol = tcpip_protocol->tcpip_proto;
331 	iphdr->dest = sin_dest->sin_addr;
332 
333 	/* Use routing table to identify next hop and transmitting netdev */
334 	next_hop = iphdr->dest;
335 	if ( sin_src )
336 		iphdr->src = sin_src->sin_addr;
337 	if ( ( next_hop.s_addr != INADDR_BROADCAST ) &&
338 	     ( ( miniroute = ipv4_route ( sin_dest->sin_scope_id,
339 					  &next_hop ) ) != NULL ) ) {
340 		iphdr->src = miniroute->address;
341 		netmask = miniroute->netmask;
342 		netdev = miniroute->netdev;
343 	}
344 	if ( ! netdev ) {
345 		DBGC ( sin_dest->sin_addr, "IPv4 has no route to %s\n",
346 		       inet_ntoa ( iphdr->dest ) );
347 		ipv4_stats.out_no_routes++;
348 		rc = -ENETUNREACH;
349 		goto err;
350 	}
351 
352 	/* (Ab)use the "ident" field to convey metadata about the
353 	 * network device statistics into packet traces.  Useful for
354 	 * extracting debug information from non-debug builds.
355 	 */
356 	iphdr->ident = htons ( ( (++next_ident_high) << 8 ) |
357 			       ( ( netdev->rx_stats.bad & 0xf ) << 4 ) |
358 			       ( ( netdev->rx_stats.good & 0xf ) << 0 ) );
359 
360 	/* Fix up checksums */
361 	if ( trans_csum ) {
362 		*trans_csum = ipv4_pshdr_chksum ( iobuf, *trans_csum );
363 		if ( ! *trans_csum )
364 			*trans_csum = tcpip_protocol->zero_csum;
365 	}
366 	iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
367 
368 	/* Print IP4 header for debugging */
369 	DBGC2 ( sin_dest->sin_addr, "IPv4 TX %s->", inet_ntoa ( iphdr->src ) );
370 	DBGC2 ( sin_dest->sin_addr, "%s len %d proto %d id %04x csum %04x\n",
371 		inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ),
372 		iphdr->protocol, ntohs ( iphdr->ident ),
373 		ntohs ( iphdr->chksum ) );
374 
375 	/* Calculate link-layer destination address, if possible */
376 	if ( ( ( next_hop.s_addr ^ INADDR_BROADCAST ) & ~netmask.s_addr ) == 0){
377 		/* Broadcast address */
378 		ipv4_stats.out_bcast_pkts++;
379 		ll_dest = netdev->ll_broadcast;
380 	} else if ( IN_IS_MULTICAST ( next_hop.s_addr ) ) {
381 		/* Multicast address */
382 		ipv4_stats.out_mcast_pkts++;
383 		if ( ( rc = netdev->ll_protocol->mc_hash ( AF_INET, &next_hop,
384 							   ll_dest_buf ) ) !=0){
385 			DBGC ( sin_dest->sin_addr, "IPv4 could not hash "
386 			       "multicast %s: %s\n",
387 			       inet_ntoa ( next_hop ), strerror ( rc ) );
388 			goto err;
389 		}
390 		ll_dest = ll_dest_buf;
391 	} else {
392 		/* Unicast address */
393 		ll_dest = NULL;
394 	}
395 
396 	/* Update statistics */
397 	ipv4_stats.out_transmits++;
398 	ipv4_stats.out_octets += iob_len ( iobuf );
399 
400 	/* Hand off to link layer (via ARP if applicable) */
401 	if ( ll_dest ) {
402 		if ( ( rc = net_tx ( iobuf, netdev, &ipv4_protocol, ll_dest,
403 				     netdev->ll_addr ) ) != 0 ) {
404 			DBGC ( sin_dest->sin_addr, "IPv4 could not transmit "
405 			       "packet via %s: %s\n",
406 			       netdev->name, strerror ( rc ) );
407 			return rc;
408 		}
409 	} else {
410 		if ( ( rc = arp_tx ( iobuf, netdev, &ipv4_protocol, &next_hop,
411 				     &iphdr->src, netdev->ll_addr ) ) != 0 ) {
412 			DBGC ( sin_dest->sin_addr, "IPv4 could not transmit "
413 			       "packet via %s: %s\n",
414 			       netdev->name, strerror ( rc ) );
415 			return rc;
416 		}
417 	}
418 
419 	profile_stop ( &ipv4_tx_profiler );
420 	return 0;
421 
422  err:
423 	free_iob ( iobuf );
424 	return rc;
425 }
426 
427 /**
428  * Check if network device has any IPv4 address
429  *
430  * @v netdev		Network device
431  * @ret has_any_addr	Network device has any IPv4 address
432  */
ipv4_has_any_addr(struct net_device * netdev)433 int ipv4_has_any_addr ( struct net_device *netdev ) {
434 	struct ipv4_miniroute *miniroute;
435 
436 	list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
437 		if ( miniroute->netdev == netdev )
438 			return 1;
439 	}
440 	return 0;
441 }
442 
443 /**
444  * Check if network device has a specific IPv4 address
445  *
446  * @v netdev		Network device
447  * @v addr		IPv4 address
448  * @ret has_addr	Network device has this IPv4 address
449  */
ipv4_has_addr(struct net_device * netdev,struct in_addr addr)450 static int ipv4_has_addr ( struct net_device *netdev, struct in_addr addr ) {
451 	struct ipv4_miniroute *miniroute;
452 
453 	list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
454 		if ( ( miniroute->netdev == netdev ) &&
455 		     ( miniroute->address.s_addr == addr.s_addr ) ) {
456 			/* Found matching address */
457 			return 1;
458 		}
459 	}
460 	return 0;
461 }
462 
463 /**
464  * Process incoming packets
465  *
466  * @v iobuf		I/O buffer
467  * @v netdev		Network device
468  * @v ll_dest		Link-layer destination address
469  * @v ll_source		Link-layer destination source
470  * @v flags		Packet flags
471  * @ret rc		Return status code
472  *
473  * This function expects an IP4 network datagram. It processes the headers
474  * and sends it to the transport layer.
475  */
ipv4_rx(struct io_buffer * iobuf,struct net_device * netdev,const void * ll_dest __unused,const void * ll_source __unused,unsigned int flags)476 static int ipv4_rx ( struct io_buffer *iobuf,
477 		     struct net_device *netdev,
478 		     const void *ll_dest __unused,
479 		     const void *ll_source __unused,
480 		     unsigned int flags ) {
481 	struct iphdr *iphdr = iobuf->data;
482 	size_t hdrlen;
483 	size_t len;
484 	union {
485 		struct sockaddr_in sin;
486 		struct sockaddr_tcpip st;
487 	} src, dest;
488 	uint16_t csum;
489 	uint16_t pshdr_csum;
490 	int rc;
491 
492 	/* Start profiling */
493 	profile_start ( &ipv4_rx_profiler );
494 
495 	/* Update statistics */
496 	ipv4_stats.in_receives++;
497 	ipv4_stats.in_octets += iob_len ( iobuf );
498 	if ( flags & LL_BROADCAST ) {
499 		ipv4_stats.in_bcast_pkts++;
500 	} else if ( flags & LL_MULTICAST ) {
501 		ipv4_stats.in_mcast_pkts++;
502 	}
503 
504 	/* Sanity check the IPv4 header */
505 	if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) {
506 		DBGC ( iphdr->src, "IPv4 packet too short at %zd bytes (min "
507 		       "%zd bytes)\n", iob_len ( iobuf ), sizeof ( *iphdr ) );
508 		goto err_header;
509 	}
510 	if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) {
511 		DBGC ( iphdr->src, "IPv4 version %#02x not supported\n",
512 		       iphdr->verhdrlen );
513 		goto err_header;
514 	}
515 	hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
516 	if ( hdrlen < sizeof ( *iphdr ) ) {
517 		DBGC ( iphdr->src, "IPv4 header too short at %zd bytes (min "
518 		       "%zd bytes)\n", hdrlen, sizeof ( *iphdr ) );
519 		goto err_header;
520 	}
521 	if ( hdrlen > iob_len ( iobuf ) ) {
522 		DBGC ( iphdr->src, "IPv4 header too long at %zd bytes "
523 		       "(packet is %zd bytes)\n", hdrlen, iob_len ( iobuf ) );
524 		goto err_header;
525 	}
526 	if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) {
527 		DBGC ( iphdr->src, "IPv4 checksum incorrect (is %04x "
528 		       "including checksum field, should be 0000)\n", csum );
529 		goto err_header;
530 	}
531 	len = ntohs ( iphdr->len );
532 	if ( len < hdrlen ) {
533 		DBGC ( iphdr->src, "IPv4 length too short at %zd bytes "
534 		       "(header is %zd bytes)\n", len, hdrlen );
535 		goto err_header;
536 	}
537 	if ( len > iob_len ( iobuf ) ) {
538 		DBGC ( iphdr->src, "IPv4 length too long at %zd bytes "
539 		       "(packet is %zd bytes)\n", len, iob_len ( iobuf ) );
540 		ipv4_stats.in_truncated_pkts++;
541 		goto err_other;
542 	}
543 
544 	/* Truncate packet to correct length */
545 	iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) );
546 
547 	/* Print IPv4 header for debugging */
548 	DBGC2 ( iphdr->src, "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
549 	DBGC2 ( iphdr->src, "%s len %d proto %d id %04x csum %04x\n",
550 		inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
551 		ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
552 
553 	/* Discard unicast packets not destined for us */
554 	if ( ( ! ( flags & LL_MULTICAST ) ) &&
555 	     ( iphdr->dest.s_addr != INADDR_BROADCAST ) &&
556 	     ipv4_has_any_addr ( netdev ) &&
557 	     ( ! ipv4_has_addr ( netdev, iphdr->dest ) ) ) {
558 		DBGC ( iphdr->src, "IPv4 discarding non-local unicast packet "
559 		       "for %s\n", inet_ntoa ( iphdr->dest ) );
560 		ipv4_stats.in_addr_errors++;
561 		goto err_other;
562 	}
563 
564 	/* Perform fragment reassembly if applicable */
565 	if ( iphdr->frags & htons ( IP_MASK_OFFSET | IP_MASK_MOREFRAGS ) ) {
566 		/* Pass the fragment to fragment_reassemble() which returns
567 		 * either a fully reassembled I/O buffer or NULL.
568 		 */
569 		iobuf = fragment_reassemble ( &ipv4_reassembler, iobuf,
570 					      &hdrlen );
571 		if ( ! iobuf )
572 			return 0;
573 		iphdr = iobuf->data;
574 	}
575 
576 	/* Construct socket addresses, calculate pseudo-header
577 	 * checksum, and hand off to transport layer
578 	 */
579 	memset ( &src, 0, sizeof ( src ) );
580 	src.sin.sin_family = AF_INET;
581 	src.sin.sin_addr = iphdr->src;
582 	memset ( &dest, 0, sizeof ( dest ) );
583 	dest.sin.sin_family = AF_INET;
584 	dest.sin.sin_addr = iphdr->dest;
585 	pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM );
586 	iob_pull ( iobuf, hdrlen );
587 	if ( ( rc = tcpip_rx ( iobuf, netdev, iphdr->protocol, &src.st,
588 			       &dest.st, pshdr_csum, &ipv4_stats ) ) != 0 ) {
589 		DBGC ( src.sin.sin_addr, "IPv4 received packet rejected by "
590 		       "stack: %s\n", strerror ( rc ) );
591 		return rc;
592 	}
593 
594 	profile_stop ( &ipv4_rx_profiler );
595 	return 0;
596 
597  err_header:
598 	ipv4_stats.in_hdr_errors++;
599  err_other:
600 	free_iob ( iobuf );
601 	return -EINVAL;
602 }
603 
604 /**
605  * Check existence of IPv4 address for ARP
606  *
607  * @v netdev		Network device
608  * @v net_addr		Network-layer address
609  * @ret rc		Return status code
610  */
ipv4_arp_check(struct net_device * netdev,const void * net_addr)611 static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
612 	const struct in_addr *address = net_addr;
613 
614 	if ( ipv4_has_addr ( netdev, *address ) )
615 		return 0;
616 
617 	return -ENOENT;
618 }
619 
/**
 * Parse IPv4 address
 *
 * @v string		IPv4 address string
 * @ret in		IPv4 address to fill in
 * @ret ok		IPv4 address is valid
 *
 * Note that this function returns nonzero iff the address is valid,
 * to match the standard BSD API function of the same name.  Unlike
 * most other iPXE functions, a zero therefore indicates failure.
 *
 * The string must consist of exactly four dot-separated components,
 * each starting with a digit and each in the range 0-255.  Components
 * are parsed with a base of zero, so octal ("010") and hexadecimal
 * ("0x7f") components are accepted.
 *
 * The address is written only when the whole string is valid; on
 * failure the caller's structure is left untouched.
 */
int inet_aton ( const char *string, struct in_addr *in ) {
	const char *separator = "...";
	uint8_t bytes[4];
	unsigned int count = 0;
	char *endp;
	unsigned long value;

	while ( 1 ) {
		/* Require a leading digit, since strtoul() would
		 * otherwise skip leading whitespace and accept a
		 * sign character.
		 */
		if ( ( *string < '0' ) || ( *string > '9' ) )
			return 0;

		/* Parse and range-check this component */
		value = strtoul ( string, &endp, 0 );
		if ( string == endp )
			return 0;
		if ( value > 0xff )
			return 0;
		bytes[count++] = value;

		/* The first three components must be followed by a
		 * dot and the fourth by the end of the string.
		 */
		if ( *endp != *separator )
			return 0;
		if ( ! *(separator++) )
			break;
		string = ( endp + 1 );
	}

	/* Commit the address only once fully validated */
	memcpy ( in, bytes, sizeof ( bytes ) );
	return 1;
}
651 
/**
 * Convert IPv4 address to dotted-quad notation
 *
 * @v in		IPv4 address
 * @ret string		IPv4 address in dotted-quad notation
 *
 * The result lives in a single static buffer, so each call overwrites
 * the string returned by the previous call.
 */
char * inet_ntoa ( struct in_addr in ) {
	static char text[16]; /* "xxx.xxx.xxx.xxx" plus NUL */
	const uint8_t *octet = ( ( const uint8_t * ) &in );

	snprintf ( text, sizeof ( text ), "%d.%d.%d.%d",
		   octet[0], octet[1], octet[2], octet[3] );
	return text;
}
665 
666 /**
667  * Transcribe IPv4 address
668  *
669  * @v net_addr		IPv4 address
670  * @ret string		IPv4 address in dotted-quad notation
671  *
672  */
ipv4_ntoa(const void * net_addr)673 static const char * ipv4_ntoa ( const void *net_addr ) {
674 	return inet_ntoa ( * ( ( struct in_addr * ) net_addr ) );
675 }
676 
677 /**
678  * Transcribe IPv4 socket address
679  *
680  * @v sa		Socket address
681  * @ret string		Socket address in standard notation
682  */
ipv4_sock_ntoa(struct sockaddr * sa)683 static const char * ipv4_sock_ntoa ( struct sockaddr *sa ) {
684 	struct sockaddr_in *sin = ( ( struct sockaddr_in * ) sa );
685 
686 	return inet_ntoa ( sin->sin_addr );
687 }
688 
689 /**
690  * Parse IPv4 socket address
691  *
692  * @v string		Socket address string
693  * @v sa		Socket address to fill in
694  * @ret rc		Return status code
695  */
ipv4_sock_aton(const char * string,struct sockaddr * sa)696 static int ipv4_sock_aton ( const char *string, struct sockaddr *sa ) {
697 	struct sockaddr_in *sin = ( ( struct sockaddr_in * ) sa );
698 	struct in_addr in;
699 
700 	if ( inet_aton ( string, &in ) ) {
701 		sin->sin_addr = in;
702 		return 0;
703 	}
704 	return -EINVAL;
705 }
706 
707 /** IPv4 protocol */
708 struct net_protocol ipv4_protocol __net_protocol = {
709 	.name = "IP",
710 	.net_proto = htons ( ETH_P_IP ),
711 	.net_addr_len = sizeof ( struct in_addr ),
712 	.rx = ipv4_rx,
713 	.ntoa = ipv4_ntoa,
714 };
715 
716 /** IPv4 TCPIP net protocol */
717 struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
718 	.name = "IPv4",
719 	.sa_family = AF_INET,
720 	.header_len = sizeof ( struct iphdr ),
721 	.net_protocol = &ipv4_protocol,
722 	.tx = ipv4_tx,
723 	.netdev = ipv4_netdev,
724 };
725 
726 /** IPv4 ARP protocol */
727 struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
728 	.net_protocol = &ipv4_protocol,
729 	.check = ipv4_arp_check,
730 };
731 
732 /** IPv4 socket address converter */
733 struct sockaddr_converter ipv4_sockaddr_converter __sockaddr_converter = {
734 	.family = AF_INET,
735 	.ntoa = ipv4_sock_ntoa,
736 	.aton = ipv4_sock_aton,
737 };
738 
739 /******************************************************************************
740  *
741  * Settings
742  *
743  ******************************************************************************
744  */
745 
746 /**
747  * Parse IPv4 address setting value
748  *
749  * @v type		Setting type
750  * @v value		Formatted setting value
751  * @v buf		Buffer to contain raw value
752  * @v len		Length of buffer
753  * @ret len		Length of raw value, or negative error
754  */
parse_ipv4_setting(const struct setting_type * type __unused,const char * value,void * buf,size_t len)755 int parse_ipv4_setting ( const struct setting_type *type __unused,
756 			 const char *value, void *buf, size_t len ) {
757 	struct in_addr ipv4;
758 
759 	/* Parse IPv4 address */
760 	if ( inet_aton ( value, &ipv4 ) == 0 )
761 		return -EINVAL;
762 
763 	/* Copy to buffer */
764 	if ( len > sizeof ( ipv4 ) )
765 		len = sizeof ( ipv4 );
766 	memcpy ( buf, &ipv4, len );
767 
768 	return ( sizeof ( ipv4 ) );
769 }
770 
771 /**
772  * Format IPv4 address setting value
773  *
774  * @v type		Setting type
775  * @v raw		Raw setting value
776  * @v raw_len		Length of raw setting value
777  * @v buf		Buffer to contain formatted value
778  * @v len		Length of buffer
779  * @ret len		Length of formatted value, or negative error
780  */
format_ipv4_setting(const struct setting_type * type __unused,const void * raw,size_t raw_len,char * buf,size_t len)781 int format_ipv4_setting ( const struct setting_type *type __unused,
782 			  const void *raw, size_t raw_len, char *buf,
783 			  size_t len ) {
784 	const struct in_addr *ipv4 = raw;
785 
786 	if ( raw_len < sizeof ( *ipv4 ) )
787 		return -EINVAL;
788 	return snprintf ( buf, len, "%s", inet_ntoa ( *ipv4 ) );
789 }
790 
791 /** IPv4 address setting */
792 const struct setting ip_setting __setting ( SETTING_IP4, ip ) = {
793 	.name = "ip",
794 	.description = "IP address",
795 	.tag = DHCP_EB_YIADDR,
796 	.type = &setting_type_ipv4,
797 };
798 
799 /** IPv4 subnet mask setting */
800 const struct setting netmask_setting __setting ( SETTING_IP4, netmask ) = {
801 	.name = "netmask",
802 	.description = "Subnet mask",
803 	.tag = DHCP_SUBNET_MASK,
804 	.type = &setting_type_ipv4,
805 };
806 
807 /** Default gateway setting */
808 const struct setting gateway_setting __setting ( SETTING_IP4, gateway ) = {
809 	.name = "gateway",
810 	.description = "Default gateway",
811 	.tag = DHCP_ROUTERS,
812 	.type = &setting_type_ipv4,
813 };
814 
815 /**
816  * Send gratuitous ARP, if applicable
817  *
818  * @v netdev		Network device
819  * @v address		IPv4 address
820  * @v netmask		Subnet mask
821  * @v gateway		Gateway address (if any)
822  * @ret rc		Return status code
823  */
ipv4_gratuitous_arp(struct net_device * netdev,struct in_addr address,struct in_addr netmask __unused,struct in_addr gateway __unused)824 static int ipv4_gratuitous_arp ( struct net_device *netdev,
825 				 struct in_addr address,
826 				 struct in_addr netmask __unused,
827 				 struct in_addr gateway __unused ) {
828 	int rc;
829 
830 	/* Do nothing if network device already has this IPv4 address */
831 	if ( ipv4_has_addr ( netdev, address ) )
832 		return 0;
833 
834 	/* Transmit gratuitous ARP */
835 	DBGC ( netdev, "IPv4 sending gratuitous ARP for %s via %s\n",
836 	       inet_ntoa ( address ), netdev->name );
837 	if ( ( rc = arp_tx_request ( netdev, &ipv4_protocol, &address,
838 				     &address ) ) != 0 ) {
839 		DBGC ( netdev, "IPv4 could not transmit gratuitous ARP: %s\n",
840 		       strerror ( rc ) );
841 		/* Treat failures as non-fatal */
842 	}
843 
844 	return 0;
845 }
846 
847 /**
848  * Process IPv4 network device settings
849  *
850  * @v apply		Application method
851  * @ret rc		Return status code
852  */
ipv4_settings(int (* apply)(struct net_device * netdev,struct in_addr address,struct in_addr netmask,struct in_addr gateway))853 static int ipv4_settings ( int ( * apply ) ( struct net_device *netdev,
854 					     struct in_addr address,
855 					     struct in_addr netmask,
856 					     struct in_addr gateway ) ) {
857 	struct net_device *netdev;
858 	struct settings *settings;
859 	struct in_addr address = { 0 };
860 	struct in_addr netmask = { 0 };
861 	struct in_addr gateway = { 0 };
862 	int rc;
863 
864 	/* Process settings for each network device */
865 	for_each_netdev ( netdev ) {
866 
867 		/* Get network device settings */
868 		settings = netdev_settings ( netdev );
869 
870 		/* Get IPv4 address */
871 		address.s_addr = 0;
872 		fetch_ipv4_setting ( settings, &ip_setting, &address );
873 		if ( ! address.s_addr )
874 			continue;
875 
876 		/* Get subnet mask */
877 		fetch_ipv4_setting ( settings, &netmask_setting, &netmask );
878 
879 		/* Calculate default netmask, if necessary */
880 		if ( ! netmask.s_addr ) {
881 			if ( IN_IS_CLASSA ( address.s_addr ) ) {
882 				netmask.s_addr = INADDR_NET_CLASSA;
883 			} else if ( IN_IS_CLASSB ( address.s_addr ) ) {
884 				netmask.s_addr = INADDR_NET_CLASSB;
885 			} else if ( IN_IS_CLASSC ( address.s_addr ) ) {
886 				netmask.s_addr = INADDR_NET_CLASSC;
887 			}
888 		}
889 
890 		/* Get default gateway, if present */
891 		fetch_ipv4_setting ( settings, &gateway_setting, &gateway );
892 
893 		/* Apply settings */
894 		if ( ( rc = apply ( netdev, address, netmask, gateway ) ) != 0 )
895 			return rc;
896 	}
897 
898 	return 0;
899 }
900 
901 /**
902  * Create IPv4 routing table based on configured settings
903  *
904  * @ret rc		Return status code
905  */
ipv4_create_routes(void)906 static int ipv4_create_routes ( void ) {
907 	struct ipv4_miniroute *miniroute;
908 	struct ipv4_miniroute *tmp;
909 	int rc;
910 
911 	/* Send gratuitous ARPs for any new IPv4 addresses */
912 	ipv4_settings ( ipv4_gratuitous_arp );
913 
914 	/* Delete all existing routes */
915 	list_for_each_entry_safe ( miniroute, tmp, &ipv4_miniroutes, list )
916 		del_ipv4_miniroute ( miniroute );
917 
918 	/* Create a route for each configured network device */
919 	if ( ( rc = ipv4_settings ( add_ipv4_miniroute ) ) != 0 )
920 		return rc;
921 
922 	return 0;
923 }
924 
925 /** IPv4 settings applicator */
926 struct settings_applicator ipv4_settings_applicator __settings_applicator = {
927 	.apply = ipv4_create_routes,
928 };
929 
930 /* Drag in objects via ipv4_protocol */
931 REQUIRING_SYMBOL ( ipv4_protocol );
932 
933 /* Drag in ICMPv4 */
934 REQUIRE_OBJECT ( icmpv4 );
935