1 /*
2  * Copyright (C) 2006 Michael Brown <mbrown@fensystems.co.uk>.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  * 02110-1301, USA.
18  *
19  * You can also choose to distribute this program under the terms of
20  * the Unmodified Binary Distribution Licence (as given in the file
21  * COPYING.UBDL), provided that you have satisfied its requirements.
22  */
23 
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25 
26 #include <stdint.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <byteswap.h>
30 #include <string.h>
31 #include <errno.h>
32 #include <config/general.h>
33 #include <ipxe/if_ether.h>
34 #include <ipxe/iobuf.h>
35 #include <ipxe/tables.h>
36 #include <ipxe/process.h>
37 #include <ipxe/init.h>
38 #include <ipxe/malloc.h>
39 #include <ipxe/device.h>
40 #include <ipxe/errortab.h>
41 #include <ipxe/profile.h>
42 #include <ipxe/fault.h>
43 #include <ipxe/vlan.h>
44 #include <ipxe/netdevice.h>
45 
46 /** @file
47  *
48  * Network device management
49  *
50  */
51 
/** List of network devices
 *
 * Devices are appended by register_netdev() and removed again by
 * unregister_netdev() (or by register_netdev()'s own error path).
 */
struct list_head net_devices = LIST_HEAD_INIT ( net_devices );

/** List of open network devices, in reverse order of opening
 *
 * netdev_open() inserts at the head of this list, so the first entry
 * is the most recently opened device.
 */
static struct list_head open_net_devices = LIST_HEAD_INIT ( open_net_devices );

/** Network device index
 *
 * Advanced by register_netdev() each time a device is assigned an
 * index, and reset to zero by unregister_netdev() once the device
 * list becomes empty.
 */
static unsigned int netdev_index = 0;

/** Network polling profiler */
static struct profiler net_poll_profiler __profiler = { .name = "net.poll" };

/** Network receive profiler */
static struct profiler net_rx_profiler __profiler = { .name = "net.rx" };

/** Network transmit profiler (started and stopped by netdev_tx()) */
static struct profiler net_tx_profiler __profiler = { .name = "net.tx" };
69 
/** Default unknown link status code
 *
 * This is the initial value of net_device::link_rc set by
 * alloc_netdev().
 */
#define EUNKNOWN_LINK_STATUS __einfo_error ( EINFO_EUNKNOWN_LINK_STATUS )
#define EINFO_EUNKNOWN_LINK_STATUS \
	__einfo_uniqify ( EINFO_EINPROGRESS, 0x01, "Unknown" )

/** Default not-yet-attempted-configuration status code
 *
 * This is the initial value of each configuration's status code, as
 * set by alloc_netdev().
 */
#define EUNUSED_CONFIG __einfo_error ( EINFO_EUNUSED_CONFIG )
#define EINFO_EUNUSED_CONFIG \
	__einfo_uniqify ( EINFO_EINPROGRESS, 0x02, "Unused" )

/** Default configuration-in-progress status code */
#define EINPROGRESS_CONFIG __einfo_error ( EINFO_EINPROGRESS_CONFIG )
#define EINFO_EINPROGRESS_CONFIG \
	__einfo_uniqify ( EINFO_EINPROGRESS, 0x03, "Incomplete" )

/** Default link-down status code
 *
 * This is the status recorded by netdev_link_down() when no more
 * detailed link status is already present.
 */
#define ENOTCONN_LINK_DOWN __einfo_error ( EINFO_ENOTCONN_LINK_DOWN )
#define EINFO_ENOTCONN_LINK_DOWN \
	__einfo_uniqify ( EINFO_ENOTCONN, 0x01, "Down" )

/** Human-readable messages for the default link and configuration
 * status codes defined above
 */
struct errortab netdev_errors[] __errortab = {
	__einfo_errortab ( EINFO_EUNKNOWN_LINK_STATUS ),
	__einfo_errortab ( EINFO_ENOTCONN_LINK_DOWN ),
	__einfo_errortab ( EINFO_EUNUSED_CONFIG ),
	__einfo_errortab ( EINFO_EINPROGRESS_CONFIG ),
};
97 
98 /**
99  * Check whether or not network device has a link-layer address
100  *
101  * @v netdev		Network device
102  * @ret has_ll_addr	Network device has a link-layer address
103  */
netdev_has_ll_addr(struct net_device * netdev)104 static int netdev_has_ll_addr ( struct net_device *netdev ) {
105 	uint8_t *ll_addr = netdev->ll_addr;
106 	size_t remaining = sizeof ( netdev->ll_addr );
107 
108 	while ( remaining-- ) {
109 		if ( *(ll_addr++) != 0 )
110 			return 1;
111 	}
112 	return 0;
113 }
114 
115 /**
116  * Notify drivers of network device or link state change
117  *
118  * @v netdev		Network device
119  */
netdev_notify(struct net_device * netdev)120 static void netdev_notify ( struct net_device *netdev ) {
121 	struct net_driver *driver;
122 
123 	for_each_table_entry ( driver, NET_DRIVERS ) {
124 		if ( driver->notify )
125 			driver->notify ( netdev );
126 	}
127 }
128 
129 /**
130  * Freeze network device receive queue processing
131  *
132  * @v netdev		Network device
133  */
netdev_rx_freeze(struct net_device * netdev)134 void netdev_rx_freeze ( struct net_device *netdev ) {
135 
136 	/* Mark receive queue processing as frozen */
137 	netdev->state |= NETDEV_RX_FROZEN;
138 
139 	/* Notify drivers of change */
140 	netdev_notify ( netdev );
141 }
142 
143 /**
144  * Unfreeze network device receive queue processing
145  *
146  * @v netdev		Network device
147  */
netdev_rx_unfreeze(struct net_device * netdev)148 void netdev_rx_unfreeze ( struct net_device *netdev ) {
149 
150 	/* Mark receive queue processing as not frozen */
151 	netdev->state &= ~NETDEV_RX_FROZEN;
152 
153 	/* Notify drivers of change */
154 	netdev_notify ( netdev );
155 }
156 
157 /**
158  * Mark network device as having a specific link state
159  *
160  * @v netdev		Network device
161  * @v rc		Link status code
162  */
netdev_link_err(struct net_device * netdev,int rc)163 void netdev_link_err ( struct net_device *netdev, int rc ) {
164 
165 	/* Stop link block timer */
166 	stop_timer ( &netdev->link_block );
167 
168 	/* Record link state */
169 	netdev->link_rc = rc;
170 	if ( netdev->link_rc == 0 ) {
171 		DBGC ( netdev, "NETDEV %s link is up\n", netdev->name );
172 	} else {
173 		DBGC ( netdev, "NETDEV %s link is down: %s\n",
174 		       netdev->name, strerror ( netdev->link_rc ) );
175 	}
176 
177 	/* Notify drivers of link state change */
178 	netdev_notify ( netdev );
179 }
180 
181 /**
182  * Mark network device as having link down
183  *
184  * @v netdev		Network device
185  */
netdev_link_down(struct net_device * netdev)186 void netdev_link_down ( struct net_device *netdev ) {
187 
188 	/* Avoid clobbering a more detailed link status code, if one
189 	 * is already set.
190 	 */
191 	if ( ( netdev->link_rc == 0 ) ||
192 	     ( netdev->link_rc == -EUNKNOWN_LINK_STATUS ) ) {
193 		netdev_link_err ( netdev, -ENOTCONN_LINK_DOWN );
194 	}
195 }
196 
197 /**
198  * Mark network device link as being blocked
199  *
200  * @v netdev		Network device
201  * @v timeout		Timeout (in ticks)
202  */
netdev_link_block(struct net_device * netdev,unsigned long timeout)203 void netdev_link_block ( struct net_device *netdev, unsigned long timeout ) {
204 
205 	/* Start link block timer */
206 	if ( ! netdev_link_blocked ( netdev ) ) {
207 		DBGC ( netdev, "NETDEV %s link blocked for %ld ticks\n",
208 		       netdev->name, timeout );
209 	}
210 	start_timer_fixed ( &netdev->link_block, timeout );
211 }
212 
213 /**
214  * Mark network device link as being unblocked
215  *
216  * @v netdev		Network device
217  */
netdev_link_unblock(struct net_device * netdev)218 void netdev_link_unblock ( struct net_device *netdev ) {
219 
220 	/* Stop link block timer */
221 	if ( netdev_link_blocked ( netdev ) )
222 		DBGC ( netdev, "NETDEV %s link unblocked\n", netdev->name );
223 	stop_timer ( &netdev->link_block );
224 }
225 
226 /**
227  * Handle network device link block timer expiry
228  *
229  * @v timer		Link block timer
230  * @v fail		Failure indicator
231  */
netdev_link_block_expired(struct retry_timer * timer,int fail __unused)232 static void netdev_link_block_expired ( struct retry_timer *timer,
233 					int fail __unused ) {
234 	struct net_device *netdev =
235 		container_of ( timer, struct net_device, link_block );
236 
237 	/* Assume link is no longer blocked */
238 	DBGC ( netdev, "NETDEV %s link block expired\n", netdev->name );
239 }
240 
241 /**
242  * Record network device statistic
243  *
244  * @v stats		Network device statistics
245  * @v rc		Status code
246  */
netdev_record_stat(struct net_device_stats * stats,int rc)247 static void netdev_record_stat ( struct net_device_stats *stats, int rc ) {
248 	struct net_device_error *error;
249 	struct net_device_error *least_common_error;
250 	unsigned int i;
251 
252 	/* If this is not an error, just update the good counter */
253 	if ( rc == 0 ) {
254 		stats->good++;
255 		return;
256 	}
257 
258 	/* Update the bad counter */
259 	stats->bad++;
260 
261 	/* Locate the appropriate error record */
262 	least_common_error = &stats->errors[0];
263 	for ( i = 0 ; i < ( sizeof ( stats->errors ) /
264 			    sizeof ( stats->errors[0] ) ) ; i++ ) {
265 		error = &stats->errors[i];
266 		/* Update matching record, if found */
267 		if ( error->rc == rc ) {
268 			error->count++;
269 			return;
270 		}
271 		if ( error->count < least_common_error->count )
272 			least_common_error = error;
273 	}
274 
275 	/* Overwrite the least common error record */
276 	least_common_error->rc = rc;
277 	least_common_error->count = 1;
278 }
279 
280 /**
281  * Transmit raw packet via network device
282  *
283  * @v netdev		Network device
284  * @v iobuf		I/O buffer
285  * @ret rc		Return status code
286  *
287  * Transmits the packet via the specified network device.  This
288  * function takes ownership of the I/O buffer.
289  */
netdev_tx(struct net_device * netdev,struct io_buffer * iobuf)290 int netdev_tx ( struct net_device *netdev, struct io_buffer *iobuf ) {
291 	int rc;
292 
293 	DBGC2 ( netdev, "NETDEV %s transmitting %p (%p+%zx)\n",
294 		netdev->name, iobuf, iobuf->data, iob_len ( iobuf ) );
295 	profile_start ( &net_tx_profiler );
296 
297 	/* Enqueue packet */
298 	list_add_tail ( &iobuf->list, &netdev->tx_queue );
299 
300 	/* Avoid calling transmit() on unopened network devices */
301 	if ( ! netdev_is_open ( netdev ) ) {
302 		rc = -ENETUNREACH;
303 		goto err;
304 	}
305 
306 	/* Discard packet (for test purposes) if applicable */
307 	if ( ( rc = inject_fault ( NETDEV_DISCARD_RATE ) ) != 0 )
308 		goto err;
309 
310 	/* Transmit packet */
311 	if ( ( rc = netdev->op->transmit ( netdev, iobuf ) ) != 0 )
312 		goto err;
313 
314 	profile_stop ( &net_tx_profiler );
315 	return 0;
316 
317  err:
318 	netdev_tx_complete_err ( netdev, iobuf, rc );
319 	return rc;
320 }
321 
322 /**
323  * Defer transmitted packet
324  *
325  * @v netdev		Network device
326  * @v iobuf		I/O buffer
327  *
328  * Drivers may call netdev_tx_defer() if there is insufficient space
329  * in the transmit descriptor ring.  Any packets deferred in this way
330  * will be automatically retransmitted as soon as space becomes
331  * available (i.e. as soon as the driver calls netdev_tx_complete()).
332  *
333  * The packet must currently be in the network device's TX queue.
334  *
335  * Drivers utilising netdev_tx_defer() must ensure that space in the
336  * transmit descriptor ring is freed up @b before calling
337  * netdev_tx_complete().  For example, if the ring is modelled using a
338  * producer counter and a consumer counter, then the consumer counter
339  * must be incremented before the call to netdev_tx_complete().
340  * Failure to do this will cause the retransmitted packet to be
341  * immediately redeferred (which will result in out-of-order
342  * transmissions and other nastiness).
343  */
netdev_tx_defer(struct net_device * netdev,struct io_buffer * iobuf)344 void netdev_tx_defer ( struct net_device *netdev, struct io_buffer *iobuf ) {
345 
346 	/* Catch data corruption as early as possible */
347 	list_check_contains_entry ( iobuf, &netdev->tx_queue, list );
348 
349 	/* Remove from transmit queue */
350 	list_del ( &iobuf->list );
351 
352 	/* Add to deferred transmit queue */
353 	list_add_tail ( &iobuf->list, &netdev->tx_deferred );
354 
355 	/* Record "out of space" statistic */
356 	netdev_tx_err ( netdev, NULL, -ENOBUFS );
357 }
358 
359 /**
360  * Discard transmitted packet
361  *
362  * @v netdev		Network device
363  * @v iobuf		I/O buffer, or NULL
364  * @v rc		Packet status code
365  *
366  * The packet is discarded and a TX error is recorded.  This function
367  * takes ownership of the I/O buffer.
368  */
netdev_tx_err(struct net_device * netdev,struct io_buffer * iobuf,int rc)369 void netdev_tx_err ( struct net_device *netdev,
370 		     struct io_buffer *iobuf, int rc ) {
371 
372 	/* Update statistics counter */
373 	netdev_record_stat ( &netdev->tx_stats, rc );
374 	if ( rc == 0 ) {
375 		DBGC2 ( netdev, "NETDEV %s transmission %p complete\n",
376 			netdev->name, iobuf );
377 	} else {
378 		DBGC ( netdev, "NETDEV %s transmission %p failed: %s\n",
379 		       netdev->name, iobuf, strerror ( rc ) );
380 	}
381 
382 	/* Discard packet */
383 	free_iob ( iobuf );
384 }
385 
386 /**
387  * Complete network transmission
388  *
389  * @v netdev		Network device
390  * @v iobuf		I/O buffer
391  * @v rc		Packet status code
392  *
393  * The packet must currently be in the network device's TX queue.
394  */
netdev_tx_complete_err(struct net_device * netdev,struct io_buffer * iobuf,int rc)395 void netdev_tx_complete_err ( struct net_device *netdev,
396 			      struct io_buffer *iobuf, int rc ) {
397 
398 	/* Catch data corruption as early as possible */
399 	list_check_contains_entry ( iobuf, &netdev->tx_queue, list );
400 
401 	/* Dequeue and free I/O buffer */
402 	list_del ( &iobuf->list );
403 	netdev_tx_err ( netdev, iobuf, rc );
404 
405 	/* Handle pending transmit queue */
406 	while ( ( iobuf = list_first_entry ( &netdev->tx_deferred,
407 					     struct io_buffer, list ) ) ) {
408 
409 		/* Remove from pending transmit queue */
410 		list_del ( &iobuf->list );
411 
412 		/* When any transmit completion fails, cancel all
413 		 * pending transmissions.
414 		 */
415 		if ( rc != 0 ) {
416 			netdev_tx_err ( netdev, iobuf, -ECANCELED );
417 			continue;
418 		}
419 
420 		/* Otherwise, attempt to transmit the first pending packet */
421 		netdev_tx ( netdev, iobuf );
422 		break;
423 	}
424 }
425 
426 /**
427  * Complete network transmission
428  *
429  * @v netdev		Network device
430  * @v rc		Packet status code
431  *
432  * Completes the oldest outstanding packet in the TX queue.
433  */
netdev_tx_complete_next_err(struct net_device * netdev,int rc)434 void netdev_tx_complete_next_err ( struct net_device *netdev, int rc ) {
435 	struct io_buffer *iobuf;
436 
437 	if ( ( iobuf = list_first_entry ( &netdev->tx_queue, struct io_buffer,
438 					  list ) ) != NULL ) {
439 		netdev_tx_complete_err ( netdev, iobuf, rc );
440 	}
441 }
442 
443 /**
444  * Flush device's transmit queue
445  *
446  * @v netdev		Network device
447  */
netdev_tx_flush(struct net_device * netdev)448 static void netdev_tx_flush ( struct net_device *netdev ) {
449 
450 	/* Discard any packets in the TX queue.  This will also cause
451 	 * any packets in the deferred TX queue to be discarded
452 	 * automatically.
453 	 */
454 	while ( ! list_empty ( &netdev->tx_queue ) ) {
455 		netdev_tx_complete_next_err ( netdev, -ECANCELED );
456 	}
457 	assert ( list_empty ( &netdev->tx_queue ) );
458 	assert ( list_empty ( &netdev->tx_deferred ) );
459 }
460 
461 /**
462  * Add packet to receive queue
463  *
464  * @v netdev		Network device
465  * @v iobuf		I/O buffer, or NULL
466  *
467  * The packet is added to the network device's RX queue.  This
468  * function takes ownership of the I/O buffer.
469  */
netdev_rx(struct net_device * netdev,struct io_buffer * iobuf)470 void netdev_rx ( struct net_device *netdev, struct io_buffer *iobuf ) {
471 	int rc;
472 
473 	DBGC2 ( netdev, "NETDEV %s received %p (%p+%zx)\n",
474 		netdev->name, iobuf, iobuf->data, iob_len ( iobuf ) );
475 
476 	/* Discard packet (for test purposes) if applicable */
477 	if ( ( rc = inject_fault ( NETDEV_DISCARD_RATE ) ) != 0 ) {
478 		netdev_rx_err ( netdev, iobuf, rc );
479 		return;
480 	}
481 
482 	/* Enqueue packet */
483 	list_add_tail ( &iobuf->list, &netdev->rx_queue );
484 
485 	/* Update statistics counter */
486 	netdev_record_stat ( &netdev->rx_stats, 0 );
487 }
488 
489 /**
490  * Discard received packet
491  *
492  * @v netdev		Network device
493  * @v iobuf		I/O buffer, or NULL
494  * @v rc		Packet status code
495  *
496  * The packet is discarded and an RX error is recorded.  This function
497  * takes ownership of the I/O buffer.  @c iobuf may be NULL if, for
498  * example, the net device wishes to report an error due to being
499  * unable to allocate an I/O buffer.
500  */
netdev_rx_err(struct net_device * netdev,struct io_buffer * iobuf,int rc)501 void netdev_rx_err ( struct net_device *netdev,
502 		     struct io_buffer *iobuf, int rc ) {
503 
504 	DBGC ( netdev, "NETDEV %s failed to receive %p: %s\n",
505 	       netdev->name, iobuf, strerror ( rc ) );
506 
507 	/* Discard packet */
508 	free_iob ( iobuf );
509 
510 	/* Update statistics counter */
511 	netdev_record_stat ( &netdev->rx_stats, rc );
512 }
513 
514 /**
515  * Poll for completed and received packets on network device
516  *
517  * @v netdev		Network device
518  *
519  * Polls the network device for completed transmissions and received
520  * packets.  Any received packets will be added to the RX packet queue
521  * via netdev_rx().
522  */
netdev_poll(struct net_device * netdev)523 void netdev_poll ( struct net_device *netdev ) {
524 
525 	if ( netdev_is_open ( netdev ) )
526 		netdev->op->poll ( netdev );
527 }
528 
529 /**
530  * Remove packet from device's receive queue
531  *
532  * @v netdev		Network device
533  * @ret iobuf		I/O buffer, or NULL
534  *
535  * Removes the first packet from the device's RX queue and returns it.
536  * Ownership of the packet is transferred to the caller.
537  */
netdev_rx_dequeue(struct net_device * netdev)538 struct io_buffer * netdev_rx_dequeue ( struct net_device *netdev ) {
539 	struct io_buffer *iobuf;
540 
541 	iobuf = list_first_entry ( &netdev->rx_queue, struct io_buffer, list );
542 	if ( ! iobuf )
543 		return NULL;
544 
545 	list_del ( &iobuf->list );
546 	return iobuf;
547 }
548 
/**
 * Flush a network device's receive queue
 *
 * @v netdev		Network device
 *
 * Every queued packet is discarded and recorded as an RX error with
 * status -ECANCELED.
 */
static void netdev_rx_flush ( struct net_device *netdev ) {
	struct io_buffer *iobuf;

	/* Drain the RX queue, discarding each packet in turn */
	for ( ; ; ) {
		iobuf = netdev_rx_dequeue ( netdev );
		if ( ! iobuf )
			break;
		netdev_rx_err ( netdev, iobuf, -ECANCELED );
	}
}
562 
563 /**
564  * Finish network device configuration
565  *
566  * @v config		Network device configuration
567  * @v rc		Reason for completion
568  */
netdev_config_close(struct net_device_configuration * config,int rc)569 static void netdev_config_close ( struct net_device_configuration *config,
570 				  int rc ) {
571 	struct net_device_configurator *configurator = config->configurator;
572 	struct net_device *netdev = config->netdev;
573 
574 	/* Restart interface */
575 	intf_restart ( &config->job, rc );
576 
577 	/* Record configuration result */
578 	config->rc = rc;
579 	if ( rc == 0 ) {
580 		DBGC ( netdev, "NETDEV %s configured via %s\n",
581 		       netdev->name, configurator->name );
582 	} else {
583 		DBGC ( netdev, "NETDEV %s configuration via %s failed: %s\n",
584 		       netdev->name, configurator->name, strerror ( rc ) );
585 	}
586 }
587 
/** Network device configuration interface operations
 *
 * Closing a configuration's job interface is routed to
 * netdev_config_close().
 */
static struct interface_operation netdev_config_ops[] = {
	INTF_OP ( intf_close, struct net_device_configuration *,
		  netdev_config_close ),
};

/** Network device configuration interface descriptor
 *
 * Describes the @c job interface embedded in each
 * net_device_configuration; alloc_netdev() passes this descriptor to
 * intf_init() for every configuration it sets up.
 */
static struct interface_descriptor netdev_config_desc =
	INTF_DESC ( struct net_device_configuration, job, netdev_config_ops );
597 
598 /**
599  * Free network device
600  *
601  * @v refcnt		Network device reference counter
602  */
free_netdev(struct refcnt * refcnt)603 static void free_netdev ( struct refcnt *refcnt ) {
604 	struct net_device *netdev =
605 		container_of ( refcnt, struct net_device, refcnt );
606 
607 	stop_timer ( &netdev->link_block );
608 	netdev_tx_flush ( netdev );
609 	netdev_rx_flush ( netdev );
610 	clear_settings ( netdev_settings ( netdev ) );
611 	free ( netdev );
612 }
613 
614 /**
615  * Allocate network device
616  *
617  * @v priv_len		Length of private data area (net_device::priv)
618  * @ret netdev		Network device, or NULL
619  *
620  * Allocates space for a network device and its private data area.
621  */
alloc_netdev(size_t priv_len)622 struct net_device * alloc_netdev ( size_t priv_len ) {
623 	struct net_device *netdev;
624 	struct net_device_configurator *configurator;
625 	struct net_device_configuration *config;
626 	unsigned int num_configs;
627 	size_t confs_len;
628 	size_t total_len;
629 
630 	num_configs = table_num_entries ( NET_DEVICE_CONFIGURATORS );
631 	confs_len = ( num_configs * sizeof ( netdev->configs[0] ) );
632 	total_len = ( sizeof ( *netdev ) + confs_len + priv_len );
633 	netdev = zalloc ( total_len );
634 	if ( netdev ) {
635 		ref_init ( &netdev->refcnt, free_netdev );
636 		netdev->link_rc = -EUNKNOWN_LINK_STATUS;
637 		timer_init ( &netdev->link_block, netdev_link_block_expired,
638 			     &netdev->refcnt );
639 		INIT_LIST_HEAD ( &netdev->tx_queue );
640 		INIT_LIST_HEAD ( &netdev->tx_deferred );
641 		INIT_LIST_HEAD ( &netdev->rx_queue );
642 		netdev_settings_init ( netdev );
643 		config = netdev->configs;
644 		for_each_table_entry ( configurator, NET_DEVICE_CONFIGURATORS ){
645 			config->netdev = netdev;
646 			config->configurator = configurator;
647 			config->rc = -EUNUSED_CONFIG;
648 			intf_init ( &config->job, &netdev_config_desc,
649 				    &netdev->refcnt );
650 			config++;
651 		}
652 		netdev->priv = ( ( ( void * ) netdev ) + sizeof ( *netdev ) +
653 				 confs_len );
654 	}
655 	return netdev;
656 }
657 
658 /**
659  * Register network device
660  *
661  * @v netdev		Network device
662  * @ret rc		Return status code
663  *
664  * Gives the network device a name and adds it to the list of network
665  * devices.
666  */
register_netdev(struct net_device * netdev)667 int register_netdev ( struct net_device *netdev ) {
668 	struct ll_protocol *ll_protocol = netdev->ll_protocol;
669 	struct net_driver *driver;
670 	struct net_device *duplicate;
671 	uint32_t seed;
672 	int rc;
673 
674 	/* Set initial link-layer address, if not already set */
675 	if ( ! netdev_has_ll_addr ( netdev ) ) {
676 		ll_protocol->init_addr ( netdev->hw_addr, netdev->ll_addr );
677 	}
678 
679 	/* Set MTU, if not already set */
680 	if ( ! netdev->mtu ) {
681 		netdev->mtu = ( netdev->max_pkt_len -
682 				ll_protocol->ll_header_len );
683 	}
684 
685 	/* Reject network devices that are already available via a
686 	 * different hardware device.
687 	 */
688 	duplicate = find_netdev_by_ll_addr ( ll_protocol, netdev->ll_addr );
689 	if ( duplicate && ( duplicate->dev != netdev->dev ) ) {
690 		DBGC ( netdev, "NETDEV rejecting duplicate (phys %s) of %s "
691 		       "(phys %s)\n", netdev->dev->name, duplicate->name,
692 		       duplicate->dev->name );
693 		rc = -EEXIST;
694 		goto err_duplicate;
695 	}
696 
697 	/* Reject named network devices that already exist */
698 	if ( netdev->name[0] && ( duplicate = find_netdev ( netdev->name ) ) ) {
699 		DBGC ( netdev, "NETDEV rejecting duplicate name %s\n",
700 		       duplicate->name );
701 		rc = -EEXIST;
702 		goto err_duplicate;
703 	}
704 
705 	/* Record device index and create device name */
706 	if ( netdev->name[0] == '\0' ) {
707 		snprintf ( netdev->name, sizeof ( netdev->name ), "net%d",
708 			   netdev_index );
709 	}
710 	netdev->index = ++netdev_index;
711 
712 	/* Use least significant bits of the link-layer address to
713 	 * improve the randomness of the (non-cryptographic) random
714 	 * number generator.
715 	 */
716 	memcpy ( &seed, ( netdev->ll_addr + ll_protocol->ll_addr_len
717 			  - sizeof ( seed ) ), sizeof ( seed ) );
718 	srand ( rand() ^ seed );
719 
720 	/* Add to device list */
721 	netdev_get ( netdev );
722 	list_add_tail ( &netdev->list, &net_devices );
723 	DBGC ( netdev, "NETDEV %s registered (phys %s hwaddr %s)\n",
724 	       netdev->name, netdev->dev->name,
725 	       netdev_addr ( netdev ) );
726 
727 	/* Register per-netdev configuration settings */
728 	if ( ( rc = register_settings ( netdev_settings ( netdev ),
729 					NULL, netdev->name ) ) != 0 ) {
730 		DBGC ( netdev, "NETDEV %s could not register settings: %s\n",
731 		       netdev->name, strerror ( rc ) );
732 		goto err_register_settings;
733 	}
734 
735 	/* Probe device */
736 	for_each_table_entry ( driver, NET_DRIVERS ) {
737 		if ( driver->probe && ( rc = driver->probe ( netdev ) ) != 0 ) {
738 			DBGC ( netdev, "NETDEV %s could not add %s device: "
739 			       "%s\n", netdev->name, driver->name,
740 			       strerror ( rc ) );
741 			goto err_probe;
742 		}
743 	}
744 
745 	return 0;
746 
747  err_probe:
748 	for_each_table_entry_continue_reverse ( driver, NET_DRIVERS ) {
749 		if ( driver->remove )
750 			driver->remove ( netdev );
751 	}
752 	clear_settings ( netdev_settings ( netdev ) );
753 	unregister_settings ( netdev_settings ( netdev ) );
754  err_register_settings:
755 	list_del ( &netdev->list );
756 	netdev_put ( netdev );
757  err_duplicate:
758 	return rc;
759 }
760 
761 /**
762  * Open network device
763  *
764  * @v netdev		Network device
765  * @ret rc		Return status code
766  */
netdev_open(struct net_device * netdev)767 int netdev_open ( struct net_device *netdev ) {
768 	int rc;
769 
770 	/* Do nothing if device is already open */
771 	if ( netdev->state & NETDEV_OPEN )
772 		return 0;
773 
774 	DBGC ( netdev, "NETDEV %s opening\n", netdev->name );
775 
776 	/* Mark as opened */
777 	netdev->state |= NETDEV_OPEN;
778 
779 	/* Open the device */
780 	if ( ( rc = netdev->op->open ( netdev ) ) != 0 )
781 		goto err;
782 
783 	/* Add to head of open devices list */
784 	list_add ( &netdev->open_list, &open_net_devices );
785 
786 	/* Notify drivers of device state change */
787 	netdev_notify ( netdev );
788 
789 	return 0;
790 
791  err:
792 	netdev->state &= ~NETDEV_OPEN;
793 	return rc;
794 }
795 
796 /**
797  * Close network device
798  *
799  * @v netdev		Network device
800  */
netdev_close(struct net_device * netdev)801 void netdev_close ( struct net_device *netdev ) {
802 	unsigned int num_configs;
803 	unsigned int i;
804 
805 	/* Do nothing if device is already closed */
806 	if ( ! ( netdev->state & NETDEV_OPEN ) )
807 		return;
808 
809 	DBGC ( netdev, "NETDEV %s closing\n", netdev->name );
810 
811 	/* Terminate any ongoing configurations.  Use intf_close()
812 	 * rather than intf_restart() to allow the cancellation to be
813 	 * reported back to us if a configuration is actually in
814 	 * progress.
815 	 */
816 	num_configs = table_num_entries ( NET_DEVICE_CONFIGURATORS );
817 	for ( i = 0 ; i < num_configs ; i++ )
818 		intf_close ( &netdev->configs[i].job, -ECANCELED );
819 
820 	/* Remove from open devices list */
821 	list_del ( &netdev->open_list );
822 
823 	/* Mark as closed */
824 	netdev->state &= ~NETDEV_OPEN;
825 
826 	/* Notify drivers of device state change */
827 	netdev_notify ( netdev );
828 
829 	/* Close the device */
830 	netdev->op->close ( netdev );
831 
832 	/* Flush TX and RX queues */
833 	netdev_tx_flush ( netdev );
834 	netdev_rx_flush ( netdev );
835 }
836 
837 /**
838  * Unregister network device
839  *
840  * @v netdev		Network device
841  *
842  * Removes the network device from the list of network devices.
843  */
unregister_netdev(struct net_device * netdev)844 void unregister_netdev ( struct net_device *netdev ) {
845 	struct net_driver *driver;
846 
847 	/* Ensure device is closed */
848 	netdev_close ( netdev );
849 
850 	/* Remove device */
851 	for_each_table_entry_reverse ( driver, NET_DRIVERS ) {
852 		if ( driver->remove )
853 			driver->remove ( netdev );
854 	}
855 
856 	/* Unregister per-netdev configuration settings */
857 	clear_settings ( netdev_settings ( netdev ) );
858 	unregister_settings ( netdev_settings ( netdev ) );
859 
860 	/* Remove from device list */
861 	DBGC ( netdev, "NETDEV %s unregistered\n", netdev->name );
862 	list_del ( &netdev->list );
863 	netdev_put ( netdev );
864 
865 	/* Reset network device index if no devices remain */
866 	if ( list_empty ( &net_devices ) )
867 		netdev_index = 0;
868 }
869 
870 /** Enable or disable interrupts
871  *
872  * @v netdev		Network device
873  * @v enable		Interrupts should be enabled
874  */
netdev_irq(struct net_device * netdev,int enable)875 void netdev_irq ( struct net_device *netdev, int enable ) {
876 
877 	/* Enable or disable device interrupts, if applicable */
878 	if ( netdev_irq_supported ( netdev ) )
879 		netdev->op->irq ( netdev, enable );
880 
881 	/* Record interrupt enabled state */
882 	netdev->state &= ~NETDEV_IRQ_ENABLED;
883 	if ( enable )
884 		netdev->state |= NETDEV_IRQ_ENABLED;
885 }
886 
887 /**
888  * Get network device by name
889  *
890  * @v name		Network device name
891  * @ret netdev		Network device, or NULL
892  */
find_netdev(const char * name)893 struct net_device * find_netdev ( const char *name ) {
894 	struct net_device *netdev;
895 
896 	/* Allow "netX" shortcut */
897 	if ( strcmp ( name, "netX" ) == 0 )
898 		return last_opened_netdev();
899 
900 	/* Identify network device by name */
901 	list_for_each_entry ( netdev, &net_devices, list ) {
902 		if ( strcmp ( netdev->name, name ) == 0 )
903 			return netdev;
904 	}
905 
906 	return NULL;
907 }
908 
909 /**
910  * Get network device by index
911  *
912  * @v index		Network device index
913  * @ret netdev		Network device, or NULL
914  */
find_netdev_by_index(unsigned int index)915 struct net_device * find_netdev_by_index ( unsigned int index ) {
916 	struct net_device *netdev;
917 
918 	/* Identify network device by index */
919 	list_for_each_entry ( netdev, &net_devices, list ) {
920 		if ( netdev->index == index )
921 			return netdev;
922 	}
923 
924 	return NULL;
925 }
926 
927 /**
928  * Get network device by PCI bus:dev.fn address
929  *
930  * @v bus_type		Bus type
931  * @v location		Bus location
932  * @ret netdev		Network device, or NULL
933  */
find_netdev_by_location(unsigned int bus_type,unsigned int location)934 struct net_device * find_netdev_by_location ( unsigned int bus_type,
935 					      unsigned int location ) {
936 	struct net_device *netdev;
937 
938 	list_for_each_entry ( netdev, &net_devices, list ) {
939 		if ( ( netdev->dev->desc.bus_type == bus_type ) &&
940 		     ( netdev->dev->desc.location == location ) )
941 			return netdev;
942 	}
943 
944 	return NULL;
945 }
946 
947 /**
948  * Get network device by link-layer address
949  *
950  * @v ll_protocol	Link-layer protocol
951  * @v ll_addr		Link-layer address
952  * @ret netdev		Network device, or NULL
953  */
find_netdev_by_ll_addr(struct ll_protocol * ll_protocol,const void * ll_addr)954 struct net_device * find_netdev_by_ll_addr ( struct ll_protocol *ll_protocol,
955 					     const void *ll_addr ) {
956 	struct net_device *netdev;
957 
958 	list_for_each_entry ( netdev, &net_devices, list ) {
959 		if ( ( netdev->ll_protocol == ll_protocol ) &&
960 		     ( memcmp ( netdev->ll_addr, ll_addr,
961 				ll_protocol->ll_addr_len ) == 0 ) )
962 			return netdev;
963 	}
964 
965 	return NULL;
966 }
967 
968 /**
969  * Get most recently opened network device
970  *
971  * @ret netdev		Most recently opened network device, or NULL
972  */
last_opened_netdev(void)973 struct net_device * last_opened_netdev ( void ) {
974 	struct net_device *netdev;
975 
976 	netdev = list_first_entry ( &open_net_devices, struct net_device,
977 				    open_list );
978 	if ( ! netdev )
979 		return NULL;
980 
981 	assert ( netdev_is_open ( netdev ) );
982 	return netdev;
983 }
984 
985 /**
986  * Transmit network-layer packet
987  *
988  * @v iobuf		I/O buffer
989  * @v netdev		Network device
990  * @v net_protocol	Network-layer protocol
991  * @v ll_dest		Destination link-layer address
992  * @v ll_source		Source link-layer address
993  * @ret rc		Return status code
994  *
995  * Prepends link-layer headers to the I/O buffer and transmits the
996  * packet via the specified network device.  This function takes
997  * ownership of the I/O buffer.
998  */
net_tx(struct io_buffer * iobuf,struct net_device * netdev,struct net_protocol * net_protocol,const void * ll_dest,const void * ll_source)999 int net_tx ( struct io_buffer *iobuf, struct net_device *netdev,
1000 	     struct net_protocol *net_protocol, const void *ll_dest,
1001 	     const void *ll_source ) {
1002 	struct ll_protocol *ll_protocol = netdev->ll_protocol;
1003 	int rc;
1004 
1005 	/* Add link-layer header */
1006 	if ( ( rc = ll_protocol->push ( netdev, iobuf, ll_dest, ll_source,
1007 					net_protocol->net_proto ) ) != 0 ) {
1008 		/* Record error for diagnosis */
1009 		netdev_tx_err ( netdev, iobuf, rc );
1010 		return rc;
1011 	}
1012 
1013 	/* Transmit packet */
1014 	return netdev_tx ( netdev, iobuf );
1015 }
1016 
1017 /**
1018  * Process received network-layer packet
1019  *
1020  * @v iobuf		I/O buffer
1021  * @v netdev		Network device
1022  * @v net_proto		Network-layer protocol, in network-byte order
1023  * @v ll_dest		Destination link-layer address
1024  * @v ll_source		Source link-layer address
1025  * @v flags		Packet flags
1026  * @ret rc		Return status code
1027  */
net_rx(struct io_buffer * iobuf,struct net_device * netdev,uint16_t net_proto,const void * ll_dest,const void * ll_source,unsigned int flags)1028 int net_rx ( struct io_buffer *iobuf, struct net_device *netdev,
1029 	     uint16_t net_proto, const void *ll_dest, const void *ll_source,
1030 	     unsigned int flags ) {
1031 	struct net_protocol *net_protocol;
1032 
1033 	/* Hand off to network-layer protocol, if any */
1034 	for_each_table_entry ( net_protocol, NET_PROTOCOLS ) {
1035 		if ( net_protocol->net_proto == net_proto )
1036 			return net_protocol->rx ( iobuf, netdev, ll_dest,
1037 						  ll_source, flags );
1038 	}
1039 
1040 	DBGC ( netdev, "NETDEV %s unknown network protocol %04x\n",
1041 	       netdev->name, ntohs ( net_proto ) );
1042 	free_iob ( iobuf );
1043 	return -ENOTSUP;
1044 }
1045 
/**
 * Poll the network stack
 *
 * This polls all interfaces for received packets, and processes
 * packets from the RX queue.
 *
 * For each registered network device: the device is polled, and then
 * (unless its receive queue is frozen) every queued packet is
 * dequeued, has its link-layer header removed, and is handed to
 * net_rx().
 */
void net_poll ( void ) {
	struct net_device *netdev;
	struct io_buffer *iobuf;
	struct ll_protocol *ll_protocol;
	const void *ll_dest;
	const void *ll_source;
	uint16_t net_proto;
	unsigned int flags;
	int rc;

	/* Poll and process each network device */
	list_for_each_entry ( netdev, &net_devices, list ) {

		/* Poll for new packets (timed by the poll profiler) */
		profile_start ( &net_poll_profiler );
		netdev_poll ( netdev );
		profile_stop ( &net_poll_profiler );

		/* Leave received packets on the queue if receive
		 * queue processing is currently frozen.  This will
		 * happen when the raw packets are to be manually
		 * dequeued using netdev_rx_dequeue(), rather than
		 * processed via the usual networking stack.
		 */
		if ( netdev_rx_frozen ( netdev ) )
			continue;

		/* Process all received packets, until the device's
		 * receive queue is empty
		 */
		while ( ( iobuf = netdev_rx_dequeue ( netdev ) ) ) {

			DBGC2 ( netdev, "NETDEV %s processing %p (%p+%zx)\n",
				netdev->name, iobuf, iobuf->data,
				iob_len ( iobuf ) );
			profile_start ( &net_rx_profiler );

			/* Remove link-layer header.  On failure the
			 * packet is simply freed: note that this path
			 * neither records the error via
			 * netdev_rx_err() nor reaches the matching
			 * profile_stop() below.
			 */
			ll_protocol = netdev->ll_protocol;
			if ( ( rc = ll_protocol->pull ( netdev, iobuf,
							&ll_dest, &ll_source,
							&net_proto,
							&flags ) ) != 0 ) {
				free_iob ( iobuf );
				continue;
			}

			/* Hand packet to network layer.  Ownership of
			 * the I/O buffer is given up via iob_disown(),
			 * so no buffer is passed to the error handler.
			 */
			if ( ( rc = net_rx ( iob_disown ( iobuf ), netdev,
					     net_proto, ll_dest,
					     ll_source, flags ) ) != 0 ) {
				/* Record error for diagnosis */
				netdev_rx_err ( netdev, NULL, rc );
			}
			profile_stop ( &net_rx_profiler );
		}
	}
}
1108 
/**
 * Single-step the network stack
 *
 * @v process		Network stack process
 *
 * Process step function: performs one pass of net_poll().  The
 * process argument is not used.
 */
static void net_step ( struct process *process __unused ) {
	net_poll();
}
1117 
/**
 * Get the VLAN tag (when VLAN support is not present)
 *
 * @v netdev		Network device
 * @ret tag		0, indicating that device is not a VLAN device
 *
 * This is a weak definition; it is presumably superseded by the real
 * implementation whenever VLAN support is linked in.
 */
__weak unsigned int vlan_tag ( struct net_device *netdev __unused ) {
	return 0;
}
1127 
1128 /**
1129  * Add VLAN tag-stripped packet to queue (when VLAN support is not present)
1130  *
1131  * @v netdev		Network device
1132  * @v tag		VLAN tag, or zero
1133  * @v iobuf		I/O buffer
1134  */
vlan_netdev_rx(struct net_device * netdev,unsigned int tag,struct io_buffer * iobuf)1135 __weak void vlan_netdev_rx ( struct net_device *netdev, unsigned int tag,
1136 			     struct io_buffer *iobuf ) {
1137 
1138 	if ( tag == 0 ) {
1139 		netdev_rx ( netdev, iobuf );
1140 	} else {
1141 		netdev_rx_err ( netdev, iobuf, -ENODEV );
1142 	}
1143 }
1144 
/**
 * Discard received VLAN tag-stripped packet (when VLAN support is not present)
 *
 * @v netdev		Network device
 * @v tag		VLAN tag, or zero
 * @v iobuf		I/O buffer, or NULL
 * @v rc		Packet status code
 *
 * Weak fallback definition: the tag is ignored and the error is
 * reported against the network device itself via netdev_rx_err().
 */
__weak void vlan_netdev_rx_err ( struct net_device *netdev,
				 unsigned int tag __unused,
				 struct io_buffer *iobuf, int rc ) {

	netdev_rx_err ( netdev, iobuf, rc );
}
1159 
/** Networking stack process
 *
 * Declares a permanent process whose step function is net_step(),
 * i.e. each step of this process performs one net_poll() pass.
 */
PERMANENT_PROCESS ( net_process, net_step );
1162 
1163 /**
1164  * Discard some cached network device data
1165  *
1166  * @ret discarded	Number of cached items discarded
1167  */
net_discard(void)1168 static unsigned int net_discard ( void ) {
1169 	struct net_device *netdev;
1170 	struct io_buffer *iobuf;
1171 	unsigned int discarded = 0;
1172 
1173 	/* Try to drop one deferred TX packet from each network device */
1174 	for_each_netdev ( netdev ) {
1175 		if ( ( iobuf = list_first_entry ( &netdev->tx_deferred,
1176 						  struct io_buffer,
1177 						  list ) ) != NULL ) {
1178 
1179 			/* Discard first deferred packet */
1180 			list_del ( &iobuf->list );
1181 			free_iob ( iobuf );
1182 
1183 			/* Report discard */
1184 			discarded++;
1185 		}
1186 	}
1187 
1188 	return discarded;
1189 }
1190 
/** Network device cache discarder
 *
 * Registers net_discard() as a cache discarder at CACHE_NORMAL
 * priority, so that deferred transmit packets can be given up on
 * request (presumably when memory is running low).
 */
struct cache_discarder net_discarder __cache_discarder ( CACHE_NORMAL ) = {
	.discard = net_discard,
};
1195 
1196 /**
1197  * Find network device configurator
1198  *
1199  * @v name		Name
1200  * @ret configurator	Network device configurator, or NULL
1201  */
find_netdev_configurator(const char * name)1202 struct net_device_configurator * find_netdev_configurator ( const char *name ) {
1203 	struct net_device_configurator *configurator;
1204 
1205 	for_each_table_entry ( configurator, NET_DEVICE_CONFIGURATORS ) {
1206 		if ( strcmp ( configurator->name, name ) == 0 )
1207 			return configurator;
1208 	}
1209 	return NULL;
1210 }
1211 
1212 /**
1213  * Start network device configuration
1214  *
1215  * @v netdev		Network device
1216  * @v configurator	Network device configurator
1217  * @ret rc		Return status code
1218  */
netdev_configure(struct net_device * netdev,struct net_device_configurator * configurator)1219 int netdev_configure ( struct net_device *netdev,
1220 		       struct net_device_configurator *configurator ) {
1221 	struct net_device_configuration *config =
1222 		netdev_configuration ( netdev, configurator );
1223 	int rc;
1224 
1225 	/* Check applicability of configurator */
1226 	if ( ! netdev_configurator_applies ( netdev, configurator ) ) {
1227 		DBGC ( netdev, "NETDEV %s does not support configuration via "
1228 		       "%s\n", netdev->name, configurator->name );
1229 		return -ENOTSUP;
1230 	}
1231 
1232 	/* Terminate any ongoing configuration */
1233 	intf_restart ( &config->job, -ECANCELED );
1234 
1235 	/* Mark configuration as being in progress */
1236 	config->rc = -EINPROGRESS_CONFIG;
1237 
1238 	DBGC ( netdev, "NETDEV %s starting configuration via %s\n",
1239 	       netdev->name, configurator->name );
1240 
1241 	/* Start configuration */
1242 	if ( ( rc = configurator->start ( &config->job, netdev ) ) != 0 ) {
1243 		DBGC ( netdev, "NETDEV %s could not start configuration via "
1244 		       "%s: %s\n", netdev->name, configurator->name,
1245 		       strerror ( rc ) );
1246 		config->rc = rc;
1247 		return rc;
1248 	}
1249 
1250 	return 0;
1251 }
1252 
1253 /**
1254  * Start network device configuration via all supported configurators
1255  *
1256  * @v netdev		Network device
1257  * @ret rc		Return status code
1258  */
netdev_configure_all(struct net_device * netdev)1259 int netdev_configure_all ( struct net_device *netdev ) {
1260 	struct net_device_configurator *configurator;
1261 	int rc;
1262 
1263 	/* Start configuration for each configurator */
1264 	for_each_table_entry ( configurator, NET_DEVICE_CONFIGURATORS ) {
1265 
1266 		/* Skip any inapplicable configurators */
1267 		if ( ! netdev_configurator_applies ( netdev, configurator ) )
1268 			continue;
1269 
1270 		/* Start configuration */
1271 		if ( ( rc = netdev_configure ( netdev, configurator ) ) != 0 )
1272 			return rc;
1273 	}
1274 
1275 	return 0;
1276 }
1277 
1278 /**
1279  * Check if network device has a configuration with a specified status code
1280  *
1281  * @v netdev		Network device
1282  * @v rc		Status code
1283  * @ret has_rc		Network device has a configuration with this status code
1284  */
netdev_has_configuration_rc(struct net_device * netdev,int rc)1285 static int netdev_has_configuration_rc ( struct net_device *netdev, int rc ) {
1286 	unsigned int num_configs;
1287 	unsigned int i;
1288 
1289 	num_configs = table_num_entries ( NET_DEVICE_CONFIGURATORS );
1290 	for ( i = 0 ; i < num_configs ; i++ ) {
1291 		if ( netdev->configs[i].rc == rc )
1292 			return 1;
1293 	}
1294 	return 0;
1295 }
1296 
1297 /**
1298  * Check if network device configuration is in progress
1299  *
1300  * @v netdev		Network device
1301  * @ret is_in_progress	Network device configuration is in progress
1302  */
netdev_configuration_in_progress(struct net_device * netdev)1303 int netdev_configuration_in_progress ( struct net_device *netdev ) {
1304 
1305 	return netdev_has_configuration_rc ( netdev, -EINPROGRESS_CONFIG );
1306 }
1307 
/**
 * Check if network device has at least one successful configuration
 *
 * @v netdev		Network device
 * @ret is_ok		Network device has a successful configuration
 *
 * True if at least one of the device's configurations has a status
 * code of zero.
 */
int netdev_configuration_ok ( struct net_device *netdev ) {
	int is_ok;

	is_ok = netdev_has_configuration_rc ( netdev, 0 );
	return is_ok;
}
1319