1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2012 - 2015 UNISYS CORPORATION
3  * All rights reserved.
4  */
5 
6 /* This driver lives in a spar partition, and registers to ethernet io
7  * channels from the visorbus driver. It creates netdev devices and
8  * forwards transmit to the IO channel and accepts rcvs from the IO
9  * Partition via the IO channel.
10  */
11 
12 #include <linux/debugfs.h>
13 #include <linux/etherdevice.h>
14 #include <linux/module.h>
15 #include <linux/netdevice.h>
16 #include <linux/kthread.h>
17 #include <linux/skbuff.h>
18 #include <linux/rtnetlink.h>
19 #include <linux/visorbus.h>
20 
21 #include "iochannel.h"
22 
23 #define VISORNIC_INFINITE_RSP_WAIT 0
24 
25 /* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
26  *         = 163840 bytes
27  */
28 #define MAX_BUF 163840
29 #define NAPI_WEIGHT 64
30 
31 /* GUIDS for director channel type supported by this driver.  */
32 /* {8cd5994d-c58e-11da-95a9-00e08161165f} */
33 #define VISOR_VNIC_CHANNEL_GUID \
34 	GUID_INIT(0x8cd5994d, 0xc58e, 0x11da, \
35 		0x95, 0xa9, 0x0, 0xe0, 0x81, 0x61, 0x16, 0x5f)
36 #define VISOR_VNIC_CHANNEL_GUID_STR \
37 	"8cd5994d-c58e-11da-95a9-00e08161165f"
38 
39 static struct visor_channeltype_descriptor visornic_channel_types[] = {
40 	/* Note that the only channel type we expect to be reported by the
41 	 * bus driver is the VISOR_VNIC channel.
42 	 */
43 	{ VISOR_VNIC_CHANNEL_GUID, "ultravnic", sizeof(struct channel_header),
44 	  VISOR_VNIC_CHANNEL_VERSIONID },
45 	{}
46 };
47 MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
48 /* FIXME XXX: This next line of code must be fixed and removed before
49  * acceptance into the 'normal' part of the kernel.  It is only here as a place
50  * holder to get module autoloading functionality working for visorbus.  Code
51  * must be added to scripts/mod/file2alias.c, etc., to get this working
52  * properly.
53  */
54 MODULE_ALIAS("visorbus:" VISOR_VNIC_CHANNEL_GUID_STR);
55 
56 struct chanstat {
57 	unsigned long got_rcv;
58 	unsigned long got_enbdisack;
59 	unsigned long got_xmit_done;
60 	unsigned long xmit_fail;
61 	unsigned long sent_enbdis;
62 	unsigned long sent_promisc;
63 	unsigned long sent_post;
64 	unsigned long sent_post_failed;
65 	unsigned long sent_xmit;
66 	unsigned long reject_count;
67 	unsigned long extra_rcvbufs_sent;
68 };
69 
70 /* struct visornic_devdata
71  * @enabled:                        0 = disabled, 1 = enabled to receive.
72  * @enab_dis_acked:                 NET_RCV_ENABLE/DISABLE acked by IOPART.
73  * @struct *dev:
74  * @struct *netdev:
75  * @struct net_stats:
76  * @interrupt_rcvd:
77  * @rsp_queue:
78  * @struct **rcvbuf:
79  * @incarnation_id:                 incarnation_id lets IOPART know about
80  *                                  re-birth.
81  * @old_flags:                      flags as they were prior to
82  *                                  set_multicast_list.
83  * @usage:                          count of users.
84  * @num_rcv_bufs:                   number of rcv buffers the vnic will post.
85  * @num_rcv_bufs_could_not_alloc:
86  * @num_rcvbuf_in_iovm:
87  * @alloc_failed_in_if_needed_cnt:
88  * @alloc_failed_in_repost_rtn_cnt:
89  * @max_outstanding_net_xmits:      absolute max number of outstanding xmits
90  *                                  - should never hit this.
91  * @upper_threshold_net_xmits:      high water mark for calling
92  *                                  netif_stop_queue().
93  * @lower_threshold_net_xmits:      low water mark for calling
94  *                                  netif_wake_queue().
95  * @struct xmitbufhead:             xmitbufhead - head of the xmit buffer list
96  *                                  sent to the IOPART end.
97  * @server_down_complete_func:
98  * @struct timeout_reset:
99  * @struct *cmdrsp_rcv:             cmdrsp_rcv is used for posting/unposting rcv
100  *                                  buffers.
101  * @struct *xmit_cmdrsp:            xmit_cmdrsp - issues NET_XMIT - only one
102  *                                  active xmit at a time.
103  * @server_down:                    IOPART is down.
104  * @server_change_state:            Processing SERVER_CHANGESTATE msg.
105  * @going_away:                     device is being torn down.
106  * @struct *eth_debugfs_dir:
107  * @interrupts_rcvd:
108  * @interrupts_notme:
109  * @interrupts_disabled:
110  * @busy_cnt:
111  * @priv_lock:                      spinlock to access devdata structures.
112  * @flow_control_upper_hits:
113  * @flow_control_lower_hits:
114  * @n_rcv0:                         # rcvs of 0 buffers.
115  * @n_rcv1:                         # rcvs of 1 buffers.
116  * @n_rcv2:                         # rcvs of 2 buffers.
117  * @n_rcvx:                         # rcvs of >2 buffers.
118  * @found_repost_rcvbuf_cnt:        # repost_rcvbuf_cnt.
119  * @repost_found_skb_cnt:           # of times we found the skb.
120  * @n_repost_deficit:               # of lost rcv buffers.
121  * @bad_rcv_buf:                    # of unknown rcv skb not freed.
122  * @n_rcv_packets_not_accepted:     # bogus rcv packets.
123  * @queuefullmsg_logged:
124  * @struct chstat:
125  * @struct napi:
126  * @struct cmdrsp:
127  */
128 struct visornic_devdata {
129 	unsigned short enabled;
130 	unsigned short enab_dis_acked;
131 
132 	struct visor_device *dev;
133 	struct net_device *netdev;
134 	struct net_device_stats net_stats;
135 	atomic_t interrupt_rcvd;
136 	wait_queue_head_t rsp_queue;
137 	struct sk_buff **rcvbuf;
138 	u64 incarnation_id;
139 	unsigned short old_flags;
140 	atomic_t usage;
141 
142 	int num_rcv_bufs;
143 	int num_rcv_bufs_could_not_alloc;
144 	atomic_t num_rcvbuf_in_iovm;
145 	unsigned long alloc_failed_in_if_needed_cnt;
146 	unsigned long alloc_failed_in_repost_rtn_cnt;
147 
148 	unsigned long max_outstanding_net_xmits;
149 	unsigned long upper_threshold_net_xmits;
150 	unsigned long lower_threshold_net_xmits;
151 	struct sk_buff_head xmitbufhead;
152 
153 	visorbus_state_complete_func server_down_complete_func;
154 	struct work_struct timeout_reset;
155 	struct uiscmdrsp *cmdrsp_rcv;
156 	struct uiscmdrsp *xmit_cmdrsp;
157 	bool server_down;
158 	bool server_change_state;
159 	bool going_away;
160 	struct dentry *eth_debugfs_dir;
161 	u64 interrupts_rcvd;
162 	u64 interrupts_notme;
163 	u64 interrupts_disabled;
164 	u64 busy_cnt;
165 	/* spinlock to access devdata structures. */
166 	spinlock_t priv_lock;
167 
168 	/* flow control counter */
169 	u64 flow_control_upper_hits;
170 	u64 flow_control_lower_hits;
171 
172 	/* debug counters */
173 	unsigned long n_rcv0;
174 	unsigned long n_rcv1;
175 	unsigned long n_rcv2;
176 	unsigned long n_rcvx;
177 	unsigned long found_repost_rcvbuf_cnt;
178 	unsigned long repost_found_skb_cnt;
179 	unsigned long n_repost_deficit;
180 	unsigned long bad_rcv_buf;
181 	unsigned long n_rcv_packets_not_accepted;
182 
183 	int queuefullmsg_logged;
184 	struct chanstat chstat;
185 	struct napi_struct napi;
186 	struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
187 };
188 
189 /* Returns next non-zero index on success or 0 on failure (i.e. out of room). */
190 static u16 add_physinfo_entries(u64 inp_pfn, u16 inp_off, u16 inp_len,
191 				u16 index, u16 max_pi_arr_entries,
192 				struct phys_info pi_arr[])
193 {
194 	u16 i, len, firstlen;
195 
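	/* Number of bytes of this entry that fit in its first page,
	 * starting at inp_off.
	 */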
196 	firstlen = PI_PAGE_SIZE - inp_off;
197 	if (inp_len <= firstlen) {
198 		/* The input entry spans only one page - add as is. */
199 		if (index >= max_pi_arr_entries)
200 			return 0;
201 		pi_arr[index].pi_pfn = inp_pfn;
202 		pi_arr[index].pi_off = (u16)inp_off;
203 		pi_arr[index].pi_len = (u16)inp_len;
204 		return index + 1;
205 	}
206 
207 	/* This entry spans multiple pages. */
208 	for (len = inp_len, i = 0; len;
209 		len -= pi_arr[index + i].pi_len, i++) {
210 		if (index + i >= max_pi_arr_entries)
211 			return 0;
212 		pi_arr[index + i].pi_pfn = inp_pfn + i;
213 		if (i == 0) {
214 			pi_arr[index].pi_off = inp_off;
215 			pi_arr[index].pi_len = firstlen;
216 		} else {
217 			pi_arr[index + i].pi_off = 0;
218 			pi_arr[index + i].pi_len = min_t(u16, len,
219 							 PI_PAGE_SIZE);
220 		}
221 	}
222 	return index + i;
223 }
224 
225 /* visor_copy_fragsinfo_from_skb - copy fragment list in the SKB to a phys_info
226  *				   array that the IOPART understands
227  * @skb:	  Skbuff that we are pulling the frags from.
228  * @firstfraglen: Length of first fragment in skb.
229  * @frags_max:	  Max len of frags array.
230  * @frags:	  Frags array filled in on output.
231  *
232  * Return: Positive integer indicating number of entries filled in frags on
233  *         success, negative integer on error.
234  */
235 static int visor_copy_fragsinfo_from_skb(struct sk_buff *skb,
236 					 unsigned int firstfraglen,
237 					 unsigned int frags_max,
238 					 struct phys_info frags[])
239 {
240 	unsigned int count = 0, frag, size, offset = 0, numfrags;
241 	unsigned int total_count;
242 
243 	numfrags = skb_shinfo(skb)->nr_frags;
244 
245 	/* Compute the number of fragments this skb has, and if it's more than
246 	 * the frag array can hold, linearize the skb.
247 	 */
248 	total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
249 	if (firstfraglen % PI_PAGE_SIZE)
250 		total_count++;
251 
252 	if (total_count > frags_max) {
253 		if (skb_linearize(skb))
254 			return -EINVAL;
255 		numfrags = skb_shinfo(skb)->nr_frags;
256 		firstfraglen = 0;
257 	}
258 
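	/* Walk the linear (skb->data) area page by page, emitting one
	 * phys_info entry per page touched.
	 */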
259 	while (firstfraglen) {
260 		if (count == frags_max)
261 			return -EINVAL;
262 
263 		frags[count].pi_pfn =
264 			page_to_pfn(virt_to_page(skb->data + offset));
265 		frags[count].pi_off =
266 			(unsigned long)(skb->data + offset) & PI_PAGE_MASK;
267 		size = min_t(unsigned int, firstfraglen,
268 			     PI_PAGE_SIZE - frags[count].pi_off);
269 
270 		/* can take smallest of firstfraglen (what's left) OR
271 		 * bytes left in the page
272 		 */
273 		frags[count].pi_len = size;
274 		firstfraglen -= size;
275 		offset += size;
276 		count++;
277 	}
278 	if (numfrags) {
279 		if ((count + numfrags) > frags_max)
280 			return -EINVAL;
281 
282 		for (frag = 0; frag < numfrags; frag++) {
283 			count = add_physinfo_entries(page_to_pfn(
284 				  skb_frag_page(&skb_shinfo(skb)->frags[frag])),
285 				  skb_frag_off(&skb_shinfo(skb)->frags[frag]),
286 				  skb_frag_size(&skb_shinfo(skb)->frags[frag]),
287 				  count, frags_max, frags);
288 			/* add_physinfo_entries only returns
289 			 * zero if the frags array is out of room
290 			 * That should never happen because we
291 			 * fail above, if count+numfrags > frags_max.
292 			 */
293 			if (!count)
294 				return -EINVAL;
295 		}
296 	}
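	/* If this skb chains further skbs via frag_list, recurse into each
	 * one and append its fragments after those recorded so far.
	 */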
297 	if (skb_shinfo(skb)->frag_list) {
298 		struct sk_buff *skbinlist;
299 		int c;
300 
301 		for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
302 		     skbinlist = skbinlist->next) {
303 			c = visor_copy_fragsinfo_from_skb(skbinlist,
304 							  skbinlist->len -
305 							  skbinlist->data_len,
306 							  frags_max - count,
307 							  &frags[count]);
308 			if (c < 0)
309 				return c;
310 			count += c;
311 		}
312 	}
313 	return count;
314 }
315 
316 static ssize_t enable_ints_write(struct file *file,
317 				 const char __user *buffer,
318 				 size_t count, loff_t *ppos)
319 {
320 	/* We don't want to break the ABI here by removing a debugfs
321 	 * file or making it non-writable, so let's just keep this as
322 	 * a vestigial function.
323 	 */
324 	return count;
325 }
326 
327 static const struct file_operations debugfs_enable_ints_fops = {
328 	.write = enable_ints_write,
329 };
330 
331 /* visornic_serverdown_complete - pause device following IOPART going down
332  * @devdata: Device managed by IOPART.
333  *
334  * The IO partition has gone down, and we need to do some cleanup for when it
335  * comes back. Treat the IO partition as the link being down.
336  */
337 static void visornic_serverdown_complete(struct visornic_devdata *devdata)
338 {
339 	struct net_device *netdev = devdata->netdev;
340 
341 	/* Stop polling for interrupts */
342 	visorbus_disable_channel_interrupts(devdata->dev);
343 
344 	rtnl_lock();
345 	dev_close(netdev);
346 	rtnl_unlock();
347 
348 	atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
349 	devdata->chstat.sent_xmit = 0;
350 	devdata->chstat.got_xmit_done = 0;
351 
352 	if (devdata->server_down_complete_func)
353 		(*devdata->server_down_complete_func)(devdata->dev, 0);
354 
355 	devdata->server_down = true;
356 	devdata->server_change_state = false;
357 	devdata->server_down_complete_func = NULL;
358 }
359 
360 /* visornic_serverdown - Command has notified us that IOPART is down
361  * @devdata:	   Device managed by IOPART.
362  * @complete_func: Function to call when finished.
363  *
364  * Schedule the work needed to handle the server down request. Make sure we
365  * haven't already handled the server change state event.
366  *
367  * Return: 0 if we scheduled the work, negative integer on error.
368  */
369 static int visornic_serverdown(struct visornic_devdata *devdata,
370 			       visorbus_state_complete_func complete_func)
371 {
372 	unsigned long flags;
373 	int err;
374 
375 	spin_lock_irqsave(&devdata->priv_lock, flags);
376 	if (devdata->server_change_state) {
377 		dev_dbg(&devdata->dev->device, "%s changing state\n",
378 			__func__);
379 		err = -EINVAL;
380 		goto err_unlock;
381 	}
382 	if (devdata->server_down) {
383 		dev_dbg(&devdata->dev->device, "%s already down\n",
384 			__func__);
385 		err = -EINVAL;
386 		goto err_unlock;
387 	}
388 	if (devdata->going_away) {
389 		dev_dbg(&devdata->dev->device,
390 			"%s aborting because device removal pending\n",
391 			__func__);
392 		err = -ENODEV;
393 		goto err_unlock;
394 	}
395 	devdata->server_change_state = true;
396 	devdata->server_down_complete_func = complete_func;
397 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
398 
399 	visornic_serverdown_complete(devdata);
400 	return 0;
401 
402 err_unlock:
403 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
404 	return err;
405 }
406 
407 /* alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition
408  * @netdev: Network adapter the rcv bufs are attached to.
409  *
410  * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
411  * so that it can write rcv data into our memory space.
412  *
413  * Return: Pointer to sk_buff.
414  */
415 static struct sk_buff *alloc_rcv_buf(struct net_device *netdev)
416 {
417 	struct sk_buff *skb;
418 
419 	/* NOTE: the first fragment in each rcv buffer is pointed to by
420 	 * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
421 	 * in length, so the first frag is large enough to hold 1514.
422 	 */
423 	skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
424 	if (!skb)
425 		return NULL;
426 	skb->dev = netdev;
427 	/* current value of mtu doesn't come into play here; large
428 	 * packets will just end up using multiple rcv buffers all of
429 	 * same size.
430 	 */
431 	skb->len = RCVPOST_BUF_SIZE;
432 	/* alloc_skb already zeroes data_len; set it explicitly for clarity. */
433 	skb->data_len = 0;
434 	return skb;
435 }
436 
437 /* post_skb - post a skb to the IO Partition
438  * @cmdrsp:  Cmdrsp packet to be send to the IO Partition.
439  * @devdata: visornic_devdata to post the skb to.
440  * @skb:     Skb to give to the IO partition.
441  *
442  * Return: 0 on success, negative integer on error.
443  */
444 static int post_skb(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
445 		    struct sk_buff *skb)
446 {
447 	int err;
448 
449 	cmdrsp->net.buf = skb;
450 	cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
451 	cmdrsp->net.rcvpost.frag.pi_off =
452 		(unsigned long)skb->data & PI_PAGE_MASK;
453 	cmdrsp->net.rcvpost.frag.pi_len = skb->len;
454 	cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
455 
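	/* The posted rcv fragment must not cross a page boundary. */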
456 	if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) > PI_PAGE_SIZE)
457 		return -EINVAL;
458 
459 	cmdrsp->net.type = NET_RCV_POST;
460 	cmdrsp->cmdtype = CMD_NET_TYPE;
461 	err = visorchannel_signalinsert(devdata->dev->visorchannel,
462 					IOCHAN_TO_IOPART,
463 					cmdrsp);
464 	if (err) {
465 		devdata->chstat.sent_post_failed++;
466 		return err;
467 	}
468 
469 	atomic_inc(&devdata->num_rcvbuf_in_iovm);
470 	devdata->chstat.sent_post++;
471 	return 0;
472 }
473 
474 /* send_enbdis - Send NET_RCV_ENBDIS to IO Partition
475  * @netdev:  Netdevice we are enabling/disabling, used as context return value.
476  * @state:   Enable = 1/disable = 0.
477  * @devdata: Visornic device we are enabling/disabling.
478  *
479  * Send the enable/disable message to the IO Partition.
480  *
481  * Return: 0 on success, negative integer on error.
482  */
483 static int send_enbdis(struct net_device *netdev, int state,
484 		       struct visornic_devdata *devdata)
485 {
486 	int err;
487 
488 	devdata->cmdrsp_rcv->net.enbdis.enable = state;
489 	devdata->cmdrsp_rcv->net.enbdis.context = netdev;
490 	devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
491 	devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
492 	err = visorchannel_signalinsert(devdata->dev->visorchannel,
493 					IOCHAN_TO_IOPART,
494 					devdata->cmdrsp_rcv);
495 	if (err)
496 		return err;
497 	devdata->chstat.sent_enbdis++;
498 	return 0;
499 }
500 
501 /* visornic_disable_with_timeout - disable network adapter
502  * @netdev:  netdevice to disable.
503  * @timeout: Timeout to wait for disable.
504  *
505  * Disable the network adapter and inform the IO Partition that we are disabled.
506  * Reclaim memory from rcv bufs.
507  *
508  * Return: 0 on success, negative integer if the IO Partition fails to respond.
509  */
510 static int visornic_disable_with_timeout(struct net_device *netdev,
511 					 const int timeout)
512 {
513 	struct visornic_devdata *devdata = netdev_priv(netdev);
514 	int i;
515 	unsigned long flags;
516 	int wait = 0;
517 	int err;
518 
519 	/* send a msg telling the other end we are stopping incoming pkts */
520 	spin_lock_irqsave(&devdata->priv_lock, flags);
521 	devdata->enabled = 0;
522 	/* must wait for ack */
523 	devdata->enab_dis_acked = 0;
524 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
525 
526 	/* send disable and wait for ack -- don't hold lock when sending
527 	 * disable because if the queue is full, insert might sleep.
528 	 * If an error occurs, don't wait for the timeout.
529 	 */
530 	err = send_enbdis(netdev, 0, devdata);
531 	if (err)
532 		return err;
533 
534 	/* wait for ack to arrive before we try to free rcv buffers
535 	 * NOTE: the other end automatically unposts the rcv buffers
536 	 * when it gets a disable.
537 	 */
538 	spin_lock_irqsave(&devdata->priv_lock, flags);
539 	while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
540 	       (wait < timeout)) {
541 		if (devdata->enab_dis_acked)
542 			break;
543 		if (devdata->server_down || devdata->server_change_state) {
544 			dev_dbg(&netdev->dev, "%s server went away\n",
545 				__func__);
546 			break;
547 		}
548 		set_current_state(TASK_INTERRUPTIBLE);
549 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
550 		wait += schedule_timeout(msecs_to_jiffies(10));
551 		spin_lock_irqsave(&devdata->priv_lock, flags);
552 	}
553 
554 	/* Wait for usage to go to 1 (no other users) before freeing
555 	 * rcv buffers
556 	 */
557 	if (atomic_read(&devdata->usage) > 1) {
558 		while (1) {
559 			set_current_state(TASK_INTERRUPTIBLE);
560 			spin_unlock_irqrestore(&devdata->priv_lock, flags);
561 			schedule_timeout(msecs_to_jiffies(10));
562 			spin_lock_irqsave(&devdata->priv_lock, flags);
563 			if (atomic_read(&devdata->usage) == 1)
564 				break;
565 		}
566 	}
567 	/* we've set enabled to 0, so we can give up the lock. */
568 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
569 
570 	/* stop the transmit queue so nothing more can be transmitted */
571 	netif_stop_queue(netdev);
572 
573 	napi_disable(&devdata->napi);
574 
575 	skb_queue_purge(&devdata->xmitbufhead);
576 
577 	/* Free rcv buffers - other end has automatically unposted them on
578 	 * disable
579 	 */
580 	for (i = 0; i < devdata->num_rcv_bufs; i++) {
581 		if (devdata->rcvbuf[i]) {
582 			kfree_skb(devdata->rcvbuf[i]);
583 			devdata->rcvbuf[i] = NULL;
584 		}
585 	}
586 
587 	return 0;
588 }
589 
590 /* init_rcv_bufs - initialize receive buffs and send them to the IO Partition
591  * @netdev:  struct netdevice.
592  * @devdata: visornic_devdata.
593  *
594  * Allocate rcv buffers and post them to the IO Partition.
595  *
596  * Return: 0 on success, negative integer on failure.
597  */
598 static int init_rcv_bufs(struct net_device *netdev,
599 			 struct visornic_devdata *devdata)
600 {
601 	int i, j, count, err;
602 
603 	/* allocate a fixed number of receive buffers to post to uisnic;
604 	 * post the receive buffers after we've allocated a required amount
605 	 */
606 	for (i = 0; i < devdata->num_rcv_bufs; i++) {
607 		devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
608 		/* if we failed to allocate one let us stop */
609 		if (!devdata->rcvbuf[i])
610 			break;
611 	}
612 	/* couldn't even allocate one -- bail out */
613 	if (i == 0)
614 		return -ENOMEM;
615 	count = i;
616 
617 	/* Ensure we can alloc 2/3rd of the requested number of buffers.
618 	 * 2/3 is an arbitrary choice; used also in ndis init.c
619 	 */
620 	if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
621 		/* free receive buffers we did alloc and then bail out */
622 		for (i = 0; i < count; i++) {
623 			kfree_skb(devdata->rcvbuf[i]);
624 			devdata->rcvbuf[i] = NULL;
625 		}
626 		return -ENOMEM;
627 	}
628 
629 	/* post receive buffers to receive incoming input - without holding
630 	 * lock - we've not enabled nor started the queue so there shouldn't
631 	 * be any rcv or xmit activity
632 	 */
633 	for (i = 0; i < count; i++) {
634 		err = post_skb(devdata->cmdrsp_rcv, devdata,
635 			       devdata->rcvbuf[i]);
636 		if (!err)
637 			continue;
638 
639 		/* Error handling -
640 		 * If we posted at least one skb, we should return success,
641 		 * but need to free the resources that we have not successfully
642 		 * posted.
643 		 */
644 		for (j = i; j < count; j++) {
645 			kfree_skb(devdata->rcvbuf[j]);
646 			devdata->rcvbuf[j] = NULL;
647 		}
648 		if (i == 0)
649 			return err;
650 		break;
651 	}
652 
653 	return 0;
654 }
655 
656 /* visornic_enable_with_timeout	- send enable to IO Partition
657  * @netdev:  struct net_device.
658  * @timeout: Time to wait for the ACK from the enable.
659  *
660  * Initializes and posts receive buffers to the IOVM, then sends the enable. Timeout is
661  * defined in msecs (timeout of 0 specifies infinite wait).
662  *
663  * Return: 0 on success, negative integer on failure.
664  */
665 static int visornic_enable_with_timeout(struct net_device *netdev,
666 					const int timeout)
667 {
668 	int err = 0;
669 	struct visornic_devdata *devdata = netdev_priv(netdev);
670 	unsigned long flags;
671 	int wait = 0;
672 
673 	napi_enable(&devdata->napi);
674 
675 	/* NOTE: the other end automatically unposts the rcv buffers when it
676 	 * gets a disable.
677 	 */
678 	err = init_rcv_bufs(netdev, devdata);
679 	if (err < 0) {
680 		dev_err(&netdev->dev,
681 			"%s failed to init rcv bufs\n", __func__);
682 		return err;
683 	}
684 
685 	spin_lock_irqsave(&devdata->priv_lock, flags);
686 	devdata->enabled = 1;
687 	devdata->enab_dis_acked = 0;
688 
689 	/* now we're ready, let's send an ENB to uisnic but until we get
690 	 * an ACK back from uisnic, we'll drop the packets
691 	 */
692 	devdata->n_rcv_packets_not_accepted = 0;
693 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
694 
695 	/* send enable and wait for ack -- don't hold lock when sending enable
696 	 * because if the queue is full, insert might sleep. If an error
697 	 * occurs error out.
698 	 */
699 	err = send_enbdis(netdev, 1, devdata);
700 	if (err)
701 		return err;
702 
703 	spin_lock_irqsave(&devdata->priv_lock, flags);
704 	while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
705 	       (wait < timeout)) {
706 		if (devdata->enab_dis_acked)
707 			break;
708 		if (devdata->server_down || devdata->server_change_state) {
709 			dev_dbg(&netdev->dev, "%s server went away\n",
710 				__func__);
711 			break;
712 		}
713 		set_current_state(TASK_INTERRUPTIBLE);
714 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
715 		wait += schedule_timeout(msecs_to_jiffies(10));
716 		spin_lock_irqsave(&devdata->priv_lock, flags);
717 	}
718 
719 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
720 
721 	if (!devdata->enab_dis_acked) {
722 		dev_err(&netdev->dev, "%s missing ACK\n", __func__);
723 		return -EIO;
724 	}
725 
726 	netif_start_queue(netdev);
727 	return 0;
728 }
729 
730 /* visornic_timeout_reset - handle xmit timeout resets
731  * @work: Work item that scheduled the work.
732  *
733  * Transmit timeouts are typically handled by resetting the device for our
734  * virtual NIC; we will send a disable and enable to the IOVM. If it doesn't
735  * respond, we will trigger a serverdown.
736  */
737 static void visornic_timeout_reset(struct work_struct *work)
738 {
739 	struct visornic_devdata *devdata;
740 	struct net_device *netdev;
741 	int response = 0;
742 
743 	devdata = container_of(work, struct visornic_devdata, timeout_reset);
744 	netdev = devdata->netdev;
745 
746 	rtnl_lock();
747 	if (!netif_running(netdev)) {
748 		rtnl_unlock();
749 		return;
750 	}
751 
752 	response = visornic_disable_with_timeout(netdev,
753 						 VISORNIC_INFINITE_RSP_WAIT);
754 	if (response)
755 		goto call_serverdown;
756 
757 	response = visornic_enable_with_timeout(netdev,
758 						VISORNIC_INFINITE_RSP_WAIT);
759 	if (response)
760 		goto call_serverdown;
761 
762 	rtnl_unlock();
763 
764 	return;
765 
766 call_serverdown:
767 	visornic_serverdown(devdata, NULL);
768 	rtnl_unlock();
769 }
770 
771 /* visornic_open - enable the visornic device and mark the queue started
772  * @netdev: netdevice to start.
773  *
774  * Enable the device and start the transmit queue.
775  *
776  * Return: 0 on success.
777  */
778 static int visornic_open(struct net_device *netdev)
779 {
780 	visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
781 	return 0;
782 }
783 
784 /* visornic_close - disables the visornic device and stops the queues
785  * @netdev: netdevice to stop.
786  *
787  * Disable the device and stop the transmit queue.
788  *
789  * Return: 0 on success.
790  */
791 static int visornic_close(struct net_device *netdev)
792 {
793 	visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
794 	return 0;
795 }
796 
797 /* devdata_xmits_outstanding - compute outstanding xmits
798  * @devdata: visornic_devdata for device
799  *
800  * Return: Long integer representing the number of outstanding xmits.
801  */
802 static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
803 {
804 	if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
805 		return devdata->chstat.sent_xmit -
806 			devdata->chstat.got_xmit_done;
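	/* sent_xmit has wrapped past ULONG_MAX; account for the wrap. */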
807 	return (ULONG_MAX - devdata->chstat.got_xmit_done
808 		+ devdata->chstat.sent_xmit + 1);
809 }
810 
811 /* vnic_hit_high_watermark
812  * @devdata:	    Indicates visornic device we are checking.
813  * @high_watermark: Max num of unacked xmits we will tolerate before we will
814  *		    start throttling.
815  *
816  * Return: True iff the number of unacked xmits sent to the IO Partition is >=
817  *	   high_watermark. False otherwise.
818  */
819 static bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
820 				    ulong high_watermark)
821 {
822 	return (devdata_xmits_outstanding(devdata) >= high_watermark);
823 }
824 
825 /* vnic_hit_low_watermark
826  * @devdata:	   Indicates visornic device we are checking.
827  * @low_watermark: We will wait until the num of unacked xmits drops to this
828  *		   value or lower before we start transmitting again.
829  *
830  * Return: True iff the number of unacked xmits sent to the IO Partition is <=
831  *	   low_watermark.
832  */
833 static bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
834 				   ulong low_watermark)
835 {
836 	return (devdata_xmits_outstanding(devdata) <= low_watermark);
837 }
838 
839 /* visornic_xmit - send a packet to the IO Partition
840  * @skb:    Packet to be sent.
841  * @netdev: Net device the packet is being sent from.
842  *
843  * Convert the skb to a cmdrsp so the IO Partition can understand it, and send
844  * the XMIT command to the IO Partition for processing. This function is
845  * protected from concurrent calls by a spinlock xmit_lock in the net_device
846  * struct. As soon as the function returns, it can be called again.
847  *
848  * Return: NETDEV_TX_OK.
849  */
850 static netdev_tx_t visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
851 {
852 	struct visornic_devdata *devdata;
853 	int len, firstfraglen, padlen;
854 	struct uiscmdrsp *cmdrsp = NULL;
855 	unsigned long flags;
856 	int err;
857 
858 	devdata = netdev_priv(netdev);
859 	spin_lock_irqsave(&devdata->priv_lock, flags);
860 
861 	if (netif_queue_stopped(netdev) || devdata->server_down ||
862 	    devdata->server_change_state) {
863 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
864 		devdata->busy_cnt++;
865 		dev_dbg(&netdev->dev,
866 			"%s busy - queue stopped\n", __func__);
867 		kfree_skb(skb);
868 		return NETDEV_TX_OK;
869 	}
870 
871 	/* sk_buff struct is used to host network data throughout all the
872 	 * linux network subsystems
873 	 */
874 	len = skb->len;
875 
876 	/* skb->len is the FULL length of data (including fragmentary portion)
877 	 * skb->data_len is the length of the fragment portion in frags
878 	 * skb->len - skb->data_len is size of the 1st fragment in skb->data
879 	 * calculate the length of the first fragment that skb->data is
880 	 * pointing to
881 	 */
882 	firstfraglen = skb->len - skb->data_len;
883 	if (firstfraglen < ETH_HLEN) {
884 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
885 		devdata->busy_cnt++;
886 		dev_err(&netdev->dev,
887 			"%s busy - first frag too small (%d)\n",
888 			__func__, firstfraglen);
889 		kfree_skb(skb);
890 		return NETDEV_TX_OK;
891 	}
892 
893 	if (len < ETH_MIN_PACKET_SIZE &&
894 	    ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
895 		/* pad the packet out to minimum size */
896 		padlen = ETH_MIN_PACKET_SIZE - len;
897 		skb_put_zero(skb, padlen);
898 		len += padlen;
899 		firstfraglen += padlen;
900 	}
901 
902 	cmdrsp = devdata->xmit_cmdrsp;
903 	/* clear cmdrsp */
904 	memset(cmdrsp, 0, SIZEOF_CMDRSP);
905 	cmdrsp->net.type = NET_XMIT;
906 	cmdrsp->cmdtype = CMD_NET_TYPE;
907 
908 	/* save the pointer to skb -- we'll need it for completion */
909 	cmdrsp->net.buf = skb;
910 
911 	if (vnic_hit_high_watermark(devdata,
912 				    devdata->max_outstanding_net_xmits)) {
913 		/* extra NET_XMITs queued over to IOVM - need to wait */
914 		devdata->chstat.reject_count++;
915 		if (!devdata->queuefullmsg_logged &&
916 		    ((devdata->chstat.reject_count & 0x3ff) == 1))
917 			devdata->queuefullmsg_logged = 1;
918 		netif_stop_queue(netdev);
919 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
920 		devdata->busy_cnt++;
921 		dev_dbg(&netdev->dev,
922 			"%s busy - waiting for iovm to catch up\n",
923 			__func__);
924 		kfree_skb(skb);
925 		return NETDEV_TX_OK;
926 	}
927 	if (devdata->queuefullmsg_logged)
928 		devdata->queuefullmsg_logged = 0;
929 
930 	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
931 		cmdrsp->net.xmt.lincsum.valid = 1;
932 		cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
933 		if (skb_transport_header(skb) > skb->data) {
934 			cmdrsp->net.xmt.lincsum.hrawoff =
935 				skb_transport_header(skb) - skb->data;
936 			cmdrsp->net.xmt.lincsum.hrawoffv = 1;
937 		}
938 		if (skb_network_header(skb) > skb->data) {
939 			cmdrsp->net.xmt.lincsum.nhrawoff =
940 				skb_network_header(skb) - skb->data;
941 			cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
942 		}
943 		cmdrsp->net.xmt.lincsum.csum = skb->csum;
944 	} else {
945 		cmdrsp->net.xmt.lincsum.valid = 0;
946 	}
947 
948 	/* save off the length of the entire data packet */
949 	cmdrsp->net.xmt.len = len;
950 
951 	/* copy ethernet header from first frag into ocmdrsp
952 	 * - everything else will be passed in frags & DMA'ed
953 	 */
954 	memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN);
955 
956 	/* copy frags info - from skb->data we need to only provide access
957 	 * beyond eth header
958 	 */
959 	cmdrsp->net.xmt.num_frags =
960 		visor_copy_fragsinfo_from_skb(skb, firstfraglen,
961 					      MAX_PHYS_INFO,
962 					      cmdrsp->net.xmt.frags);
963 	if (cmdrsp->net.xmt.num_frags < 0) {
964 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
965 		devdata->busy_cnt++;
966 		dev_err(&netdev->dev,
967 			"%s busy - copy frags failed\n", __func__);
968 		kfree_skb(skb);
969 		return NETDEV_TX_OK;
970 	}
971 
972 	err = visorchannel_signalinsert(devdata->dev->visorchannel,
973 					IOCHAN_TO_IOPART, cmdrsp);
974 	if (err) {
975 		netif_stop_queue(netdev);
976 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
977 		devdata->busy_cnt++;
978 		dev_dbg(&netdev->dev,
979 			"%s busy - signalinsert failed\n", __func__);
980 		kfree_skb(skb);
981 		return NETDEV_TX_OK;
982 	}
983 
984 	/* Track the skbs that have been sent to the IOVM for XMIT */
985 	skb_queue_head(&devdata->xmitbufhead, skb);
986 
987 	/* update xmt stats */
988 	devdata->net_stats.tx_packets++;
989 	devdata->net_stats.tx_bytes += skb->len;
990 	devdata->chstat.sent_xmit++;
991 
992 	/* check if we have hit the high watermark for netif_stop_queue() */
993 	if (vnic_hit_high_watermark(devdata,
994 				    devdata->upper_threshold_net_xmits)) {
995 		/* extra NET_XMITs queued over to IOVM - need to wait */
996 		/* stop queue - call netif_wake_queue() after lower threshold */
997 		netif_stop_queue(netdev);
998 		dev_dbg(&netdev->dev,
999 			"%s busy - invoking iovm flow control\n",
1000 			__func__);
1001 		devdata->flow_control_upper_hits++;
1002 	}
1003 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
1004 
1005 	/* skb will be freed when we get back NET_XMIT_DONE */
1006 	return NETDEV_TX_OK;
1007 }
1008 
1009 /* visornic_get_stats - returns net_stats of the visornic device
1010  * @netdev: netdevice.
1011  *
1012  * Return: Pointer to the net_device_stats struct for the device.
1013  */
1014 static struct net_device_stats *visornic_get_stats(struct net_device *netdev)
1015 {
1016 	struct visornic_devdata *devdata = netdev_priv(netdev);
1017 
1018 	return &devdata->net_stats;
1019 }
1020 
1021 /* visornic_change_mtu - changes mtu of device
1022  * @netdev: netdevice.
1023  * @new_mtu: Value of new mtu.
1024  *
1025  * The device's MTU cannot be changed by the system; it must be changed via a
1026  * CONTROLVM message. All vnics and pnics in a switch have to have the same MTU
1027  * for everything to work. Currently not supported.
1028  *
1029  * Return: -EINVAL.
1030  */
1031 static int visornic_change_mtu(struct net_device *netdev, int new_mtu)
1032 {
1033 	return -EINVAL;
1034 }
1035 
1036 /* visornic_set_multi - set visornic device flags
1037  * @netdev: netdevice.
1038  *
1039  * The only flag we currently support is IFF_PROMISC.
1040  */
1041 static void visornic_set_multi(struct net_device *netdev)
1042 {
1043 	struct uiscmdrsp *cmdrsp;
1044 	struct visornic_devdata *devdata = netdev_priv(netdev);
1045 	int err = 0;
1046 
1047 	if (devdata->old_flags == netdev->flags)
1048 		return;
1049 
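	/* Only a change in IFF_PROMISC is forwarded to the IO Partition;
	 * other flag changes are simply recorded below.
	 */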
1050 	if ((netdev->flags & IFF_PROMISC) ==
1051 	    (devdata->old_flags & IFF_PROMISC))
1052 		goto out_save_flags;
1053 
1054 	cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1055 	if (!cmdrsp)
1056 		return;
1057 	cmdrsp->cmdtype = CMD_NET_TYPE;
1058 	cmdrsp->net.type = NET_RCV_PROMISC;
1059 	cmdrsp->net.enbdis.context = netdev;
1060 	cmdrsp->net.enbdis.enable =
1061 		netdev->flags & IFF_PROMISC;
1062 	err = visorchannel_signalinsert(devdata->dev->visorchannel,
1063 					IOCHAN_TO_IOPART,
1064 					cmdrsp);
1065 	kfree(cmdrsp);
1066 	if (err)
1067 		return;
1068 
1069 out_save_flags:
1070 	devdata->old_flags = netdev->flags;
1071 }
1072 
1073 /* visornic_xmit_timeout - request to timeout the xmit
1074  * @netdev: netdevice.
1075  *
1076  * Queue the work and return. Make sure we have not already been informed that
1077  * the IO Partition is gone; if so, we will have already timed-out the xmits.
1078  */
1079 static void visornic_xmit_timeout(struct net_device *netdev, unsigned int txqueue)
1080 {
1081 	struct visornic_devdata *devdata = netdev_priv(netdev);
1082 	unsigned long flags;
1083 
1084 	spin_lock_irqsave(&devdata->priv_lock, flags);
1085 	if (devdata->going_away) {
1086 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
1087 		dev_dbg(&devdata->dev->device,
1088 			"%s aborting because device removal pending\n",
1089 			__func__);
1090 		return;
1091 	}
1092 
1093 	/* Ensure that a ServerDown message hasn't been received */
1094 	if (!devdata->enabled ||
1095 	    (devdata->server_down && !devdata->server_change_state)) {
1096 		dev_dbg(&netdev->dev, "%s no processing\n",
1097 			__func__);
1098 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
1099 		return;
1100 	}
1101 	schedule_work(&devdata->timeout_reset);
1102 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
1103 }
1104 
1105 /* repost_return - repost rcv bufs that have come back
1106  * @cmdrsp: IO channel command struct to post.
1107  * @devdata: Visornic devdata for the device.
1108  * @skb: Socket buffer.
1109  * @netdev: netdevice.
1110  *
1111  * Repost rcv buffers that have been returned to us when we are finished
1112  * with them.
1113  *
1114  * Return: 0 for success, negative integer on error.
1115  */
1116 static int repost_return(struct uiscmdrsp *cmdrsp,
1117 			 struct visornic_devdata *devdata,
1118 			 struct sk_buff *skb, struct net_device *netdev)
1119 {
1120 	struct net_pkt_rcv copy;
1121 	int i = 0, cc, numreposted;
1122 	int found_skb = 0;
1123 	int status = 0;
1124 
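	/* Keep a local copy of the rcv info; post_skb() below reuses
	 * cmdrsp and would overwrite it.
	 */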
1125 	copy = cmdrsp->net.rcv;
1126 	switch (copy.numrcvbufs) {
1127 	case 0:
1128 		devdata->n_rcv0++;
1129 		break;
1130 	case 1:
1131 		devdata->n_rcv1++;
1132 		break;
1133 	case 2:
1134 		devdata->n_rcv2++;
1135 		break;
1136 	default:
1137 		devdata->n_rcvx++;
1138 		break;
1139 	}
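	/* For each returned rcv buffer, find its slot in our rcvbuf array,
	 * allocate a replacement skb, and repost it to the IO Partition.
	 */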
1140 	for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1141 		for (i = 0; i < devdata->num_rcv_bufs; i++) {
1142 			if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1143 				continue;
1144 
1145 			if ((skb) && devdata->rcvbuf[i] == skb) {
1146 				devdata->found_repost_rcvbuf_cnt++;
1147 				found_skb = 1;
1148 				devdata->repost_found_skb_cnt++;
1149 			}
1150 			devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1151 			if (!devdata->rcvbuf[i]) {
1152 				devdata->num_rcv_bufs_could_not_alloc++;
1153 				devdata->alloc_failed_in_repost_rtn_cnt++;
1154 				status = -ENOMEM;
1155 				break;
1156 			}
1157 			status = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1158 			if (status) {
1159 				kfree_skb(devdata->rcvbuf[i]);
1160 				devdata->rcvbuf[i] = NULL;
1161 				break;
1162 			}
1163 			numreposted++;
1164 			break;
1165 		}
1166 	}
1167 	if (numreposted != copy.numrcvbufs) {
1168 		devdata->n_repost_deficit++;
1169 		status = -EINVAL;
1170 	}
1171 	if (skb) {
1172 		if (found_skb) {
1173 			kfree_skb(skb);
1174 		} else {
1175 			status = -EINVAL;
1176 			devdata->bad_rcv_buf++;
1177 		}
1178 	}
1179 	return status;
1180 }
1181 
1182 /* visornic_rx - handle receive packets coming back from IO Partition
1183  * @cmdrsp: Receive packet returned from IO Partition.
1184  *
1185  * Got a receive packet back from the IO Partition; handle it and send it up
1186  * the stack.
1187  *
1188  * Return: 1 iff an skb was received, otherwise 0.
1189  */
1190 static int visornic_rx(struct uiscmdrsp *cmdrsp)
1191 {
1192 	struct visornic_devdata *devdata;
1193 	struct sk_buff *skb, *prev, *curr;
1194 	struct net_device *netdev;
1195 	int cc, currsize, off;
1196 	struct ethhdr *eth;
1197 	unsigned long flags;
1198 
1199 	/* Post a new rcv buf to the other end using the cmdrsp we have at hand;
1200 	 * post it without holding the lock - we'll use the signal lock to
1201 	 * synchronize the queue insert. The cmdrsp that contains the net.rcv
1202 	 * is the one we are using to repost, so copy the info we need from it.
1203 	 */
1204 	skb = cmdrsp->net.buf;
1205 	netdev = skb->dev;
1206 
1207 	devdata = netdev_priv(netdev);
1208 
1209 	spin_lock_irqsave(&devdata->priv_lock, flags);
1210 	atomic_dec(&devdata->num_rcvbuf_in_iovm);
1211 
1212 	/* set length to how much was ACTUALLY received -
1213 	 * NOTE: rcv_done_len includes actual length of data rcvd
1214 	 * including ethhdr
1215 	 */
1216 	skb->len = cmdrsp->net.rcv.rcv_done_len;
1217 
1218 	/* update rcv stats - call it with priv_lock held */
1219 	devdata->net_stats.rx_packets++;
1220 	devdata->net_stats.rx_bytes += skb->len;
1221 
1222 	/* test enabled while holding lock */
1223 	if (!(devdata->enabled && devdata->enab_dis_acked)) {
1224 		/* don't process it unless we're in enable mode and until
1225 		 * we've gotten an ACK saying the other end got our RCV enable
1226 		 */
1227 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
1228 		repost_return(cmdrsp, devdata, skb, netdev);
1229 		return 0;
1230 	}
1231 
1232 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
1233 
1234 	/* when skb was allocated, skb->dev, skb->data, skb->len and
1235 	 * skb->data_len were setup. AND, data has already been put into the
1236 	 * skb (both first frag and in frags pages)
1237 	 * NOTE: firstfraglen is the amount of data in skb->data that is
1238 	 * not in nr_frags or frag_list. This is now simply
1239 	 * RCVPOST_BUF_SIZE. Bump tail to show how much data is in
1240 	 * firstfrag, set data_len to show the rest, and see if we have to
1241 	 * chain frag_list.
1242 	 */
1243 	/* do PRECAUTIONARY check */
1244 	if (skb->len > RCVPOST_BUF_SIZE) {
1245 		if (cmdrsp->net.rcv.numrcvbufs < 2) {
1246 			if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1247 				dev_err(&devdata->netdev->dev,
1248 					"repost_return failed");
1249 			return 0;
1250 		}
1251 		/* length rcvd is greater than firstfrag in this skb rcv buf  */
1252 		/* amount in skb->data */
1253 		skb->tail += RCVPOST_BUF_SIZE;
1254 		/* amount that will be in frag_list */
1255 		skb->data_len = skb->len - RCVPOST_BUF_SIZE;
1256 	} else {
1257 		/* data fits in this skb - no chaining - do
1258 		 * PRECAUTIONARY check
1259 		 */
1260 		/* should be 1 */
1261 		if (cmdrsp->net.rcv.numrcvbufs != 1) {
1262 			if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1263 				dev_err(&devdata->netdev->dev,
1264 					"repost_return failed");
1265 			return 0;
1266 		}
1267 		skb->tail += skb->len;
1268 		/* nothing rcvd in frag_list */
1269 		skb->data_len = 0;
1270 	}
1271 	off = skb_tail_pointer(skb) - skb->data;
1272 
1273 	/* amount we bumped tail by in the head skb
1274 	 * it is used to calculate the size of each chained skb below
1275 	 * it is also used to index into bufline to continue the copy
1276 	 * (for chansocktwopc)
1277 	 * if necessary chain the rcv skbs together.
1278 	 * NOTE: index 0 has the same as cmdrsp->net.rcv.skb; we need to
1279 	 * chain the rest to that one.
1280 	 * - do PRECAUTIONARY check
1281 	 */
1282 	if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1283 		if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1284 			dev_err(&devdata->netdev->dev, "repost_return failed");
1285 		return 0;
1286 	}
1287 
1288 	if (cmdrsp->net.rcv.numrcvbufs > 1) {
1289 		/* chain the various rcv buffers into the skb's frag_list. */
1290 		/* Note: off was initialized above  */
1291 		for (cc = 1, prev = NULL;
1292 		     cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1293 			curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1294 			curr->next = NULL;
1295 			/* start of list- set head */
1296 			if (!prev)
1297 				skb_shinfo(skb)->frag_list = curr;
1298 			else
1299 				prev->next = curr;
1300 			prev = curr;
1301 
1302 			/* should we set skb->len and skb->data_len for each
1303 			 * buffer being chained??? can't hurt!
1304 			 */
1305 			currsize = min(skb->len - off,
1306 				       (unsigned int)RCVPOST_BUF_SIZE);
1307 			curr->len = currsize;
1308 			curr->tail += currsize;
1309 			curr->data_len = 0;
1310 			off += currsize;
1311 		}
1312 		/* assert skb->len == off */
1313 		if (skb->len != off) {
1314 			netdev_err(devdata->netdev,
1315 				   "something wrong; skb->len:%d != off:%d\n",
1316 				   skb->len, off);
1317 		}
1318 	}
1319 
1320 	/* set up packet's protocol type using ethernet header - this
1321 	 * sets up skb->pkt_type & it also PULLS out the eth header
1322 	 */
1323 	skb->protocol = eth_type_trans(skb, netdev);
1324 	eth = eth_hdr(skb);
1325 	skb->csum = 0;
1326 	skb->ip_summed = CHECKSUM_NONE;
1327 
1328 	do {
1329 		/* accept all packets */
1330 		if (netdev->flags & IFF_PROMISC)
1331 			break;
1332 		if (skb->pkt_type == PACKET_BROADCAST) {
1333 			/* accept all broadcast packets */
1334 			if (netdev->flags & IFF_BROADCAST)
1335 				break;
1336 		} else if (skb->pkt_type == PACKET_MULTICAST) {
1337 			if ((netdev->flags & IFF_MULTICAST) &&
1338 			    (netdev_mc_count(netdev))) {
1339 				struct netdev_hw_addr *ha;
1340 				int found_mc = 0;
1341 
1342 				/* only accept multicast packets that we can
1343 				 * find in our multicast address list
1344 				 */
1345 				netdev_for_each_mc_addr(ha, netdev) {
1346 					if (ether_addr_equal(eth->h_dest,
1347 							     ha->addr)) {
1348 						found_mc = 1;
1349 						break;
1350 					}
1351 				}
1352 				/* accept pkt, dest matches a multicast addr */
1353 				if (found_mc)
1354 					break;
1355 			}
1356 		/* accept packet, h_dest must match vnic  mac address */
1357 		} else if (skb->pkt_type == PACKET_HOST) {
1358 			break;
1359 		} else if (skb->pkt_type == PACKET_OTHERHOST) {
1360 			/* something is not right */
1361 			dev_err(&devdata->netdev->dev,
1362 				"**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1363 				netdev->name, eth->h_dest, netdev->dev_addr);
1364 		}
1365 		/* drop packet - don't forward it up to OS */
1366 		devdata->n_rcv_packets_not_accepted++;
1367 		repost_return(cmdrsp, devdata, skb, netdev);
1368 		return 0;
1369 	} while (0);
1370 
1371 	netif_receive_skb(skb);
1372 	/* netif_receive_skb() returns various values, but in practice most
1373 	 * drivers ignore the return value.
1374 	 */
1375 
1376 	skb = NULL;
1377 	/* whether the packet got dropped or handled, the skb is freed by
1378 	 * kernel code, so we shouldn't free it. but we should repost a
1379 	 * new rcv buffer.
1380 	 */
1381 	repost_return(cmdrsp, devdata, skb, netdev);
1382 	return 1;
1383 }
1384 
1385 /* devdata_initialize - initialize devdata structure
1386  * @devdata: visornic_devdata structure to initialize.
1387  * @dev:     visorbus_device it belongs to.
1388  *
1389  * Setup initial values for the visornic, based on channel and default values.
1390  *
1391  * Return: A pointer to the devdata structure.
1392  */
1393 static struct visornic_devdata *devdata_initialize(
1394 					struct visornic_devdata *devdata,
1395 					struct visor_device *dev)
1396 {
1397 	devdata->dev = dev;
1398 	devdata->incarnation_id = get_jiffies_64();
1399 	return devdata;
1400 }
1401 
1402 /* devdata_release - free up references in devdata
1403  * @devdata: Struct to clean up.
1404  */
1405 static void devdata_release(struct visornic_devdata *devdata)
1406 {
1407 	kfree(devdata->rcvbuf);
1408 	kfree(devdata->cmdrsp_rcv);
1409 	kfree(devdata->xmit_cmdrsp);
1410 }
1411 
1412 static const struct net_device_ops visornic_dev_ops = {
1413 	.ndo_open = visornic_open,
1414 	.ndo_stop = visornic_close,
1415 	.ndo_start_xmit = visornic_xmit,
1416 	.ndo_get_stats = visornic_get_stats,
1417 	.ndo_change_mtu = visornic_change_mtu,
1418 	.ndo_tx_timeout = visornic_xmit_timeout,
1419 	.ndo_set_rx_mode = visornic_set_multi,
1420 };
1421 
1422 /* DebugFS code */
1423 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1424 				 size_t len, loff_t *offset)
1425 {
1426 	ssize_t bytes_read = 0;
1427 	int str_pos = 0;
1428 	struct visornic_devdata *devdata;
1429 	struct net_device *dev;
1430 	char *vbuf;
1431 
1432 	if (len > MAX_BUF)
1433 		len = MAX_BUF;
1434 	vbuf = kzalloc(len, GFP_KERNEL);
1435 	if (!vbuf)
1436 		return -ENOMEM;
1437 
1438 	/* for each vnic channel dump out channel specific data */
1439 	rcu_read_lock();
1440 	for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1441 		/* Only consider netdevs that are visornic, and are open */
1442 		if (dev->netdev_ops != &visornic_dev_ops ||
1443 		    (!netif_queue_stopped(dev)))
1444 			continue;
1445 
1446 		devdata = netdev_priv(dev);
1447 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1448 				     "netdev = %s (0x%p), MAC Addr %pM\n",
1449 				     dev->name,
1450 				     dev,
1451 				     dev->dev_addr);
1452 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1453 				     "VisorNic Dev Info = 0x%p\n", devdata);
1454 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1455 				     " num_rcv_bufs = %d\n",
1456 				     devdata->num_rcv_bufs);
1457 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1458 				     " max_outstanding_next_xmits = %lu\n",
1459 				    devdata->max_outstanding_net_xmits);
1460 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1461 				     " upper_threshold_net_xmits = %lu\n",
1462 				     devdata->upper_threshold_net_xmits);
1463 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1464 				     " lower_threshold_net_xmits = %lu\n",
1465 				     devdata->lower_threshold_net_xmits);
1466 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1467 				     " queuefullmsg_logged = %d\n",
1468 				     devdata->queuefullmsg_logged);
1469 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1470 				     " chstat.got_rcv = %lu\n",
1471 				     devdata->chstat.got_rcv);
1472 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1473 				     " chstat.got_enbdisack = %lu\n",
1474 				     devdata->chstat.got_enbdisack);
1475 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1476 				     " chstat.got_xmit_done = %lu\n",
1477 				     devdata->chstat.got_xmit_done);
1478 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1479 				     " chstat.xmit_fail = %lu\n",
1480 				     devdata->chstat.xmit_fail);
1481 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1482 				     " chstat.sent_enbdis = %lu\n",
1483 				     devdata->chstat.sent_enbdis);
1484 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1485 				     " chstat.sent_promisc = %lu\n",
1486 				     devdata->chstat.sent_promisc);
1487 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1488 				     " chstat.sent_post = %lu\n",
1489 				     devdata->chstat.sent_post);
1490 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1491 				     " chstat.sent_post_failed = %lu\n",
1492 				     devdata->chstat.sent_post_failed);
1493 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1494 				     " chstat.sent_xmit = %lu\n",
1495 				     devdata->chstat.sent_xmit);
1496 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1497 				     " chstat.reject_count = %lu\n",
1498 				     devdata->chstat.reject_count);
1499 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1500 				     " chstat.extra_rcvbufs_sent = %lu\n",
1501 				     devdata->chstat.extra_rcvbufs_sent);
1502 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1503 				     " n_rcv0 = %lu\n", devdata->n_rcv0);
1504 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1505 				     " n_rcv1 = %lu\n", devdata->n_rcv1);
1506 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1507 				     " n_rcv2 = %lu\n", devdata->n_rcv2);
1508 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1509 				     " n_rcvx = %lu\n", devdata->n_rcvx);
1510 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1511 				     " num_rcvbuf_in_iovm = %d\n",
1512 				     atomic_read(&devdata->num_rcvbuf_in_iovm));
1513 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1514 				     " alloc_failed_in_if_needed_cnt = %lu\n",
1515 				     devdata->alloc_failed_in_if_needed_cnt);
1516 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1517 				     " alloc_failed_in_repost_rtn_cnt = %lu\n",
1518 				     devdata->alloc_failed_in_repost_rtn_cnt);
1519 		/* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1520 		 *		     " inner_loop_limit_reached_cnt = %lu\n",
1521 		 *		     devdata->inner_loop_limit_reached_cnt);
1522 		 */
1523 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1524 				     " found_repost_rcvbuf_cnt = %lu\n",
1525 				     devdata->found_repost_rcvbuf_cnt);
1526 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1527 				     " repost_found_skb_cnt = %lu\n",
1528 				     devdata->repost_found_skb_cnt);
1529 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1530 				     " n_repost_deficit = %lu\n",
1531 				     devdata->n_repost_deficit);
1532 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1533 				     " bad_rcv_buf = %lu\n",
1534 				     devdata->bad_rcv_buf);
1535 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1536 				     " n_rcv_packets_not_accepted = %lu\n",
1537 				     devdata->n_rcv_packets_not_accepted);
1538 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1539 				     " interrupts_rcvd = %llu\n",
1540 				     devdata->interrupts_rcvd);
1541 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1542 				     " interrupts_notme = %llu\n",
1543 				     devdata->interrupts_notme);
1544 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1545 				     " interrupts_disabled = %llu\n",
1546 				     devdata->interrupts_disabled);
1547 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1548 				     " busy_cnt = %llu\n",
1549 				     devdata->busy_cnt);
1550 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1551 				     " flow_control_upper_hits = %llu\n",
1552 				     devdata->flow_control_upper_hits);
1553 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1554 				     " flow_control_lower_hits = %llu\n",
1555 				     devdata->flow_control_lower_hits);
1556 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1557 				     " netif_queue = %s\n",
1558 				     netif_queue_stopped(devdata->netdev) ?
1559 				     "stopped" : "running");
1560 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1561 				     " xmits_outstanding = %lu\n",
1562 				     devdata_xmits_outstanding(devdata));
1563 	}
1564 	rcu_read_unlock();
1565 	bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
1566 	kfree(vbuf);
1567 	return bytes_read;
1568 }
1569 
1570 static struct dentry *visornic_debugfs_dir;
1571 static const struct file_operations debugfs_info_fops = {
1572 	.read = info_debugfs_read,
1573 };
1574 
1575 /* send_rcv_posts_if_needed - send receive buffers to the IO Partition.
1576  * @devdata: Visornic device.
1577  */
1578 static void send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1579 {
1580 	int i;
1581 	struct net_device *netdev;
1582 	struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1583 	int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1584 	int err;
1585 
1586 	/* don't do this until vnic is marked ready */
1587 	if (!(devdata->enabled && devdata->enab_dis_acked))
1588 		return;
1589 
1590 	netdev = devdata->netdev;
1591 	rcv_bufs_allocated = 0;
1592 	/* this code is trying to prevent getting stuck here forever,
1593 	 * but still retry it if you cant allocate them all this time.
1594 	 */
1595 	cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1596 	while (cur_num_rcv_bufs_to_alloc > 0) {
1597 		cur_num_rcv_bufs_to_alloc--;
1598 		for (i = 0; i < devdata->num_rcv_bufs; i++) {
1599 			if (devdata->rcvbuf[i])
1600 				continue;
1601 			devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1602 			if (!devdata->rcvbuf[i]) {
1603 				devdata->alloc_failed_in_if_needed_cnt++;
1604 				break;
1605 			}
1606 			rcv_bufs_allocated++;
1607 			err = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1608 			if (err) {
1609 				kfree_skb(devdata->rcvbuf[i]);
1610 				devdata->rcvbuf[i] = NULL;
1611 				break;
1612 			}
1613 			devdata->chstat.extra_rcvbufs_sent++;
1614 		}
1615 	}
1616 	devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1617 }
1618 
/* drain_resp_queue - drains and ignores all messages from the resp queue
 * @cmdrsp:  IO channel command response message.
 * @devdata: Visornic device to drain.
 */
static void drain_resp_queue(struct uiscmdrsp *cmdrsp,
			     struct visornic_devdata *devdata)
{
	while (!visorchannel_signalremove(devdata->dev->visorchannel,
					  IOCHAN_FROM_IOPART,
					  cmdrsp))
		;
}

/* service_resp_queue - drain the response queue
 * @cmdrsp:       IO channel command response message.
 * @devdata:      Visornic device to drain.
 * @rx_work_done: Accumulates the number of receive packets processed.
 * @budget:       Maximum amount of receive work to do before returning
 *                (the NAPI budget).
 *
 * Drain the response queue of any responses from the IO Partition. Process the
 * responses as we get them.
 */
static void service_resp_queue(struct uiscmdrsp *cmdrsp,
			       struct visornic_devdata *devdata,
			       int *rx_work_done, int budget)
{
	unsigned long flags;
	struct net_device *netdev;

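	/* Pull responses off the IO channel until it is empty or the NAPI
	 * budget is exhausted.
	 */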
	while (*rx_work_done < budget) {
		/* TODO: CLIENT ACQUIRE -- Don't really need this at the
		 * moment
		 */
		/* queue empty */
		if (visorchannel_signalremove(devdata->dev->visorchannel,
					      IOCHAN_FROM_IOPART,
					      cmdrsp))
			break;

		switch (cmdrsp->net.type) {
		case NET_RCV:
			devdata->chstat.got_rcv++;
			/* process incoming packet */
			*rx_work_done += visornic_rx(cmdrsp);
			break;
		case NET_XMIT_DONE:
			spin_lock_irqsave(&devdata->priv_lock, flags);
			devdata->chstat.got_xmit_done++;
			if (cmdrsp->net.xmtdone.xmt_done_result)
				devdata->chstat.xmit_fail++;
			/* only call queue wake if we stopped it */
			netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
			/* ASSERT netdev == vnicinfo->netdev; */
			if (netdev == devdata->netdev &&
			    netif_queue_stopped(netdev)) {
				/* check if we have crossed the lower watermark
				 * for netif_wake_queue()
				 */
				if (vnic_hit_low_watermark
				    (devdata,
				     devdata->lower_threshold_net_xmits)) {
					/* enough NET_XMITs completed
					 * so can restart netif queue
					 */
					netif_wake_queue(netdev);
					devdata->flow_control_lower_hits++;
				}
			}
			skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
			spin_unlock_irqrestore(&devdata->priv_lock, flags);
			kfree_skb(cmdrsp->net.buf);
			break;
		case NET_RCV_ENBDIS_ACK:
			devdata->chstat.got_enbdisack++;
			netdev = (struct net_device *)
				 cmdrsp->net.enbdis.context;
			spin_lock_irqsave(&devdata->priv_lock, flags);
			devdata->enab_dis_acked = 1;
			spin_unlock_irqrestore(&devdata->priv_lock, flags);

			if (devdata->server_down &&
			    devdata->server_change_state) {
				/* Inform Linux that the link is up */
				devdata->server_down = false;
				devdata->server_change_state = false;
				netif_wake_queue(netdev);
				netif_carrier_on(netdev);
			}
			break;
		case NET_CONNECT_STATUS:
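			/* The IO Partition is reporting a link state change:
			 * enable == 1 means the link came up, anything else
			 * means it went down.
			 */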
			netdev = devdata->netdev;
			if (cmdrsp->net.enbdis.enable == 1) {
				spin_lock_irqsave(&devdata->priv_lock, flags);
				devdata->enabled = cmdrsp->net.enbdis.enable;
				spin_unlock_irqrestore(&devdata->priv_lock,
						       flags);
				netif_wake_queue(netdev);
				netif_carrier_on(netdev);
			} else {
				netif_stop_queue(netdev);
				netif_carrier_off(netdev);
				spin_lock_irqsave(&devdata->priv_lock, flags);
				devdata->enabled = cmdrsp->net.enbdis.enable;
				spin_unlock_irqrestore(&devdata->priv_lock,
						       flags);
			}
			break;
		default:
			break;
		}
		/* cmdrsp is now available for reuse */
	}
}

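/* visornic_poll - NAPI poll routine
 * @napi:   NAPI instance embedded in our visornic_devdata.
 * @budget: Maximum number of receive packets to process in this call.
 *
 * Repost receive buffers to the IO Partition if needed, then service the
 * response queue. NAPI polling is completed when less than @budget worth of
 * receive work was done.
 *
 * Return: Number of receive packets processed.
 */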
static int visornic_poll(struct napi_struct *napi, int budget)
{
	struct visornic_devdata *devdata = container_of(napi,
							struct visornic_devdata,
							napi);
	int rx_count = 0;

	send_rcv_posts_if_needed(devdata);
	service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);

	/* If there aren't any more packets to receive, stop the poll */
	if (rx_count < budget)
		napi_complete_done(napi, rx_count);

	return rx_count;
}

/* visornic_channel_interrupt - checks the status of the response queue
 * @dev: Visor device whose channel is being checked.
 *
 * Called by visorbus to periodically check the response queue; schedules
 * NAPI to drain the queue if it is not empty.
 */
static void visornic_channel_interrupt(struct visor_device *dev)
{
	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);

	if (!devdata)
		return;

	if (!visorchannel_signalempty(devdata->dev->visorchannel,
				      IOCHAN_FROM_IOPART))
		napi_schedule(&devdata->napi);

	atomic_set(&devdata->interrupt_rcvd, 0);
}

/* visornic_probe - probe function for visornic devices
 * @dev: The visor device discovered.
 *
 * Called when visorbus discovers a visornic device on its bus. It creates a
 * new visornic ethernet adapter.
 *
 * Return: 0 on success, or negative integer on error.
 */
static int visornic_probe(struct visor_device *dev)
{
	struct visornic_devdata *devdata = NULL;
	struct net_device *netdev = NULL;
	int err;
	int channel_offset = 0;
	u64 features;

	netdev = alloc_etherdev(sizeof(struct visornic_devdata));
	if (!netdev) {
		dev_err(&dev->device,
			"%s alloc_etherdev failed\n", __func__);
		return -ENOMEM;
	}

	netdev->netdev_ops = &visornic_dev_ops;
	netdev->watchdog_timeo = 5 * HZ;
	SET_NETDEV_DEV(netdev, &dev->device);

	/* Get MAC address from channel and read it into the device. */
	netdev->addr_len = ETH_ALEN;
	channel_offset = offsetof(struct visor_io_channel, vnic.macaddr);
	err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
				    ETH_ALEN);
	if (err < 0) {
		dev_err(&dev->device,
			"%s failed to get mac addr from chan (%d)\n",
			__func__, err);
		goto cleanup_netdev;
	}

	devdata = devdata_initialize(netdev_priv(netdev), dev);
	if (!devdata) {
		dev_err(&dev->device,
			"%s devdata_initialize failed\n", __func__);
		err = -ENOMEM;
		goto cleanup_netdev;
	}
	/* don't trust messages lying around in the channel */
	drain_resp_queue(devdata->cmdrsp, devdata);

	devdata->netdev = netdev;
	dev_set_drvdata(&dev->device, devdata);
	init_waitqueue_head(&devdata->rsp_queue);
	spin_lock_init(&devdata->priv_lock);
	/* not yet */
	devdata->enabled = 0;
	atomic_set(&devdata->usage, 1);

	/* Set up rcv bufs */
	channel_offset = offsetof(struct visor_io_channel, vnic.num_rcv_bufs);
	err = visorbus_read_channel(dev, channel_offset,
				    &devdata->num_rcv_bufs, 4);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get #rcv bufs from chan (%d)\n",
			__func__, err);
		goto cleanup_netdev;
	}

	devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
				  sizeof(struct sk_buff *), GFP_KERNEL);
	if (!devdata->rcvbuf) {
		err = -ENOMEM;
		goto cleanup_netdev;
	}

	/* Set the net_xmit outstanding thresholds.
	 * Always leave two slots open, but never go below a minimum of 3.
	 * Note that max_outstanding_net_xmits must be > 0.
	 */
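	/* Worked example with an illustrative value: if num_rcv_bufs = 64,
	 * max_outstanding_net_xmits = max(3, 64 / 3 - 2) = 19,
	 * upper_threshold_net_xmits = max(2, 19 - 1) = 18, and
	 * lower_threshold_net_xmits = max(1, 19 / 2) = 9.
	 */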
	devdata->max_outstanding_net_xmits =
		max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
	devdata->upper_threshold_net_xmits =
		max_t(unsigned long,
		      2, (devdata->max_outstanding_net_xmits - 1));
	devdata->lower_threshold_net_xmits =
		max_t(unsigned long,
		      1, (devdata->max_outstanding_net_xmits / 2));

	skb_queue_head_init(&devdata->xmitbufhead);

	/* create a cmdrsp we can use to post and unpost rcv buffers */
	devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_KERNEL);
	if (!devdata->cmdrsp_rcv) {
		err = -ENOMEM;
		goto cleanup_rcvbuf;
	}
	devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_KERNEL);
	if (!devdata->xmit_cmdrsp) {
		err = -ENOMEM;
		goto cleanup_cmdrsp_rcv;
	}
	INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
	devdata->server_down = false;
	devdata->server_change_state = false;

	/* set the default MTU */
	channel_offset = offsetof(struct visor_io_channel, vnic.mtu);
	err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get mtu from chan (%d)\n",
			__func__, err);
		goto cleanup_xmit_cmdrsp;
	}

	/* TODO: Setup Interrupt information */
	/* Register the NAPI poll routine that will process our responses */
	netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);

	channel_offset = offsetof(struct visor_io_channel,
				  channel_header.features);
	err = visorbus_read_channel(dev, channel_offset, &features, 8);
	if (err) {
		dev_err(&dev->device,
			"%s failed to get features from chan (%d)\n",
			__func__, err);
		goto cleanup_napi_add;
	}

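	/* Advertise to the IO Partition that this driver polls the channel
	 * and performs enhanced receive-buffer checking.
	 */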
	features |= VISOR_CHANNEL_IS_POLLING;
	features |= VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING;
	err = visorbus_write_channel(dev, channel_offset, &features, 8);
	if (err) {
		dev_err(&dev->device,
			"%s failed to set features in chan (%d)\n",
			__func__, err);
		goto cleanup_napi_add;
	}

	/* Note: Interrupts have to be enabled before the netdev is registered
	 * and opened below, because the napi routine is responsible for
	 * setting enab_dis_acked.
	 */
	visorbus_enable_channel_interrupts(dev);

	err = register_netdev(netdev);
	if (err) {
		dev_err(&dev->device,
			"%s register_netdev failed (%d)\n", __func__, err);
		goto cleanup_napi_add;
	}

	/* create the debugfs directory for this device */
	devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
						      visornic_debugfs_dir);
	if (!devdata->eth_debugfs_dir) {
		dev_err(&dev->device,
			"%s debugfs_create_dir %s failed\n",
			__func__, netdev->name);
		err = -ENOMEM;
		goto cleanup_register_netdev;
	}

	dev_info(&dev->device, "%s success netdev=%s\n",
		 __func__, netdev->name);
	return 0;

cleanup_register_netdev:
	unregister_netdev(netdev);

cleanup_napi_add:
	visorbus_disable_channel_interrupts(dev);
	netif_napi_del(&devdata->napi);

cleanup_xmit_cmdrsp:
	kfree(devdata->xmit_cmdrsp);

cleanup_cmdrsp_rcv:
	kfree(devdata->cmdrsp_rcv);

cleanup_rcvbuf:
	kfree(devdata->rcvbuf);

cleanup_netdev:
	free_netdev(netdev);
	return err;
}


/* host_side_disappeared - IO Partition is gone
 * @devdata: Device object.
 *
 * IO partition servicing this device is gone; do cleanup.
 */
static void host_side_disappeared(struct visornic_devdata *devdata)
{
	unsigned long flags;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	/* indicate device destroyed */
	devdata->dev = NULL;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
}

/* visornic_remove - called when visornic dev goes away
 * @dev: Visornic device that is being removed.
 *
 * Called when DEVICE_DESTROY gets called to remove the device.
 */
static void visornic_remove(struct visor_device *dev)
{
	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
	struct net_device *netdev;
	unsigned long flags;

	if (!devdata) {
		dev_err(&dev->device, "%s no devdata\n", __func__);
		return;
	}
	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->going_away) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s already being removed\n", __func__);
		return;
	}
	devdata->going_away = true;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);
	netdev = devdata->netdev;
	if (!netdev) {
		dev_err(&dev->device, "%s no net device\n", __func__);
		return;
	}

	/* going_away prevents new items being added to the workqueues */
	cancel_work_sync(&devdata->timeout_reset);

	debugfs_remove_recursive(devdata->eth_debugfs_dir);
	/* this will call visornic_close() */
	unregister_netdev(netdev);

	visorbus_disable_channel_interrupts(devdata->dev);
	netif_napi_del(&devdata->napi);

	dev_set_drvdata(&dev->device, NULL);
	host_side_disappeared(devdata);
	devdata_release(devdata);
	free_netdev(netdev);
}

/* visornic_pause - called when IO Part disappears
 * @dev:	   Visornic device that is being serviced.
 * @complete_func: Call when finished.
 *
 * Called when the IO Partition has gone down. Need to free up resources and
 * wait for IO partition to come back. Mark link as down and don't attempt any
 * DMA. When we have freed memory, call the complete_func so that Command knows
 * we are done. If we don't call complete_func, the IO Partition will never
 * come back.
 *
 * Return: 0 on success.
 */
static int visornic_pause(struct visor_device *dev,
			  visorbus_state_complete_func complete_func)
{
	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);

	visornic_serverdown(devdata, complete_func);
	return 0;
}

/* visornic_resume - called when IO Partition has recovered
 * @dev:	   Visornic device that is being serviced.
 * @complete_func: Call when finished.
 *
 * Called when the IO partition has recovered. Re-establish connection to the
 * IO Partition and set the link up. Okay to do DMA again.
 *
 * Return: 0 on success, negative integer on error.
 */
static int visornic_resume(struct visor_device *dev,
			   visorbus_state_complete_func complete_func)
{
	struct visornic_devdata *devdata;
	struct net_device *netdev;
	unsigned long flags;

	devdata = dev_get_drvdata(&dev->device);
	if (!devdata) {
		dev_err(&dev->device, "%s no devdata\n", __func__);
		return -EINVAL;
	}

	netdev = devdata->netdev;

	spin_lock_irqsave(&devdata->priv_lock, flags);
	if (devdata->server_change_state) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s server already changing state\n",
			__func__);
		return -EINVAL;
	}
	if (!devdata->server_down) {
		spin_unlock_irqrestore(&devdata->priv_lock, flags);
		dev_err(&dev->device, "%s server not down\n", __func__);
		complete_func(dev, 0);
		return 0;
	}
	devdata->server_change_state = true;
	spin_unlock_irqrestore(&devdata->priv_lock, flags);

	/* Must transition channel to ATTACHED state BEFORE
	 * we can start using the device again.
	 * TODO: State transitions
	 */
	visorbus_enable_channel_interrupts(dev);

	rtnl_lock();
	dev_open(netdev, NULL);
	rtnl_unlock();

	complete_func(dev, 0);
	return 0;
}

/* This is used to tell the visorbus driver which types of visor devices
 * we support, and what functions to call when a visor device that we support
 * is attached or removed.
 */
static struct visor_driver visornic_driver = {
	.name = "visornic",
	.owner = THIS_MODULE,
	.channel_types = visornic_channel_types,
	.probe = visornic_probe,
	.remove = visornic_remove,
	.pause = visornic_pause,
	.resume = visornic_resume,
	.channel_interrupt = visornic_channel_interrupt,
};

/* visornic_init - init function
 *
 * Init function for the visornic driver. Do initial driver setup and wait
 * for devices.
 *
 * Return: 0 on success, negative integer on error.
 */
static int visornic_init(void)
{
	int err;

	visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);

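	/* "info" is a read-only dump of device state; "enable_ints" is the
	 * write-only interrupt control backed by debugfs_enable_ints_fops,
	 * which is implemented earlier in this file.
	 */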
	debugfs_create_file("info", 0400, visornic_debugfs_dir, NULL,
			    &debugfs_info_fops);
	debugfs_create_file("enable_ints", 0200, visornic_debugfs_dir, NULL,
			    &debugfs_enable_ints_fops);

	err = visorbus_register_visor_driver(&visornic_driver);
	if (err)
		debugfs_remove_recursive(visornic_debugfs_dir);

	return err;
}

/* visornic_cleanup - driver exit routine
 *
 * Unregister driver from the bus and free up memory.
 */
static void visornic_cleanup(void)
{
	visorbus_unregister_visor_driver(&visornic_driver);
	debugfs_remove_recursive(visornic_debugfs_dir);
}

module_init(visornic_init);
module_exit(visornic_cleanup);

MODULE_AUTHOR("Unisys");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");