xref: /freebsd/sys/dev/mlx4/mlx4_core/mlx4_eq.c (revision f56f82e0)
1 /*
2  * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
3  * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *	- Redistributions of source code must retain the above
16  *	  copyright notice, this list of conditions and the following
17  *	  disclaimer.
18  *
19  *	- Redistributions in binary form must reproduce the above
20  *	  copyright notice, this list of conditions and the following
21  *	  disclaimer in the documentation and/or other materials
22  *	  provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  */
33 
34 #include <linux/interrupt.h>
35 #include <linux/slab.h>
36 #include <linux/module.h>
37 #include <linux/mm.h>
38 #include <linux/dma-mapping.h>
39 
40 #include <dev/mlx4/cmd.h>
41 
42 #include "mlx4.h"
43 #include "fw.h"
44 
45 enum {
46 	MLX4_IRQNAME_SIZE	= 32
47 };
48 
49 enum {
50 	MLX4_NUM_ASYNC_EQE	= 0x100,
51 	MLX4_NUM_SPARE_EQE	= 0x80,
52 	MLX4_EQ_ENTRY_SIZE	= 0x20
53 };
54 
55 #define MLX4_EQ_STATUS_OK	   ( 0 << 28)
56 #define MLX4_EQ_STATUS_WRITE_FAIL  (10 << 28)
57 #define MLX4_EQ_OWNER_SW	   ( 0 << 24)
58 #define MLX4_EQ_OWNER_HW	   ( 1 << 24)
59 #define MLX4_EQ_FLAG_EC		   ( 1 << 18)
60 #define MLX4_EQ_FLAG_OI		   ( 1 << 17)
61 #define MLX4_EQ_STATE_ARMED	   ( 9 <<  8)
62 #define MLX4_EQ_STATE_FIRED	   (10 <<  8)
63 #define MLX4_EQ_STATE_ALWAYS_ARMED (11 <<  8)
64 
65 #define MLX4_ASYNC_EVENT_MASK ((1ull << MLX4_EVENT_TYPE_PATH_MIG)	    | \
66 			       (1ull << MLX4_EVENT_TYPE_COMM_EST)	    | \
67 			       (1ull << MLX4_EVENT_TYPE_SQ_DRAINED)	    | \
68 			       (1ull << MLX4_EVENT_TYPE_CQ_ERROR)	    | \
69 			       (1ull << MLX4_EVENT_TYPE_WQ_CATAS_ERROR)	    | \
70 			       (1ull << MLX4_EVENT_TYPE_EEC_CATAS_ERROR)    | \
71 			       (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED)    | \
72 			       (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
73 			       (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
74 			       (1ull << MLX4_EVENT_TYPE_PORT_CHANGE)	    | \
75 			       (1ull << MLX4_EVENT_TYPE_ECC_DETECT)	    | \
76 			       (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
77 			       (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE)    | \
78 			       (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT)	    | \
79 			       (1ull << MLX4_EVENT_TYPE_CMD)		    | \
80 			       (1ull << MLX4_EVENT_TYPE_OP_REQUIRED)	    | \
81 			       (1ull << MLX4_EVENT_TYPE_COMM_CHANNEL)       | \
82 			       (1ull << MLX4_EVENT_TYPE_FLR_EVENT)	    | \
83 			       (1ull << MLX4_EVENT_TYPE_FATAL_WARNING))
84 
85 static u64 get_async_ev_mask(struct mlx4_dev *dev)
86 {
87 	u64 async_ev_mask = MLX4_ASYNC_EVENT_MASK;
88 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
89 		async_ev_mask |= (1ull << MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT);
90 	if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT)
91 		async_ev_mask |= (1ull << MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT);
92 
93 	return async_ev_mask;
94 }
95 
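/*
 * Update the EQ consumer index doorbell. The low 24 bits carry the
 * consumer index; bit 31 (req_not) asks the HCA to generate another
 * interrupt when a new EQE is posted.
 */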
96 static void eq_set_ci(struct mlx4_eq *eq, int req_not)
97 {
98 	__raw_writel((__force u32) cpu_to_be32((eq->cons_index & 0xffffff) |
99 					       req_not << 31),
100 		     eq->doorbell);
101 	/* We still want ordering, just not swabbing, so add a barrier */
102 	mb();
103 }
104 
105 static struct mlx4_eqe *get_eqe(struct mlx4_eq *eq, u32 entry, u8 eqe_factor)
106 {
107 	/* (entry & (eq->nent - 1)) gives us a cyclic array */
108 	unsigned long offset = (entry & (eq->nent - 1)) * (MLX4_EQ_ENTRY_SIZE << eqe_factor);
109 	/* CX3 is capable of extending the EQE from 32 to 64 bytes.
110 	 * When this feature is enabled, the first (in the lower addresses)
111 	 * 32 bytes in the 64 byte EQE are reserved and the next 32 bytes
112 	 * contain the legacy EQE information.
113 	 */
114 	return eq->page_list[offset / PAGE_SIZE].buf + (offset + (eqe_factor ? MLX4_EQ_ENTRY_SIZE : 0)) % PAGE_SIZE;
115 }
116 
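/*
 * An EQE belongs to software when its ownership bit matches the current
 * wrap phase of the consumer index (cons_index & nent); otherwise it is
 * still owned by hardware and NULL is returned.
 */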
117 static struct mlx4_eqe *next_eqe_sw(struct mlx4_eq *eq, u8 eqe_factor)
118 {
119 	struct mlx4_eqe *eqe = get_eqe(eq, eq->cons_index, eqe_factor);
120 	return !!(eqe->owner & 0x80) ^ !!(eq->cons_index & eq->nent) ? NULL : eqe;
121 }
122 
123 static struct mlx4_eqe *next_slave_event_eqe(struct mlx4_slave_event_eq *slave_eq)
124 {
125 	struct mlx4_eqe *eqe =
126 		&slave_eq->event_eqe[slave_eq->cons & (SLAVE_EVENT_EQ_SIZE - 1)];
127 	return (!!(eqe->owner & 0x80) ^
128 		!!(slave_eq->cons & SLAVE_EVENT_EQ_SIZE)) ?
129 		eqe : NULL;
130 }
131 
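/*
 * Work handler that drains the software slave event queue filled by
 * slave_event() and delivers each queued EQE to the target slave (or to
 * every slave for ALL_SLAVES) via the GEN_EQE command.
 */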
132 void mlx4_gen_slave_eqe(struct work_struct *work)
133 {
134 	struct mlx4_mfunc_master_ctx *master =
135 		container_of(work, struct mlx4_mfunc_master_ctx,
136 			     slave_event_work);
137 	struct mlx4_mfunc *mfunc =
138 		container_of(master, struct mlx4_mfunc, master);
139 	struct mlx4_priv *priv = container_of(mfunc, struct mlx4_priv, mfunc);
140 	struct mlx4_dev *dev = &priv->dev;
141 	struct mlx4_slave_event_eq *slave_eq = &mfunc->master.slave_eq;
142 	struct mlx4_eqe *eqe;
143 	u8 slave;
144 	int i;
145 
146 	for (eqe = next_slave_event_eqe(slave_eq); eqe;
147 	      eqe = next_slave_event_eqe(slave_eq)) {
148 		slave = eqe->slave_id;
149 
150 		/* All active slaves need to receive the event */
151 		if (slave == ALL_SLAVES) {
152 			for (i = 0; i < dev->num_slaves; i++) {
153 				if (mlx4_GEN_EQE(dev, i, eqe))
154 					mlx4_warn(dev, "Failed to generate "
155 						  "event for slave %d\n", i);
156 			}
157 		} else {
158 			if (mlx4_GEN_EQE(dev, slave, eqe))
159 				mlx4_warn(dev, "Failed to generate event "
160 					       "for slave %d\n", slave);
161 		}
162 		++slave_eq->cons;
163 	}
164 }
165 
166 
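/*
 * Queue an EQE on the master's software slave event queue and kick the
 * slave_event_work handler to deliver it. The event is dropped with a
 * warning if the queue is full.
 */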
167 static void slave_event(struct mlx4_dev *dev, u8 slave, struct mlx4_eqe *eqe)
168 {
169 	struct mlx4_priv *priv = mlx4_priv(dev);
170 	struct mlx4_slave_event_eq *slave_eq = &priv->mfunc.master.slave_eq;
171 	struct mlx4_eqe *s_eqe;
172 	unsigned long flags;
173 
174 	spin_lock_irqsave(&slave_eq->event_lock, flags);
175 	s_eqe = &slave_eq->event_eqe[slave_eq->prod & (SLAVE_EVENT_EQ_SIZE - 1)];
176 	if ((!!(s_eqe->owner & 0x80)) ^
177 	    (!!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE))) {
178 		mlx4_warn(dev, "Master failed to generate an EQE for slave: %d. "
179 			  "No free EQE on slave events queue\n", slave);
180 		spin_unlock_irqrestore(&slave_eq->event_lock, flags);
181 		return;
182 	}
183 
184 	memcpy(s_eqe, eqe, dev->caps.eqe_size - 1);
185 	s_eqe->slave_id = slave;
186 	/* ensure all information is written before setting the ownership bit */
187 	wmb();
188 	s_eqe->owner = !!(slave_eq->prod & SLAVE_EVENT_EQ_SIZE) ? 0x0 : 0x80;
189 	++slave_eq->prod;
190 
191 	queue_work(priv->mfunc.master.comm_wq,
192 		   &priv->mfunc.master.slave_event_work);
193 	spin_unlock_irqrestore(&slave_eq->event_lock, flags);
194 }
195 
196 static void mlx4_slave_event(struct mlx4_dev *dev, int slave,
197 			     struct mlx4_eqe *eqe)
198 {
199 	struct mlx4_priv *priv = mlx4_priv(dev);
200 
201 	if (slave < 0 || slave >= dev->num_slaves ||
202 	    slave == dev->caps.function)
203 		return;
204 
205 	if (!priv->mfunc.master.slave_state[slave].active)
206 		return;
207 
208 	slave_event(dev, slave, eqe);
209 }
210 
211 int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port)
212 {
213 	struct mlx4_eqe eqe;
214 
215 	struct mlx4_priv *priv = mlx4_priv(dev);
216 	struct mlx4_slave_state *s_slave = &priv->mfunc.master.slave_state[slave];
217 
218 	if (!s_slave->active)
219 		return 0;
220 
221 	memset(&eqe, 0, sizeof eqe);
222 
223 	eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
224 	eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE;
225 	eqe.event.port_mgmt_change.port = port;
226 
227 	return mlx4_GEN_EQE(dev, slave, &eqe);
228 }
229 EXPORT_SYMBOL(mlx4_gen_pkey_eqe);
230 
231 int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port)
232 {
233 	struct mlx4_eqe eqe;
234 
235 	/* don't send if we don't have that slave */
236 	if (dev->num_vfs < slave)
237 		return 0;
238 	memset(&eqe, 0, sizeof eqe);
239 
240 	eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
241 	eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO;
242 	eqe.event.port_mgmt_change.port = port;
243 
244 	return mlx4_GEN_EQE(dev, slave, &eqe);
245 }
246 EXPORT_SYMBOL(mlx4_gen_guid_change_eqe);
247 
248 int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port,
249 				   u8 port_subtype_change)
250 {
251 	struct mlx4_eqe eqe;
252 
253 	/* don't send if we don't have that slave */
254 	if (dev->num_vfs < slave)
255 		return 0;
256 	memset(&eqe, 0, sizeof eqe);
257 
258 	eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE;
259 	eqe.subtype = port_subtype_change;
260 	eqe.event.port_change.port = cpu_to_be32(port << 28);
261 
262 	mlx4_dbg(dev, "%s: sending: %d to slave: %d on port: %d\n", __func__,
263 		 port_subtype_change, slave, port);
264 	return mlx4_GEN_EQE(dev, slave, &eqe);
265 }
266 EXPORT_SYMBOL(mlx4_gen_port_state_change_eqe);
267 
268 enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port)
269 {
270 	struct mlx4_priv *priv = mlx4_priv(dev);
271 	struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state;
272 	if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS) {
273 		pr_err("%s: Error: asking for slave:%d, port:%d\n",
274 		       __func__, slave, port);
275 		return SLAVE_PORT_DOWN;
276 	}
277 	return s_state[slave].port_state[port];
278 }
279 EXPORT_SYMBOL(mlx4_get_slave_port_state);
280 
281 static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port,
282 				     enum slave_port_state state)
283 {
284 	struct mlx4_priv *priv = mlx4_priv(dev);
285 	struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state;
286 
287 	if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) {
288 		pr_err("%s: Error: asking for slave:%d, port:%d\n",
289 		       __func__, slave, port);
290 		return -1;
291 	}
292 	s_state[slave].port_state[port] = state;
293 
294 	return 0;
295 }
296 
297 static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event)
298 {
299 	int i;
300 	enum slave_port_gen_event gen_event;
301 
302 	for (i = 0; i < dev->num_slaves; i++)
303 		set_and_calc_slave_port_state(dev, i, port, event, &gen_event);
304 }
305 /**************************************************************************
306 	The function gets as input the new event for that port, and changes
307 	the slave's port state according to its previous state.
308 	The events are:
309 		MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
310 		MLX4_PORT_STATE_DEV_EVENT_PORT_UP
311 		MLX4_PORT_STATE_IB_EVENT_GID_VALID
312 		MLX4_PORT_STATE_IB_EVENT_GID_INVALID
313 ***************************************************************************/
314 int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave,
315 				  u8 port, int event,
316 				  enum slave_port_gen_event *gen_event)
317 {
318 	struct mlx4_priv *priv = mlx4_priv(dev);
319 	struct mlx4_slave_state *ctx = NULL;
320 	unsigned long flags;
321 	int ret = -1;
322 	enum slave_port_state cur_state =
323 		mlx4_get_slave_port_state(dev, slave, port);
324 
325 	*gen_event = SLAVE_PORT_GEN_EVENT_NONE;
326 
327 	if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) {
328 		pr_err("%s: Error: asking for slave:%d, port:%d\n",
329 		       __func__, slave, port);
330 		return ret;
331 	}
332 
333 	ctx = &priv->mfunc.master.slave_state[slave];
334 	spin_lock_irqsave(&ctx->lock, flags);
335 
336 	switch (cur_state) {
337 	case SLAVE_PORT_DOWN:
338 		if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event)
339 			mlx4_set_slave_port_state(dev, slave, port,
340 						  SLAVE_PENDING_UP);
341 		break;
342 	case SLAVE_PENDING_UP:
343 		if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event)
344 			mlx4_set_slave_port_state(dev, slave, port,
345 						  SLAVE_PORT_DOWN);
346 		else if (MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID == event) {
347 			mlx4_set_slave_port_state(dev, slave, port,
348 						  SLAVE_PORT_UP);
349 			*gen_event = SLAVE_PORT_GEN_EVENT_UP;
350 		}
351 		break;
352 	case SLAVE_PORT_UP:
353 		if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) {
354 			mlx4_set_slave_port_state(dev, slave, port,
355 						  SLAVE_PORT_DOWN);
356 			*gen_event = SLAVE_PORT_GEN_EVENT_DOWN;
357 		} else if (MLX4_PORT_STATE_IB_EVENT_GID_INVALID ==
358 				event) {
359 			mlx4_set_slave_port_state(dev, slave, port,
360 						  SLAVE_PENDING_UP);
361 			*gen_event = SLAVE_PORT_GEN_EVENT_DOWN;
362 		}
363 		break;
364 	default:
365 		pr_err("%s: BUG!!! UNKNOWN state: "
366 		       "slave:%d, port:%d\n", __func__, slave, port);
367 		goto out;
368 	}
369 	ret = mlx4_get_slave_port_state(dev, slave, port);
370 
371 out:
372 	spin_unlock_irqrestore(&ctx->lock, flags);
373 	return ret;
374 }
375 
376 EXPORT_SYMBOL(set_and_calc_slave_port_state);
377 
378 int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr, u16 sm_lid, u8 sm_sl)
379 {
380 	struct mlx4_eqe eqe;
381 
382 	memset(&eqe, 0, sizeof eqe);
383 
384 	eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT;
385 	eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO;
386 	eqe.event.port_mgmt_change.port = port;
387 	eqe.event.port_mgmt_change.params.port_info.changed_attr =
388 		cpu_to_be32((u32) attr);
389 	if (attr & MSTR_SM_CHANGE_MASK) {
390 		eqe.event.port_mgmt_change.params.port_info.mstr_sm_lid =
391 			cpu_to_be16(sm_lid);
392 		eqe.event.port_mgmt_change.params.port_info.mstr_sm_sl =
393 			sm_sl;
394 	}
395 
396 	slave_event(dev, ALL_SLAVES, &eqe);
397 	return 0;
398 }
399 EXPORT_SYMBOL(mlx4_gen_slaves_port_mgt_ev);
400 
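/*
 * Work handler run after an FLR event: for every slave whose last
 * command was FLR, delete all of its resources, return it to reset
 * state and tell the FW that FLR handling is done.
 */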
401 void mlx4_master_handle_slave_flr(struct work_struct *work)
402 {
403 	struct mlx4_mfunc_master_ctx *master =
404 		container_of(work, struct mlx4_mfunc_master_ctx,
405 			     slave_flr_event_work);
406 	struct mlx4_mfunc *mfunc =
407 		container_of(master, struct mlx4_mfunc, master);
408 	struct mlx4_priv *priv =
409 		container_of(mfunc, struct mlx4_priv, mfunc);
410 	struct mlx4_dev *dev = &priv->dev;
411 	struct mlx4_slave_state *slave_state = priv->mfunc.master.slave_state;
412 	int i;
413 	int err;
414 	unsigned long flags;
415 
416 	mlx4_dbg(dev, "mlx4_handle_slave_flr\n");
417 
418 	for (i = 0 ; i < dev->num_slaves; i++) {
419 
420 		if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) {
421 			mlx4_dbg(dev, "mlx4_handle_slave_flr: "
422 				 "clean slave: %d\n", i);
423 
424 			mlx4_delete_all_resources_for_slave(dev, i);
425 			/* return the slave to running mode */
426 			spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
427 			slave_state[i].last_cmd = MLX4_COMM_CMD_RESET;
428 			slave_state[i].is_slave_going_down = 0;
429 			spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
430 			/* notify the FW */
431 			err = mlx4_cmd(dev, 0, i, 0, MLX4_CMD_INFORM_FLR_DONE,
432 				       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
433 			if (err)
434 				mlx4_warn(dev, "Failed to notify FW on "
435 					  "FLR done (slave:%d)\n", i);
436 		}
437 	}
438 }
439 
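/*
 * Poll an event queue: consume all software-owned EQEs, dispatch each
 * one by event type (completions, async events, comm channel, FLR,
 * etc.), and ring the doorbell periodically so the HCA does not see the
 * queue as overflowed. Returns nonzero if any EQE was handled.
 */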
440 static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
441 {
442 	struct mlx4_priv *priv = mlx4_priv(dev);
443 	struct mlx4_eqe *eqe;
444 	int cqn;
445 	int eqes_found = 0;
446 	int set_ci = 0;
447 	int port;
448 	int slave = 0;
449 	int ret;
450 	u32 flr_slave;
451 	u8 update_slave_state;
452 	int i;
453 	enum slave_port_gen_event gen_event;
454 	unsigned long flags;
455 	struct mlx4_vport_state *s_info;
456 
457 	while ((eqe = next_eqe_sw(eq, dev->caps.eqe_factor))) {
458 		/*
459 		 * Make sure we read EQ entry contents after we've
460 		 * checked the ownership bit.
461 		 */
462 		rmb();
463 
464 		switch (eqe->type) {
465 		case MLX4_EVENT_TYPE_COMP:
466 			cqn = be32_to_cpu(eqe->event.comp.cqn) & 0xffffff;
467 			mlx4_cq_completion(dev, cqn);
468 			break;
469 
470 		case MLX4_EVENT_TYPE_PATH_MIG:
471 		case MLX4_EVENT_TYPE_COMM_EST:
472 		case MLX4_EVENT_TYPE_SQ_DRAINED:
473 		case MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE:
474 		case MLX4_EVENT_TYPE_WQ_CATAS_ERROR:
475 		case MLX4_EVENT_TYPE_PATH_MIG_FAILED:
476 		case MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
477 		case MLX4_EVENT_TYPE_WQ_ACCESS_ERROR:
478 			mlx4_dbg(dev, "event %d arrived\n", eqe->type);
479 			if (mlx4_is_master(dev)) {
480 				/* forward only to slave owning the QP */
481 				ret = mlx4_get_slave_from_resource_id(dev,
482 						RES_QP,
483 						be32_to_cpu(eqe->event.qp.qpn)
484 						& 0xffffff, &slave);
485 				if (ret && ret != -ENOENT) {
486 					mlx4_dbg(dev, "QP event %02x(%02x) on "
487 						 "EQ %d at index %u: could "
488 						 "not get slave id (%d)\n",
489 						 eqe->type, eqe->subtype,
490 						 eq->eqn, eq->cons_index, ret);
491 					break;
492 				}
493 
494 				if (!ret && slave != dev->caps.function) {
495 					mlx4_slave_event(dev, slave, eqe);
496 					break;
497 				}
498 
499 			}
500 			mlx4_qp_event(dev, be32_to_cpu(eqe->event.qp.qpn) &
501 				      0xffffff, eqe->type);
502 			break;
503 
504 		case MLX4_EVENT_TYPE_SRQ_LIMIT:
505 			mlx4_dbg(dev, "%s: MLX4_EVENT_TYPE_SRQ_LIMIT\n",
506 				 __func__);
507 		/* fall through */
508 		case MLX4_EVENT_TYPE_SRQ_CATAS_ERROR:
509 			if (mlx4_is_master(dev)) {
510 				/* forward only to slave owning the SRQ */
511 				ret = mlx4_get_slave_from_resource_id(dev,
512 						RES_SRQ,
513 						be32_to_cpu(eqe->event.srq.srqn)
514 						& 0xffffff,
515 						&slave);
516 				if (ret && ret != -ENOENT) {
517 					mlx4_warn(dev, "SRQ event %02x(%02x) "
518 						  "on EQ %d at index %u: could"
519 						  " not get slave id (%d)\n",
520 						  eqe->type, eqe->subtype,
521 						  eq->eqn, eq->cons_index, ret);
522 					break;
523 				}
524 				mlx4_dbg(dev, "%s: slave:%d, srq_no:0x%x, event: %02x(%02x)\n",
525 					 __func__, slave,
526 					 be32_to_cpu(eqe->event.srq.srqn),
527 					 eqe->type, eqe->subtype);
528 
529 				if (!ret && slave != dev->caps.function) {
530 					mlx4_dbg(dev, "%s: sending event %02x(%02x) to slave:%d\n",
531 						 __func__, eqe->type,
532 						 eqe->subtype, slave);
533 					mlx4_slave_event(dev, slave, eqe);
534 					break;
535 				}
536 			}
537 			mlx4_srq_event(dev, be32_to_cpu(eqe->event.srq.srqn) &
538 				       0xffffff, eqe->type);
539 			break;
540 
541 		case MLX4_EVENT_TYPE_CMD:
542 			mlx4_cmd_event(dev,
543 				       be16_to_cpu(eqe->event.cmd.token),
544 				       eqe->event.cmd.status,
545 				       be64_to_cpu(eqe->event.cmd.out_param));
546 			break;
547 
548 		case MLX4_EVENT_TYPE_PORT_CHANGE:
549 			port = be32_to_cpu(eqe->event.port_change.port) >> 28;
550 			if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) {
551 				mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN,
552 						    port);
553 				mlx4_priv(dev)->sense.do_sense_port[port] = 1;
554 				if (!mlx4_is_master(dev))
555 					break;
556 				for (i = 0; i < dev->num_slaves; i++) {
557 					if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
558 						if (i == mlx4_master_func_num(dev))
559 							continue;
560 						mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN"
561 							 " to slave: %d, port:%d\n",
562 							 __func__, i, port);
563 						s_info = &priv->mfunc.master.vf_oper[i].vport[port].state;
564 						if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state)
565 							mlx4_slave_event(dev, i, eqe);
566 					} else {  /* IB port */
567 						set_and_calc_slave_port_state(dev, i, port,
568 									      MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN,
569 									      &gen_event);
570 						/* we may be in pending state, in which case do not send a port_down event */
571 						if (SLAVE_PORT_GEN_EVENT_DOWN ==  gen_event) {
572 							if (i == mlx4_master_func_num(dev))
573 								continue;
574 							mlx4_slave_event(dev, i, eqe);
575 						}
576 					}
577 				}
578 			} else {
579 				mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, port);
580 
581 				mlx4_priv(dev)->sense.do_sense_port[port] = 0;
582 
583 				if (!mlx4_is_master(dev))
584 					break;
585 				if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
586 					for (i = 0; i < dev->num_slaves; i++) {
587 						if (i == mlx4_master_func_num(dev))
588 							continue;
589 						s_info = &priv->mfunc.master.vf_oper[i].vport[port].state;
590 						if (IFLA_VF_LINK_STATE_AUTO == s_info->link_state)
591 							mlx4_slave_event(dev, i, eqe);
592 					}
593 				else /* IB port */
594 					/* port-up event will be sent to a slave when the
595 					 * slave's alias-guid is set. This is done in alias_GUID.c
596 					 */
597 					set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP);
598 			}
599 			break;
600 
601 		case MLX4_EVENT_TYPE_CQ_ERROR:
602 			mlx4_warn(dev, "CQ %s on CQN %06x\n",
603 				  eqe->event.cq_err.syndrome == 1 ?
604 				  "overrun" : "access violation",
605 				  be32_to_cpu(eqe->event.cq_err.cqn) & 0xffffff);
606 			if (mlx4_is_master(dev)) {
607 				ret = mlx4_get_slave_from_resource_id(dev,
608 					RES_CQ,
609 					be32_to_cpu(eqe->event.cq_err.cqn)
610 					& 0xffffff, &slave);
611 				if (ret && ret != -ENOENT) {
612 					mlx4_dbg(dev, "CQ event %02x(%02x) on "
613 						 "EQ %d at index %u: could "
614 						  "not get slave id (%d)\n",
615 						  eqe->type, eqe->subtype,
616 						  eq->eqn, eq->cons_index, ret);
617 					break;
618 				}
619 
620 				if (!ret && slave != dev->caps.function) {
621 					mlx4_slave_event(dev, slave, eqe);
622 					break;
623 				}
624 			}
625 			mlx4_cq_event(dev,
626 				      be32_to_cpu(eqe->event.cq_err.cqn)
627 				      & 0xffffff,
628 				      eqe->type);
629 			break;
630 
631 		case MLX4_EVENT_TYPE_EQ_OVERFLOW:
632 			mlx4_warn(dev, "EQ overrun on EQN %d\n", eq->eqn);
633 			break;
634 
635 		case MLX4_EVENT_TYPE_OP_REQUIRED:
636 			atomic_inc(&priv->opreq_count);
637 			/* FW commands can't be executed from interrupt context,
638 			 * so run them from a deferred task */
639 			queue_work(mlx4_wq, &priv->opreq_task);
640 			break;
641 
642 		case MLX4_EVENT_TYPE_COMM_CHANNEL:
643 			if (!mlx4_is_master(dev)) {
644 				mlx4_warn(dev, "Received comm channel event "
645 					       "for non-master device\n");
646 				break;
647 			}
648 
649 			memcpy(&priv->mfunc.master.comm_arm_bit_vector,
650 			       eqe->event.comm_channel_arm.bit_vec,
651 			       sizeof eqe->event.comm_channel_arm.bit_vec);
652 
653 			if (!queue_work(priv->mfunc.master.comm_wq,
654 				   &priv->mfunc.master.comm_work))
655 				mlx4_warn(dev, "Failed to queue comm channel work\n");
656 
657 			if (!queue_work(priv->mfunc.master.comm_wq,
658 				   &priv->mfunc.master.arm_comm_work))
659 				mlx4_warn(dev, "Failed to queue arm comm channel work\n");
660 			break;
661 
662 		case MLX4_EVENT_TYPE_FLR_EVENT:
663 			flr_slave = be32_to_cpu(eqe->event.flr_event.slave_id);
664 			if (!mlx4_is_master(dev)) {
665 				mlx4_warn(dev, "Non-master function received "
666 					       "FLR event\n");
667 				break;
668 			}
669 
670 			mlx4_dbg(dev, "FLR event for slave: %d\n", flr_slave);
671 
672 			if (flr_slave >= dev->num_slaves) {
673 				mlx4_warn(dev,
674 					  "Got FLR for unknown function: %d\n",
675 					  flr_slave);
676 				update_slave_state = 0;
677 			} else
678 				update_slave_state = 1;
679 
680 			spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
681 			if (update_slave_state) {
682 				priv->mfunc.master.slave_state[flr_slave].active = false;
683 				priv->mfunc.master.slave_state[flr_slave].last_cmd = MLX4_COMM_CMD_FLR;
684 				priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
685 			}
686 			spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
687 			queue_work(priv->mfunc.master.comm_wq,
688 				   &priv->mfunc.master.slave_flr_event_work);
689 			break;
690 
691 		case MLX4_EVENT_TYPE_FATAL_WARNING:
692 			if (eqe->subtype == MLX4_FATAL_WARNING_SUBTYPE_WARMING) {
693 				if (mlx4_is_master(dev))
694 					for (i = 0; i < dev->num_slaves; i++) {
695 						mlx4_dbg(dev, "%s: Sending "
696 							"MLX4_FATAL_WARNING_SUBTYPE_WARMING"
697 							" to slave: %d\n", __func__, i);
698 						if (i == dev->caps.function)
699 							continue;
700 						mlx4_slave_event(dev, i, eqe);
701 					}
702 				mlx4_err(dev, "Temperature Threshold was reached! "
703 					"Threshold: %d degrees Celsius; "
704 					"Current Temperature: %d\n",
705 					be16_to_cpu(eqe->event.warming.warning_threshold),
706 					be16_to_cpu(eqe->event.warming.current_temperature));
707 			} else
708 				mlx4_warn(dev, "Unhandled event FATAL WARNING (%02x), "
709 					  "subtype %02x on EQ %d at index %u. owner=%x, "
710 					  "nent=0x%x, slave=%x, ownership=%s\n",
711 					  eqe->type, eqe->subtype, eq->eqn,
712 					  eq->cons_index, eqe->owner, eq->nent,
713 					  eqe->slave_id,
714 					  !!(eqe->owner & 0x80) ^
715 					  !!(eq->cons_index & eq->nent) ? "HW" : "SW");
716 
717 			break;
718 
719 		case MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT:
720 			mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_MGMT_CHANGE,
721 					    (unsigned long) eqe);
722 			break;
723 
724 		case MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT:
725 			switch (eqe->subtype) {
726 			case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_BAD_CABLE:
727 				mlx4_warn(dev, "Bad cable detected on port %u\n",
728 					  eqe->event.bad_cable.port);
729 				break;
730 			case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_UNSUPPORTED_CABLE:
731 				mlx4_warn(dev, "Unsupported cable detected\n");
732 				break;
733 			default:
734 				mlx4_dbg(dev, "Unhandled recoverable error event "
735 					 "detected: %02x(%02x) on EQ %d at index %u. "
736 					 "owner=%x, nent=0x%x, ownership=%s\n",
737 					 eqe->type, eqe->subtype, eq->eqn,
738 					 eq->cons_index, eqe->owner, eq->nent,
739 					 !!(eqe->owner & 0x80) ^
740 					 !!(eq->cons_index & eq->nent) ? "HW" : "SW");
741 				break;
742 			}
743 			break;
744 
745 		case MLX4_EVENT_TYPE_EEC_CATAS_ERROR:
746 		case MLX4_EVENT_TYPE_ECC_DETECT:
747 		default:
748 			mlx4_warn(dev, "Unhandled event %02x(%02x) on EQ %d at "
749 				  "index %u. owner=%x, nent=0x%x, slave=%x, "
750 				  "ownership=%s\n",
751 				  eqe->type, eqe->subtype, eq->eqn,
752 				  eq->cons_index, eqe->owner, eq->nent,
753 				  eqe->slave_id,
754 				  !!(eqe->owner & 0x80) ^
755 				  !!(eq->cons_index & eq->nent) ? "HW" : "SW");
756 			break;
757 		}
758 
759 		++eq->cons_index;
760 		eqes_found = 1;
761 		++set_ci;
762 
763 		/*
764 		 * The HCA will think the queue has overflowed if we
765 		 * don't tell it we've been processing events.  We
766 		 * create our EQs with MLX4_NUM_SPARE_EQE extra
767 		 * entries, so we must update our consumer index at
768 		 * least that often.
769 		 */
770 		if (unlikely(set_ci >= MLX4_NUM_SPARE_EQE)) {
771 			eq_set_ci(eq, 0);
772 			set_ci = 0;
773 		}
774 	}
775 
776 	eq_set_ci(eq, 1);
777 
778 	return eqes_found;
779 }
780 
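/*
 * INTx interrupt handler: clear the interrupt and poll every EQ, since
 * a single legacy interrupt line serves all of them.
 */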
781 static irqreturn_t mlx4_interrupt(int irq, void *dev_ptr)
782 {
783 	struct mlx4_dev *dev = dev_ptr;
784 	struct mlx4_priv *priv = mlx4_priv(dev);
785 	int work = 0;
786 	int i;
787 
788 	writel(priv->eq_table.clr_mask, priv->eq_table.clr_int);
789 
790 	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
791 		work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]);
792 
793 	return IRQ_RETVAL(work);
794 }
795 
796 static irqreturn_t mlx4_msi_x_interrupt(int irq, void *eq_ptr)
797 {
798 	struct mlx4_eq  *eq  = eq_ptr;
799 	struct mlx4_dev *dev = eq->dev;
800 
801 	mlx4_eq_int(dev, eq);
802 
803 	/* MSI-X vectors always belong to us */
804 	return IRQ_HANDLED;
805 }
806 
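/*
 * MAP_EQ command wrapper executed on the master on behalf of a slave.
 * The real command is issued only for the PF itself; for each async
 * event bit set in in_param the slave's chosen EQ number is recorded
 * (or cleared when the unmap bit, bit 31 of in_modifier, is set).
 */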
807 int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave,
808 			struct mlx4_vhcr *vhcr,
809 			struct mlx4_cmd_mailbox *inbox,
810 			struct mlx4_cmd_mailbox *outbox,
811 			struct mlx4_cmd_info *cmd)
812 {
813 	struct mlx4_priv *priv = mlx4_priv(dev);
814 	struct mlx4_slave_event_eq_info *event_eq =
815 		priv->mfunc.master.slave_state[slave].event_eq;
816 	u32 in_modifier = vhcr->in_modifier;
817 	u32 eqn = in_modifier & 0x3FF;
818 	u64 in_param =  vhcr->in_param;
819 	int err = 0;
820 	int i;
821 
822 	if (slave == dev->caps.function)
823 		err = mlx4_cmd(dev, in_param, (in_modifier & 0x80000000) | eqn,
824 			       0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B,
825 			       MLX4_CMD_NATIVE);
826 	if (!err)
827 		for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i)
828 			if (in_param & (1LL << i))
829 				event_eq[i].eqn = in_modifier >> 31 ? -1 : eqn;
830 
831 	return err;
832 }
833 
834 static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap,
835 			int eq_num)
836 {
837 	return mlx4_cmd(dev, event_mask, (unmap << 31) | eq_num,
838 			0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B,
839 			MLX4_CMD_WRAPPED);
840 }
841 
842 static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
843 			 int eq_num)
844 {
845 	return mlx4_cmd(dev, mailbox->dma, eq_num, 0,
846 			MLX4_CMD_SW2HW_EQ, MLX4_CMD_TIME_CLASS_A,
847 			MLX4_CMD_WRAPPED);
848 }
849 
850 static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
851 			 int eq_num)
852 {
853 	return mlx4_cmd_box(dev, 0, mailbox->dma, eq_num,
854 			    0, MLX4_CMD_HW2SW_EQ, MLX4_CMD_TIME_CLASS_A,
855 			    MLX4_CMD_WRAPPED);
856 }
857 
858 static int mlx4_num_eq_uar(struct mlx4_dev *dev)
859 {
860 	/*
861 	 * Each UAR holds 4 EQ doorbells.  To figure out how many UARs
862 	 * we need to map, take the difference of highest index and
863 	 * the lowest index we'll use and add 1.
864 	 */
865 	return (dev->caps.num_comp_vectors + 1 + dev->caps.reserved_eqs +
866 		 dev->caps.comp_pool)/4 - dev->caps.reserved_eqs/4 + 1;
867 }
868 
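/*
 * Return a pointer to the doorbell register of the given EQ, mapping
 * the UAR page that contains it on first use. Each UAR page holds four
 * EQ doorbells, 8 bytes apart starting at offset 0x800.
 */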
869 static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq)
870 {
871 	struct mlx4_priv *priv = mlx4_priv(dev);
872 	int index;
873 
874 	index = eq->eqn / 4 - dev->caps.reserved_eqs / 4;
875 
876 	if (!priv->eq_table.uar_map[index]) {
877 		priv->eq_table.uar_map[index] =
878 			ioremap(pci_resource_start(dev->pdev, 2) +
879 				((eq->eqn / 4) << PAGE_SHIFT),
880 				PAGE_SIZE);
881 		if (!priv->eq_table.uar_map[index]) {
882 			mlx4_err(dev, "Couldn't map EQ doorbell for EQN 0x%06x\n",
883 				 eq->eqn);
884 			return NULL;
885 		}
886 	}
887 
888 	return priv->eq_table.uar_map[index] + 0x800 + 8 * (eq->eqn % 4);
889 }
890 
891 static void mlx4_unmap_uar(struct mlx4_dev *dev)
892 {
893 	struct mlx4_priv *priv = mlx4_priv(dev);
894 	int i;
895 
896 	for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
897 		if (priv->eq_table.uar_map[i]) {
898 			iounmap(priv->eq_table.uar_map[i]);
899 			priv->eq_table.uar_map[i] = NULL;
900 		}
901 }
902 
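/*
 * Create one EQ: allocate the EQE buffer pages and an EQ number, map
 * the doorbell and MTT entries, then pass ownership to the FW with
 * SW2HW_EQ.
 */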
903 static int mlx4_create_eq(struct mlx4_dev *dev, int nent,
904 			  u8 intr, struct mlx4_eq *eq)
905 {
906 	struct mlx4_priv *priv = mlx4_priv(dev);
907 	struct mlx4_cmd_mailbox *mailbox;
908 	struct mlx4_eq_context *eq_context;
909 	int npages;
910 	u64 *dma_list = NULL;
911 	dma_addr_t t;
912 	u64 mtt_addr;
913 	int err = -ENOMEM;
914 	int i;
915 
916 	eq->dev   = dev;
917 	eq->nent  = roundup_pow_of_two(max(nent, 2));
918 	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
919 	npages = PAGE_ALIGN(eq->nent * (MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor)) / PAGE_SIZE;
920 
921 	eq->page_list = kmalloc(npages * sizeof *eq->page_list,
922 				GFP_KERNEL);
923 	if (!eq->page_list)
924 		goto err_out;
925 
926 	for (i = 0; i < npages; ++i)
927 		eq->page_list[i].buf = NULL;
928 
929 	dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL);
930 	if (!dma_list)
931 		goto err_out_free;
932 
933 	mailbox = mlx4_alloc_cmd_mailbox(dev);
934 	if (IS_ERR(mailbox))
935 		goto err_out_free;
936 	eq_context = mailbox->buf;
937 
938 	for (i = 0; i < npages; ++i) {
939 		eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev,
940 							  PAGE_SIZE, &t, GFP_KERNEL);
941 		if (!eq->page_list[i].buf)
942 			goto err_out_free_pages;
943 
944 		dma_list[i] = t;
945 		eq->page_list[i].map = t;
946 
947 		memset(eq->page_list[i].buf, 0, PAGE_SIZE);
948 	}
949 
950 	eq->eqn = mlx4_bitmap_alloc(&priv->eq_table.bitmap);
951 	if (eq->eqn == -1)
952 		goto err_out_free_pages;
953 
954 	eq->doorbell = mlx4_get_eq_uar(dev, eq);
955 	if (!eq->doorbell) {
956 		err = -ENOMEM;
957 		goto err_out_free_eq;
958 	}
959 
960 	err = mlx4_mtt_init(dev, npages, PAGE_SHIFT, &eq->mtt);
961 	if (err)
962 		goto err_out_free_eq;
963 
964 	err = mlx4_write_mtt(dev, &eq->mtt, 0, npages, dma_list);
965 	if (err)
966 		goto err_out_free_mtt;
967 
968 	memset(eq_context, 0, sizeof *eq_context);
969 	eq_context->flags	  = cpu_to_be32(MLX4_EQ_STATUS_OK   |
970 						MLX4_EQ_STATE_ARMED);
971 	eq_context->log_eq_size	  = ilog2(eq->nent);
972 	eq_context->intr	  = intr;
973 	eq_context->log_page_size = PAGE_SHIFT - MLX4_ICM_PAGE_SHIFT;
974 
975 	mtt_addr = mlx4_mtt_addr(dev, &eq->mtt);
976 	eq_context->mtt_base_addr_h = mtt_addr >> 32;
977 	eq_context->mtt_base_addr_l = cpu_to_be32(mtt_addr & 0xffffffff);
978 
979 	err = mlx4_SW2HW_EQ(dev, mailbox, eq->eqn);
980 	if (err) {
981 		mlx4_warn(dev, "SW2HW_EQ failed (%d)\n", err);
982 		goto err_out_free_mtt;
983 	}
984 
985 	kfree(dma_list);
986 	mlx4_free_cmd_mailbox(dev, mailbox);
987 
988 	eq->cons_index = 0;
989 
990 	return err;
991 
992 err_out_free_mtt:
993 	mlx4_mtt_cleanup(dev, &eq->mtt);
994 
995 err_out_free_eq:
996 	mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR);
997 
998 err_out_free_pages:
999 	for (i = 0; i < npages; ++i)
1000 		if (eq->page_list[i].buf)
1001 			dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
1002 					  eq->page_list[i].buf,
1003 					  eq->page_list[i].map);
1004 
1005 	mlx4_free_cmd_mailbox(dev, mailbox);
1006 
1007 err_out_free:
1008 	kfree(eq->page_list);
1009 	kfree(dma_list);
1010 
1011 err_out:
1012 	return err;
1013 }
1014 
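/*
 * Destroy an EQ: reclaim it from the FW with HW2SW_EQ and free its
 * buffer pages, MTT entries and EQ number.
 */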
1015 static void mlx4_free_eq(struct mlx4_dev *dev,
1016 			 struct mlx4_eq *eq)
1017 {
1018 	struct mlx4_priv *priv = mlx4_priv(dev);
1019 	struct mlx4_cmd_mailbox *mailbox;
1020 	int err;
1021 	int i;
1022 	/* CX3 is capable of extending the CQE/EQE from 32 to 64 bytes */
1023 	int npages = PAGE_ALIGN((MLX4_EQ_ENTRY_SIZE << dev->caps.eqe_factor) * eq->nent) / PAGE_SIZE;
1024 
1025 	mailbox = mlx4_alloc_cmd_mailbox(dev);
1026 	if (IS_ERR(mailbox))
1027 		return;
1028 
1029 	err = mlx4_HW2SW_EQ(dev, mailbox, eq->eqn);
1030 	if (err)
1031 		mlx4_warn(dev, "HW2SW_EQ failed (%d)\n", err);
1032 
1033 	if (0) {
1034 		mlx4_dbg(dev, "Dumping EQ context %02x:\n", eq->eqn);
1035 		for (i = 0; i < sizeof (struct mlx4_eq_context) / 4; ++i) {
1036 			if (i % 4 == 0)
1037 				pr_cont("[%02x] ", i * 4);
1038 			pr_cont(" %08x", be32_to_cpup(mailbox->buf + i * 4));
1039 			if ((i + 1) % 4 == 0)
1040 				pr_cont("\n");
1041 		}
1042 	}
1043 
1044 	mlx4_mtt_cleanup(dev, &eq->mtt);
1045 	for (i = 0; i < npages; ++i)
1046 		dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
1047 				    eq->page_list[i].buf,
1048 				    eq->page_list[i].map);
1049 
1050 	kfree(eq->page_list);
1051 	mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR);
1052 	mlx4_free_cmd_mailbox(dev, mailbox);
1053 }
1054 
1055 static void mlx4_free_irqs(struct mlx4_dev *dev)
1056 {
1057 	struct mlx4_eq_table *eq_table = &mlx4_priv(dev)->eq_table;
1058 	struct mlx4_priv *priv = mlx4_priv(dev);
1059 	int	i, vec;
1060 
1061 	if (eq_table->have_irq)
1062 		free_irq(dev->pdev->irq, dev);
1063 
1064 	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
1065 		if (eq_table->eq[i].have_irq) {
1066 			free_irq(eq_table->eq[i].irq, eq_table->eq + i);
1067 			eq_table->eq[i].have_irq = 0;
1068 		}
1069 
1070 	for (i = 0; i < dev->caps.comp_pool; i++) {
1071 		/*
1072 		 * Free the assigned IRQs. All bits should be 0,
1073 		 * but we validate to be safe.
1074 		 */
1075 		if (priv->msix_ctl.pool_bm & 1ULL << i) {
1076 			/* no need for locking here */
1077 			vec = dev->caps.num_comp_vectors + 1 + i;
1078 			free_irq(priv->eq_table.eq[vec].irq,
1079 				 &priv->eq_table.eq[vec]);
1080 		}
1081 	}
1082 
1083 
1084 	kfree(eq_table->irq_names);
1085 }
1086 
1087 static int mlx4_map_clr_int(struct mlx4_dev *dev)
1088 {
1089 	struct mlx4_priv *priv = mlx4_priv(dev);
1090 
1091 	priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) +
1092 				 priv->fw.clr_int_base, MLX4_CLR_INT_SIZE);
1093 	if (!priv->clr_base) {
1094 		mlx4_err(dev, "Couldn't map interrupt clear register, aborting.\n");
1095 		return -ENOMEM;
1096 	}
1097 
1098 	return 0;
1099 }
1100 
1101 static void mlx4_unmap_clr_int(struct mlx4_dev *dev)
1102 {
1103 	struct mlx4_priv *priv = mlx4_priv(dev);
1104 
1105 	iounmap(priv->clr_base);
1106 }
1107 
1108 int mlx4_alloc_eq_table(struct mlx4_dev *dev)
1109 {
1110 	struct mlx4_priv *priv = mlx4_priv(dev);
1111 
1112 	priv->eq_table.eq = kcalloc(dev->caps.num_eqs - dev->caps.reserved_eqs,
1113 				    sizeof *priv->eq_table.eq, GFP_KERNEL);
1114 	if (!priv->eq_table.eq)
1115 		return -ENOMEM;
1116 
1117 	return 0;
1118 }
1119 
1120 void mlx4_free_eq_table(struct mlx4_dev *dev)
1121 {
1122 	kfree(mlx4_priv(dev)->eq_table.eq);
1123 }
1124 
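/*
 * Set up the EQ table: create one EQ per completion vector, one async
 * EQ and the optional extra completion pool EQs, request the IRQs
 * (one per EQ for MSI-X, a single shared line otherwise), map the
 * async events onto the async EQ and arm every EQ.
 */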
1125 int mlx4_init_eq_table(struct mlx4_dev *dev)
1126 {
1127 	struct mlx4_priv *priv = mlx4_priv(dev);
1128 	int err;
1129 	int i;
1130 
1131 	priv->eq_table.uar_map = kcalloc(mlx4_num_eq_uar(dev),
1132 					 sizeof *priv->eq_table.uar_map,
1133 					 GFP_KERNEL);
1134 	if (!priv->eq_table.uar_map) {
1135 		err = -ENOMEM;
1136 		goto err_out_free;
1137 	}
1138 
1139 	err = mlx4_bitmap_init(&priv->eq_table.bitmap,
1140 				roundup_pow_of_two(dev->caps.num_eqs),
1141 				dev->caps.num_eqs - 1,
1142 				dev->caps.reserved_eqs,
1143 				roundup_pow_of_two(dev->caps.num_eqs) -
1144 					dev->caps.num_eqs);
1145 	if (err)
1146 		goto err_out_free;
1147 
1148 	for (i = 0; i < mlx4_num_eq_uar(dev); ++i)
1149 		priv->eq_table.uar_map[i] = NULL;
1150 
1151 	if (!mlx4_is_slave(dev)) {
1152 		err = mlx4_map_clr_int(dev);
1153 		if (err)
1154 			goto err_out_bitmap;
1155 
1156 		priv->eq_table.clr_mask =
1157 			swab32(1 << (priv->eq_table.inta_pin & 31));
1158 		priv->eq_table.clr_int  = priv->clr_base +
1159 			(priv->eq_table.inta_pin < 32 ? 4 : 0);
1160 	}
1161 
1162 	priv->eq_table.irq_names =
1163 		kmalloc(MLX4_IRQNAME_SIZE * (dev->caps.num_comp_vectors + 1 +
1164 					     dev->caps.comp_pool),
1165 			GFP_KERNEL);
1166 	if (!priv->eq_table.irq_names) {
1167 		err = -ENOMEM;
1168 		goto err_out_clr_int;
1169 	}
1170 
1171 	for (i = 0; i < dev->caps.num_comp_vectors; ++i) {
1172 		err = mlx4_create_eq(dev, dev->quotas.cq +
1173 					  MLX4_NUM_SPARE_EQE,
1174 				     (dev->flags & MLX4_FLAG_MSI_X) ? i : 0,
1175 				     &priv->eq_table.eq[i]);
1176 		if (err) {
1177 			--i;
1178 			goto err_out_unmap;
1179 		}
1180 	}
1181 
1182 	err = mlx4_create_eq(dev, MLX4_NUM_ASYNC_EQE + MLX4_NUM_SPARE_EQE,
1183 			     (dev->flags & MLX4_FLAG_MSI_X) ? dev->caps.num_comp_vectors : 0,
1184 			     &priv->eq_table.eq[dev->caps.num_comp_vectors]);
1185 	if (err)
1186 		goto err_out_comp;
1187 
1188 	/* if the additional completion vector pool size is 0, this loop will not run */
1189 	for (i = dev->caps.num_comp_vectors + 1;
1190 	      i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i) {
1191 
1192 		err = mlx4_create_eq(dev, dev->quotas.cq +
1193 					  MLX4_NUM_SPARE_EQE,
1194 				     (dev->flags & MLX4_FLAG_MSI_X) ? i : 0,
1195 				     &priv->eq_table.eq[i]);
1196 		if (err) {
1197 			--i;
1198 			goto err_out_unmap;
1199 		}
1200 	}
1201 
1202 
1203 	if (dev->flags & MLX4_FLAG_MSI_X) {
1204 		const char *eq_name;
1205 
1206 		for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) {
1207 			if (i < dev->caps.num_comp_vectors) {
1208 				snprintf(priv->eq_table.irq_names +
1209 					 i * MLX4_IRQNAME_SIZE,
1210 					 MLX4_IRQNAME_SIZE,
1211 					 "mlx4-comp-%d@pci:%s", i,
1212 					 pci_name(dev->pdev));
1213 			} else {
1214 				snprintf(priv->eq_table.irq_names +
1215 					 i * MLX4_IRQNAME_SIZE,
1216 					 MLX4_IRQNAME_SIZE,
1217 					 "mlx4-async@pci:%s",
1218 					 pci_name(dev->pdev));
1219 			}
1220 
1221 			eq_name = priv->eq_table.irq_names +
1222 				  i * MLX4_IRQNAME_SIZE;
1223 			err = request_irq(priv->eq_table.eq[i].irq,
1224 					  mlx4_msi_x_interrupt, 0, eq_name,
1225 					  priv->eq_table.eq + i);
1226 			if (err)
1227 				goto err_out_async;
1228 
1229 			priv->eq_table.eq[i].have_irq = 1;
1230 		}
1231 	} else {
1232 		snprintf(priv->eq_table.irq_names,
1233 			 MLX4_IRQNAME_SIZE,
1234 			 DRV_NAME "@pci:%s",
1235 			 pci_name(dev->pdev));
1236 		err = request_irq(dev->pdev->irq, mlx4_interrupt,
1237 				  IRQF_SHARED, priv->eq_table.irq_names, dev);
1238 		if (err)
1239 			goto err_out_async;
1240 
1241 		priv->eq_table.have_irq = 1;
1242 	}
1243 
1244 	err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
1245 			  priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
1246 	if (err)
1247 		mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
1248 			   priv->eq_table.eq[dev->caps.num_comp_vectors].eqn, err);
1249 
1250 	for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i)
1251 		eq_set_ci(&priv->eq_table.eq[i], 1);
1252 
1253 	return 0;
1254 
1255 err_out_async:
1256 	mlx4_free_eq(dev, &priv->eq_table.eq[dev->caps.num_comp_vectors]);
1257 
1258 err_out_comp:
1259 	i = dev->caps.num_comp_vectors - 1;
1260 
1261 err_out_unmap:
1262 	while (i >= 0) {
1263 		mlx4_free_eq(dev, &priv->eq_table.eq[i]);
1264 		--i;
1265 	}
1266 	mlx4_free_irqs(dev);
1267 
1268 err_out_clr_int:
1269 	if (!mlx4_is_slave(dev))
1270 		mlx4_unmap_clr_int(dev);
1271 
1272 err_out_bitmap:
1273 	mlx4_unmap_uar(dev);
1274 	mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
1275 
1276 err_out_free:
1277 	kfree(priv->eq_table.uar_map);
1278 
1279 	return err;
1280 }
1281 
1282 void mlx4_cleanup_eq_table(struct mlx4_dev *dev)
1283 {
1284 	struct mlx4_priv *priv = mlx4_priv(dev);
1285 	int i;
1286 
1287 	mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 1,
1288 		    priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
1289 
1290 	mlx4_free_irqs(dev);
1291 
1292 	for (i = 0; i < dev->caps.num_comp_vectors + dev->caps.comp_pool + 1; ++i)
1293 		mlx4_free_eq(dev, &priv->eq_table.eq[i]);
1294 
1295 	if (!mlx4_is_slave(dev))
1296 		mlx4_unmap_clr_int(dev);
1297 
1298 	mlx4_unmap_uar(dev);
1299 	mlx4_bitmap_cleanup(&priv->eq_table.bitmap);
1300 
1301 	kfree(priv->eq_table.uar_map);
1302 }
1303 
1304 /* A test that verifies that we can accept interrupts on all
1305  * the irq vectors of the device.
1306  * Interrupts are checked using the NOP command.
1307  */
1308 int mlx4_test_interrupts(struct mlx4_dev *dev)
1309 {
1310 	struct mlx4_priv *priv = mlx4_priv(dev);
1311 	int i;
1312 	int err;
1313 
1314 	err = mlx4_NOP(dev);
1315 	/* When not in MSI_X, there is only one irq to check */
1316 	if (!(dev->flags & MLX4_FLAG_MSI_X) || mlx4_is_slave(dev))
1317 		return err;
1318 
1319 	/* A loop over all completion vectors, for each vector we will check
1320 	 * whether it works by mapping command completions to that vector
1321 	 * and performing a NOP command
1322 	 */
1323 	for (i = 0; !err && (i < dev->caps.num_comp_vectors); ++i) {
1324 		/* Temporarily use polling for command completions */
1325 		mlx4_cmd_use_polling(dev);
1326 
1327 		/* Map the new EQ to handle all asynchronous events */
1328 		err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
1329 				  priv->eq_table.eq[i].eqn);
1330 		if (err) {
1331 			mlx4_warn(dev, "Failed mapping eq for interrupt test\n");
1332 			mlx4_cmd_use_events(dev);
1333 			break;
1334 		}
1335 
1336 		/* Go back to using events */
1337 		mlx4_cmd_use_events(dev);
1338 		err = mlx4_NOP(dev);
1339 	}
1340 
1341 	/* Return to default */
1342 	mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0,
1343 		    priv->eq_table.eq[dev->caps.num_comp_vectors].eqn);
1344 	return err;
1345 }
1346 EXPORT_SYMBOL(mlx4_test_interrupts);
1347 
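/*
 * Hand out an EQ/IRQ from the extra completion vector pool. Bit i of
 * msix_ctl.pool_bm tracks pool entry i, which corresponds to vector
 * num_comp_vectors + 1 + i.
 */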
1348 int mlx4_assign_eq(struct mlx4_dev *dev, char *name, int *vector)
1349 {
1350 
1351 	struct mlx4_priv *priv = mlx4_priv(dev);
1352 	int vec = 0, err = 0, i;
1353 
1354 	mutex_lock(&priv->msix_ctl.pool_lock);
1355 	for (i = 0; !vec && i < dev->caps.comp_pool; i++) {
1356 		if (~priv->msix_ctl.pool_bm & 1ULL << i) {
1357 			priv->msix_ctl.pool_bm |= 1ULL << i;
1358 			vec = dev->caps.num_comp_vectors + 1 + i;
1359 			snprintf(priv->eq_table.irq_names +
1360 					vec * MLX4_IRQNAME_SIZE,
1361 					MLX4_IRQNAME_SIZE, "%s", name);
1362 			err = request_irq(priv->eq_table.eq[vec].irq,
1363 					  mlx4_msi_x_interrupt, 0,
1364 					  &priv->eq_table.irq_names[vec<<5],
1365 					  priv->eq_table.eq + vec);
1366 			if (err) {
1367 				/* zero out the bit by flipping it */
1368 				priv->msix_ctl.pool_bm ^= 1ULL << i;
1369 				vec = 0;
1370 				continue;
1371 				/* we don't want to break here */
1372 			}
1373 			eq_set_ci(&priv->eq_table.eq[vec], 1);
1374 		}
1375 	}
1376 	mutex_unlock(&priv->msix_ctl.pool_lock);
1377 
1378 	if (vec) {
1379 		*vector = vec;
1380 	} else {
1381 		*vector = 0;
1382 		err = (i == dev->caps.comp_pool) ? -ENOSPC : err;
1383 	}
1384 	return err;
1385 }
1386 EXPORT_SYMBOL(mlx4_assign_eq);
1387 
1388 void mlx4_release_eq(struct mlx4_dev *dev, int vec)
1389 {
1390 	struct mlx4_priv *priv = mlx4_priv(dev);
1391 	/* bitmap index */
1392 	int i = vec - dev->caps.num_comp_vectors - 1;
1393 
1394 	if (likely(i >= 0)) {
1395 		/* sanity check: make sure we are not trying to free an IRQ
1396 		 * belonging to a legacy EQ */
1397 		mutex_lock(&priv->msix_ctl.pool_lock);
1398 		if (priv->msix_ctl.pool_bm & 1ULL << i) {
1399 			free_irq(priv->eq_table.eq[vec].irq,
1400 				 &priv->eq_table.eq[vec]);
1401 			priv->msix_ctl.pool_bm &= ~(1ULL << i);
1402 		}
1403 		mutex_unlock(&priv->msix_ctl.pool_lock);
1404 	}
1405 
1406 }
1407 EXPORT_SYMBOL(mlx4_release_eq);
1408 
1409