xref: /freebsd/sys/dev/mlx5/mlx5_fpga/mlx5fpga_core.c (revision 9768746b)
1 /*-
2  * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  *
32  * $FreeBSD$
33  */
34 
35 #include <linux/module.h>
36 #include <linux/etherdevice.h>
37 #include <dev/mlx5/driver.h>
38 #include <dev/mlx5/mlx5_core/mlx5_core.h>
39 #include <dev/mlx5/mlx5_lib/mlx5.h>
40 #include <dev/mlx5/mlx5_fpga/core.h>
41 #include <dev/mlx5/mlx5_fpga/conn.h>
42 #include <dev/mlx5/mlx5_fpga/trans.h>
43 
/* Every FPGA device added by mlx5_fpga_init(), linked through fdev->list. */
44 static LIST_HEAD(mlx5_fpga_devices);
/* Every client added by mlx5_fpga_client_register(), linked through client->list. */
45 static LIST_HEAD(mlx5_fpga_clients);
46 /* protects access between client un/registration and device add/remove calls */
47 static DEFINE_MUTEX(mlx5_fpga_mutex);
48 
/*
 * Names for FPGA error-event syndromes, indexed by the syndrome value.
 * Read through mlx5_fpga_syndrome_to_string(), which bounds-checks the index.
 */
49 static const char *const mlx5_fpga_error_strings[] = {
50 	"Null Syndrome",
51 	"Corrupted DDR",
52 	"Flash Timeout",
53 	"Internal Link Error",
54 	"Watchdog HW Failure",
55 	"I2C Failure",
56 	"Image Changed",
57 	"Temperature Critical",
58 };
59 
/*
 * Names for FPGA QP error-event syndromes, indexed by the syndrome value.
 * Read through mlx5_fpga_qp_syndrome_to_string(), which bounds-checks the
 * index.
 */
60 static const char * const mlx5_fpga_qp_error_strings[] = {
61 	"Null Syndrome",
62 	"Retry Counter Expired",
63 	"RNR Expired",
64 };
65 
66 static void client_context_destroy(struct mlx5_fpga_device *fdev,
67 				   struct mlx5_fpga_client_data *context)
68 {
69 	mlx5_fpga_dbg(fdev, "Deleting client context %p of client %p\n",
70 		      context, context->client);
71 	if (context->client->destroy)
72 		context->client->destroy(fdev);
73 	list_del(&context->list);
74 	kfree(context);
75 }
76 
77 static int client_context_create(struct mlx5_fpga_device *fdev,
78 				 struct mlx5_fpga_client *client,
79 				 struct mlx5_fpga_client_data **pctx)
80 {
81 	struct mlx5_fpga_client_data *context;
82 
83 	context = kmalloc(sizeof(*context), GFP_KERNEL);
84 	if (!context)
85 		return -ENOMEM;
86 
87 	context->client = client;
88 	context->data = NULL;
89 	context->added  = false;
90 	list_add(&context->list, &fdev->client_data_list);
91 
92 	mlx5_fpga_dbg(fdev, "Adding client context %p client %p\n",
93 		      context, client);
94 
95 	if (client->create)
96 		client->create(fdev);
97 
98 	if (pctx)
99 		*pctx = context;
100 	return 0;
101 }
102 
103 static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
104 {
105 	struct mlx5_fpga_device *fdev = NULL;
106 
107 	fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
108 	if (!fdev)
109 		return NULL;
110 
111 	spin_lock_init(&fdev->state_lock);
112 	init_completion(&fdev->load_event);
113 	fdev->fdev_state = MLX5_FDEV_STATE_NONE;
114 	INIT_LIST_HEAD(&fdev->client_data_list);
115 	return fdev;
116 }
117 
118 static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
119 {
120 	switch (image) {
121 	case MLX5_FPGA_IMAGE_USER:
122 		return "user";
123 	case MLX5_FPGA_IMAGE_FACTORY:
124 		return "factory";
125 	default:
126 		return "unknown";
127 	}
128 }
129 
130 static const char *mlx5_fpga_name(u32 fpga_id)
131 {
132 	static char ret[32];
133 
134 	switch (fpga_id) {
135 	case MLX5_FPGA_NEWTON:
136 		return "Newton";
137 	case MLX5_FPGA_EDISON:
138 		return "Edison";
139 	case MLX5_FPGA_MORSE:
140 		return "Morse";
141 	case MLX5_FPGA_MORSEQ:
142 		return "MorseQ";
143 	}
144 
145 	snprintf(ret, sizeof(ret), "Unknown %d", fpga_id);
146 	return ret;
147 }
148 
149 static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
150 {
151 	struct mlx5_fpga_query query;
152 	int err;
153 	u32 fpga_id;
154 
155 	err = mlx5_fpga_query(fdev->mdev, &query);
156 	if (err) {
157 		mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
158 		return err;
159 	}
160 
161 	fdev->last_admin_image = query.admin_image;
162 	fdev->last_oper_image = query.oper_image;
163 	fdev->image_status = query.image_status;
164 
165 	mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n",
166 		      query.image_status, query.admin_image, query.oper_image);
167 
168 	/* For Morse projects FPGA has no influence to network functionality */
169 	fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id);
170 	if (fpga_id == MLX5_FPGA_MORSE || fpga_id == MLX5_FPGA_MORSEQ)
171 		return 0;
172 
173 	if (query.image_status != MLX5_FPGA_STATUS_SUCCESS) {
174 		mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
175 			      mlx5_fpga_image_name(fdev->last_oper_image),
176 			      query.image_status);
177 		return -EIO;
178 	}
179 
180 	return 0;
181 }
182 
183 static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
184 {
185 	int err;
186 	struct mlx5_core_dev *mdev = fdev->mdev;
187 
188 	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
189 	if (err) {
190 		mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
191 		return err;
192 	}
193 	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
194 	if (err) {
195 		mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
196 		return err;
197 	}
198 	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
199 	if (err) {
200 		mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
201 		return err;
202 	}
203 	return 0;
204 }
205 
/*
 * Bring up the FPGA device attached to @mdev.
 *
 * Steps, in order: query FPGA capabilities, check that the image loaded,
 * reserve GIDs for the shell QPs, initialize the transaction layer, set up
 * the shell connection (compiled out unless NOT_YET is defined) and, when
 * the user image is the operational one, reset the sandbox and call add()
 * on every client context of the device.
 *
 * Returns 0 when @mdev has no FPGA context or when start-up succeeds,
 * otherwise the error of the failing step.  Whenever an FPGA context exists
 * the outcome is recorded in fdev->fdev_state under fdev->state_lock.
 */
206 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
207 {
208 	struct mlx5_fpga_client_data *client_context;
209 	struct mlx5_fpga_device *fdev = mdev->fpga;
210 	struct mlx5_fpga_conn_attr conn_attr = {0};
211 	struct mlx5_fpga_conn *conn;
212 	unsigned int max_num_qps;
213 	unsigned long flags;
214 	u32 fpga_id;
215 	u32 vid;
216 	u16 pid;
217 	int err;
218 
	/* No FPGA context on this device: nothing to start. */
219 	if (!fdev)
220 		return 0;
221 
222 	err = mlx5_fpga_caps(fdev->mdev);
223 	if (err)
224 		goto out;
225 
226 	err = mlx5_fpga_device_load_check(fdev);
227 	if (err)
228 		goto out;
229 
230 	fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id);
231 	mlx5_fpga_info(fdev, "FPGA card %s\n", mlx5_fpga_name(fpga_id));
232 
	/*
	 * Morse/MorseQ: skip the rest of the bring-up.  err is 0 here, so
	 * the state recorded at "out" is MLX5_FDEV_STATE_SUCCESS.
	 */
233 	if (fpga_id == MLX5_FPGA_MORSE || fpga_id == MLX5_FPGA_MORSEQ)
234 		goto out;
235 
236 	mlx5_fpga_info(fdev, "%s(%d) image, version %u; SBU %06x:%04x version %d\n",
237 		       mlx5_fpga_image_name(fdev->last_oper_image),
238 		       fdev->last_oper_image,
239 		       MLX5_CAP_FPGA(fdev->mdev, image_version),
240 		       MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
241 		       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
242 		       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
243 
	/* max_num_qps is also the count released again at err_rsvd_gid. */
244 	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
245 	err = mlx5_core_reserve_gids(mdev, max_num_qps);
246 	if (err)
247 		goto out;
248 
	/* Connection-layer init is compiled out unless NOT_YET is defined. */
249 #ifdef NOT_YET
250 	/* XXXKIB */
251 	err = mlx5_fpga_conn_device_init(fdev);
252 #else
253 	err = 0;
254 #endif
255 	if (err)
256 		goto err_rsvd_gid;
257 
258 	err = mlx5_fpga_trans_device_init(fdev);
259 	if (err) {
260 		mlx5_fpga_err(fdev, "Failed to init transaction: %d\n",
261 			      err);
262 		goto err_conn_init;
263 	}
264 
265 	conn_attr.tx_size = MLX5_FPGA_TID_COUNT;
266 	conn_attr.rx_size = MLX5_FPGA_TID_COUNT;
267 	conn_attr.recv_cb = mlx5_fpga_trans_recv;
268 	conn_attr.cb_arg = fdev;
	/*
	 * Without NOT_YET no shell connection is created: conn, and thus
	 * fdev->shell_conn, is NULL.
	 */
269 #ifdef NOT_YET
270 	/* XXXKIB */
271 	conn = mlx5_fpga_conn_create(fdev, &conn_attr,
272 				     MLX5_FPGA_QPC_QP_TYPE_SHELL_QP);
273 	if (IS_ERR(conn)) {
274 		err = PTR_ERR(conn);
275 		mlx5_fpga_err(fdev, "Failed to create shell conn: %d\n", err);
276 		goto err_trans;
277 	}
278 #else
279 	conn = NULL;
280 #endif
281 	fdev->shell_conn = conn;
282 
	/* Clients are only attached when the user image is operational. */
283 	if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
284 		err = mlx5_fpga_device_brb(fdev);
285 		if (err)
286 			goto err_shell_conn;
287 
288 		vid = MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id);
289 		pid = MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id);
290 		mutex_lock(&mlx5_fpga_mutex);
291 		list_for_each_entry(client_context, &fdev->client_data_list,
292 				    list) {
			/* A non-zero add() result leaves the context un-added. */
293 			if (client_context->client->add(fdev, vid, pid))
294 				continue;
295 			client_context->added = true;
296 		}
297 		mutex_unlock(&mlx5_fpga_mutex);
298 	}
299 
	/* Success: skip the unwind ladder below. */
300 	goto out;
301 
	/* Error unwind: each label undoes the steps completed before it. */
302 err_shell_conn:
303 	if (fdev->shell_conn) {
304 #ifdef NOT_YET
305 		/* XXXKIB */
306 		mlx5_fpga_conn_destroy(fdev->shell_conn);
307 #endif
308 		fdev->shell_conn = NULL;
309 	}
310 
	/* err_trans is only a jump target when NOT_YET is defined. */
311 #ifdef NOT_YET
312 		/* XXXKIB */
313 err_trans:
314 #endif
315 	mlx5_fpga_trans_device_cleanup(fdev);
316 
317 err_conn_init:
318 #ifdef NOT_YET
319 	/* XXXKIB */
320 	mlx5_fpga_conn_device_cleanup(fdev);
321 #endif
322 
323 err_rsvd_gid:
324 	mlx5_core_unreserve_gids(mdev, max_num_qps);
325 out:
	/* Record the final outcome; reached on success and on every failure. */
326 	spin_lock_irqsave(&fdev->state_lock, flags);
327 	fdev->fdev_state = err ? MLX5_FDEV_STATE_FAILURE : MLX5_FDEV_STATE_SUCCESS;
328 	spin_unlock_irqrestore(&fdev->state_lock, flags);
329 	return err;
330 }
331 
332 int mlx5_fpga_init(struct mlx5_core_dev *mdev)
333 {
334 	struct mlx5_fpga_device *fdev = NULL;
335 	struct mlx5_fpga_client *client;
336 
337 	if (!MLX5_CAP_GEN(mdev, fpga)) {
338 		mlx5_core_dbg(mdev, "FPGA capability not present\n");
339 		return 0;
340 	}
341 
342 	mlx5_core_dbg(mdev, "Initializing FPGA\n");
343 
344 	fdev = mlx5_fpga_device_alloc();
345 	if (!fdev)
346 		return -ENOMEM;
347 
348 	fdev->mdev = mdev;
349 	mdev->fpga = fdev;
350 
351 	mutex_lock(&mlx5_fpga_mutex);
352 
353 	list_add_tail(&fdev->list, &mlx5_fpga_devices);
354 	list_for_each_entry(client, &mlx5_fpga_clients, list)
355 		client_context_create(fdev, client, NULL);
356 
357 	mutex_unlock(&mlx5_fpga_mutex);
358 	return 0;
359 }
360 
361 void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
362 {
363 	struct mlx5_fpga_client_data *client_context;
364 	struct mlx5_fpga_device *fdev = mdev->fpga;
365 	unsigned int max_num_qps;
366 	unsigned long flags;
367 	int err;
368 	u32 fpga_id;
369 
370 	if (!fdev)
371 		return;
372 
373 	fpga_id = MLX5_CAP_FPGA(mdev, fpga_id);
374 	if (fpga_id == MLX5_FPGA_MORSE || fpga_id == MLX5_FPGA_MORSEQ)
375 		return;
376 
377 	spin_lock_irqsave(&fdev->state_lock, flags);
378 
379 	if (fdev->fdev_state != MLX5_FDEV_STATE_SUCCESS) {
380 		spin_unlock_irqrestore(&fdev->state_lock, flags);
381 		return;
382 	}
383 	fdev->fdev_state = MLX5_FDEV_STATE_NONE;
384 	spin_unlock_irqrestore(&fdev->state_lock, flags);
385 
386 	if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
387 		err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
388 		if (err)
389 			mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
390 				      err);
391 	}
392 
393 	mutex_lock(&mlx5_fpga_mutex);
394 	list_for_each_entry(client_context, &fdev->client_data_list, list) {
395 		if (!client_context->added)
396 			continue;
397 		client_context->client->remove(fdev);
398 		client_context->added = false;
399 	}
400 	mutex_unlock(&mlx5_fpga_mutex);
401 
402 	if (fdev->shell_conn) {
403 #ifdef NOT_YET
404 		/* XXXKIB */
405 		mlx5_fpga_conn_destroy(fdev->shell_conn);
406 #endif
407 		fdev->shell_conn = NULL;
408 		mlx5_fpga_trans_device_cleanup(fdev);
409 	}
410 #ifdef NOT_YET
411 	/* XXXKIB */
412 	mlx5_fpga_conn_device_cleanup(fdev);
413 #endif
414 	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
415 	mlx5_core_unreserve_gids(mdev, max_num_qps);
416 }
417 
418 void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
419 {
420 	struct mlx5_fpga_client_data *context, *tmp;
421 	struct mlx5_fpga_device *fdev = mdev->fpga;
422 
423 	if (!fdev)
424 		return;
425 
426 	mutex_lock(&mlx5_fpga_mutex);
427 
428 	mlx5_fpga_device_stop(mdev);
429 
430 	list_for_each_entry_safe(context, tmp, &fdev->client_data_list, list)
431 		client_context_destroy(fdev, context);
432 
433 	list_del(&fdev->list);
434 	kfree(fdev);
435 	mdev->fpga = NULL;
436 
437 	mutex_unlock(&mlx5_fpga_mutex);
438 }
439 
440 static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
441 {
442 	if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
443 		return mlx5_fpga_error_strings[syndrome];
444 	return "Unknown";
445 }
446 
447 static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
448 {
449 	if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings))
450 		return mlx5_fpga_qp_error_strings[syndrome];
451 	return "Unknown";
452 }
453 
454 void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
455 {
456 	struct mlx5_fpga_device *fdev = mdev->fpga;
457 	const char *event_name;
458 	bool teardown = false;
459 	unsigned long flags;
460 	u32 fpga_qpn;
461 	u8 syndrome;
462 
463 	switch (event) {
464 	case MLX5_EVENT_TYPE_FPGA_ERROR:
465 		syndrome = MLX5_GET(fpga_error_event, data, syndrome);
466 		event_name = mlx5_fpga_syndrome_to_string(syndrome);
467 		break;
468 	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
469 		syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
470 		event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
471 		fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn);
472 		mlx5_fpga_err(fdev, "Error %u on QP %u: %s\n",
473 			      syndrome, fpga_qpn, event_name);
474 		break;
475 	default:
476 		mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n",
477 					   event);
478 		return;
479 	}
480 
481 	spin_lock_irqsave(&fdev->state_lock, flags);
482 	switch (fdev->fdev_state) {
483 	case MLX5_FDEV_STATE_SUCCESS:
484 		mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
485 		teardown = true;
486 		break;
487 	case MLX5_FDEV_STATE_IN_PROGRESS:
488 		if (syndrome != MLX5_FPGA_ERROR_EVENT_SYNDROME_IMAGE_CHANGED)
489 			mlx5_fpga_warn(fdev, "Error while loading %u: %s\n",
490 				       syndrome, event_name);
491 		complete(&fdev->load_event);
492 		break;
493 	default:
494 		mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
495 					   syndrome, event_name);
496 	}
497 	spin_unlock_irqrestore(&fdev->state_lock, flags);
498 	/* We tear-down the card's interfaces and functionality because
499 	 * the FPGA bump-on-the-wire is misbehaving and we lose ability
500 	 * to communicate with the network. User may still be able to
501 	 * recover by re-programming or debugging the FPGA
502 	 */
503 	if (teardown)
504 		mlx5_trigger_health_work(fdev->mdev);
505 }
506 
507 void mlx5_fpga_client_register(struct mlx5_fpga_client *client)
508 {
509 	struct mlx5_fpga_client_data *context;
510 	struct mlx5_fpga_device *fdev;
511 	bool call_add = false;
512 	unsigned long flags;
513 	u32 vid;
514 	u16 pid;
515 	int err;
516 
517 	pr_debug("Client register %s\n", client->name);
518 
519 	mutex_lock(&mlx5_fpga_mutex);
520 
521 	list_add_tail(&client->list, &mlx5_fpga_clients);
522 
523 	list_for_each_entry(fdev, &mlx5_fpga_devices, list) {
524 		err = client_context_create(fdev, client, &context);
525 		if (err)
526 			continue;
527 
528 		spin_lock_irqsave(&fdev->state_lock, flags);
529 		call_add = (fdev->fdev_state == MLX5_FDEV_STATE_SUCCESS);
530 		spin_unlock_irqrestore(&fdev->state_lock, flags);
531 
532 		if (call_add) {
533 			vid = MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id);
534 			pid = MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id);
535 			if (!client->add(fdev, vid, pid))
536 				context->added = true;
537 		}
538 	}
539 
540 	mutex_unlock(&mlx5_fpga_mutex);
541 }
542 EXPORT_SYMBOL(mlx5_fpga_client_register);
543 
544 void mlx5_fpga_client_unregister(struct mlx5_fpga_client *client)
545 {
546 	struct mlx5_fpga_client_data *context, *tmp_context;
547 	struct mlx5_fpga_device *fdev;
548 
549 	pr_debug("Client unregister %s\n", client->name);
550 
551 	mutex_lock(&mlx5_fpga_mutex);
552 
553 	list_for_each_entry(fdev, &mlx5_fpga_devices, list) {
554 		list_for_each_entry_safe(context, tmp_context,
555 					 &fdev->client_data_list,
556 					 list) {
557 			if (context->client != client)
558 				continue;
559 			if (context->added)
560 				client->remove(fdev);
561 			client_context_destroy(fdev, context);
562 			break;
563 		}
564 	}
565 
566 	list_del(&client->list);
567 	mutex_unlock(&mlx5_fpga_mutex);
568 }
569 EXPORT_SYMBOL(mlx5_fpga_client_unregister);
570 
/*
 * Module metadata: mlx5fpga declares dependencies on the linuxkpi and mlx5
 * modules and publishes its own version as 1.
 */
571 MODULE_DEPEND(mlx5fpga, linuxkpi, 1, 1, 1);
572 MODULE_DEPEND(mlx5fpga, mlx5, 1, 1, 1);
573 MODULE_VERSION(mlx5fpga, 1);
574