xref: /freebsd/sys/dev/mlx5/mlx5_fpga/mlx5fpga_core.c (revision 1d386b48)
1 /*-
2  * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/etherdevice.h>
35 #include <dev/mlx5/driver.h>
36 #include <dev/mlx5/mlx5_core/mlx5_core.h>
37 #include <dev/mlx5/mlx5_lib/mlx5.h>
38 #include <dev/mlx5/mlx5_fpga/core.h>
39 #include <dev/mlx5/mlx5_fpga/conn.h>
40 #include <dev/mlx5/mlx5_fpga/trans.h>
41 
/*
 * Serializes client registration/unregistration against device
 * init/start/stop/cleanup.  Within this file the two global lists below
 * and every device's client_data_list are only walked or modified with
 * this mutex held.
 */
static DEFINE_MUTEX(mlx5_fpga_mutex);
/* All registered FPGA clients, linked through mlx5_fpga_client::list. */
static LIST_HEAD(mlx5_fpga_clients);
/* All FPGA devices created by mlx5_fpga_init(), linked through ::list. */
static LIST_HEAD(mlx5_fpga_devices);
46 
/*
 * Text for FPGA error-event syndromes; the array index is the syndrome
 * value.  Looked up by mlx5_fpga_syndrome_to_string().
 */
static const char *const mlx5_fpga_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Corrupted DDR",
	[2] = "Flash Timeout",
	[3] = "Internal Link Error",
	[4] = "Watchdog HW Failure",
	[5] = "I2C Failure",
	[6] = "Image Changed",
	[7] = "Temperature Critical",
};
57 
/*
 * Text for FPGA QP error-event syndromes; the array index is the
 * syndrome value.  Looked up by mlx5_fpga_qp_syndrome_to_string().
 */
static const char *const mlx5_fpga_qp_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Retry Counter Expired",
	[2] = "RNR Expired",
};
63 
64 static void client_context_destroy(struct mlx5_fpga_device *fdev,
65 				   struct mlx5_fpga_client_data *context)
66 {
67 	mlx5_fpga_dbg(fdev, "Deleting client context %p of client %p\n",
68 		      context, context->client);
69 	if (context->client->destroy)
70 		context->client->destroy(fdev);
71 	list_del(&context->list);
72 	kfree(context);
73 }
74 
75 static int client_context_create(struct mlx5_fpga_device *fdev,
76 				 struct mlx5_fpga_client *client,
77 				 struct mlx5_fpga_client_data **pctx)
78 {
79 	struct mlx5_fpga_client_data *context;
80 
81 	context = kmalloc(sizeof(*context), GFP_KERNEL);
82 	if (!context)
83 		return -ENOMEM;
84 
85 	context->client = client;
86 	context->data = NULL;
87 	context->added  = false;
88 	list_add(&context->list, &fdev->client_data_list);
89 
90 	mlx5_fpga_dbg(fdev, "Adding client context %p client %p\n",
91 		      context, client);
92 
93 	if (client->create)
94 		client->create(fdev);
95 
96 	if (pctx)
97 		*pctx = context;
98 	return 0;
99 }
100 
101 static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
102 {
103 	struct mlx5_fpga_device *fdev = NULL;
104 
105 	fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
106 	if (!fdev)
107 		return NULL;
108 
109 	spin_lock_init(&fdev->state_lock);
110 	init_completion(&fdev->load_event);
111 	fdev->fdev_state = MLX5_FDEV_STATE_NONE;
112 	INIT_LIST_HEAD(&fdev->client_data_list);
113 	return fdev;
114 }
115 
116 static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
117 {
118 	switch (image) {
119 	case MLX5_FPGA_IMAGE_USER:
120 		return "user";
121 	case MLX5_FPGA_IMAGE_FACTORY:
122 		return "factory";
123 	default:
124 		return "unknown";
125 	}
126 }
127 
128 static const char *mlx5_fpga_name(u32 fpga_id)
129 {
130 	static char ret[32];
131 
132 	switch (fpga_id) {
133 	case MLX5_FPGA_NEWTON:
134 		return "Newton";
135 	case MLX5_FPGA_EDISON:
136 		return "Edison";
137 	case MLX5_FPGA_MORSE:
138 		return "Morse";
139 	case MLX5_FPGA_MORSEQ:
140 		return "MorseQ";
141 	}
142 
143 	snprintf(ret, sizeof(ret), "Unknown %d", fpga_id);
144 	return ret;
145 }
146 
147 static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
148 {
149 	struct mlx5_fpga_query query;
150 	int err;
151 	u32 fpga_id;
152 
153 	err = mlx5_fpga_query(fdev->mdev, &query);
154 	if (err) {
155 		mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
156 		return err;
157 	}
158 
159 	fdev->last_admin_image = query.admin_image;
160 	fdev->last_oper_image = query.oper_image;
161 	fdev->image_status = query.image_status;
162 
163 	mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n",
164 		      query.image_status, query.admin_image, query.oper_image);
165 
166 	/* For Morse projects FPGA has no influence to network functionality */
167 	fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id);
168 	if (fpga_id == MLX5_FPGA_MORSE || fpga_id == MLX5_FPGA_MORSEQ)
169 		return 0;
170 
171 	if (query.image_status != MLX5_FPGA_STATUS_SUCCESS) {
172 		mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
173 			      mlx5_fpga_image_name(fdev->last_oper_image),
174 			      query.image_status);
175 		return -EIO;
176 	}
177 
178 	return 0;
179 }
180 
181 static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
182 {
183 	int err;
184 	struct mlx5_core_dev *mdev = fdev->mdev;
185 
186 	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
187 	if (err) {
188 		mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
189 		return err;
190 	}
191 	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
192 	if (err) {
193 		mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
194 		return err;
195 	}
196 	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
197 	if (err) {
198 		mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
199 		return err;
200 	}
201 	return 0;
202 }
203 
204 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
205 {
206 	struct mlx5_fpga_client_data *client_context;
207 	struct mlx5_fpga_device *fdev = mdev->fpga;
208 	struct mlx5_fpga_conn_attr conn_attr = {0};
209 	struct mlx5_fpga_conn *conn;
210 	unsigned int max_num_qps;
211 	unsigned long flags;
212 	u32 fpga_id;
213 	u32 vid;
214 	u16 pid;
215 	int err;
216 
217 	if (!fdev)
218 		return 0;
219 
220 	err = mlx5_fpga_caps(fdev->mdev);
221 	if (err)
222 		goto out;
223 
224 	err = mlx5_fpga_device_load_check(fdev);
225 	if (err)
226 		goto out;
227 
228 	fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id);
229 	mlx5_fpga_info(fdev, "FPGA card %s\n", mlx5_fpga_name(fpga_id));
230 
231 	if (fpga_id == MLX5_FPGA_MORSE || fpga_id == MLX5_FPGA_MORSEQ)
232 		goto out;
233 
234 	mlx5_fpga_info(fdev, "%s(%d) image, version %u; SBU %06x:%04x version %d\n",
235 		       mlx5_fpga_image_name(fdev->last_oper_image),
236 		       fdev->last_oper_image,
237 		       MLX5_CAP_FPGA(fdev->mdev, image_version),
238 		       MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
239 		       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
240 		       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
241 
242 	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
243 	err = mlx5_core_reserve_gids(mdev, max_num_qps);
244 	if (err)
245 		goto out;
246 
247 #ifdef NOT_YET
248 	/* XXXKIB */
249 	err = mlx5_fpga_conn_device_init(fdev);
250 #else
251 	err = 0;
252 #endif
253 	if (err)
254 		goto err_rsvd_gid;
255 
256 	err = mlx5_fpga_trans_device_init(fdev);
257 	if (err) {
258 		mlx5_fpga_err(fdev, "Failed to init transaction: %d\n",
259 			      err);
260 		goto err_conn_init;
261 	}
262 
263 	conn_attr.tx_size = MLX5_FPGA_TID_COUNT;
264 	conn_attr.rx_size = MLX5_FPGA_TID_COUNT;
265 	conn_attr.recv_cb = mlx5_fpga_trans_recv;
266 	conn_attr.cb_arg = fdev;
267 #ifdef NOT_YET
268 	/* XXXKIB */
269 	conn = mlx5_fpga_conn_create(fdev, &conn_attr,
270 				     MLX5_FPGA_QPC_QP_TYPE_SHELL_QP);
271 	if (IS_ERR(conn)) {
272 		err = PTR_ERR(conn);
273 		mlx5_fpga_err(fdev, "Failed to create shell conn: %d\n", err);
274 		goto err_trans;
275 	}
276 #else
277 	conn = NULL;
278 #endif
279 	fdev->shell_conn = conn;
280 
281 	if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
282 		err = mlx5_fpga_device_brb(fdev);
283 		if (err)
284 			goto err_shell_conn;
285 
286 		vid = MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id);
287 		pid = MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id);
288 		mutex_lock(&mlx5_fpga_mutex);
289 		list_for_each_entry(client_context, &fdev->client_data_list,
290 				    list) {
291 			if (client_context->client->add(fdev, vid, pid))
292 				continue;
293 			client_context->added = true;
294 		}
295 		mutex_unlock(&mlx5_fpga_mutex);
296 	}
297 
298 	goto out;
299 
300 err_shell_conn:
301 	if (fdev->shell_conn) {
302 #ifdef NOT_YET
303 		/* XXXKIB */
304 		mlx5_fpga_conn_destroy(fdev->shell_conn);
305 #endif
306 		fdev->shell_conn = NULL;
307 	}
308 
309 #ifdef NOT_YET
310 		/* XXXKIB */
311 err_trans:
312 #endif
313 	mlx5_fpga_trans_device_cleanup(fdev);
314 
315 err_conn_init:
316 #ifdef NOT_YET
317 	/* XXXKIB */
318 	mlx5_fpga_conn_device_cleanup(fdev);
319 #endif
320 
321 err_rsvd_gid:
322 	mlx5_core_unreserve_gids(mdev, max_num_qps);
323 out:
324 	spin_lock_irqsave(&fdev->state_lock, flags);
325 	fdev->fdev_state = err ? MLX5_FDEV_STATE_FAILURE : MLX5_FDEV_STATE_SUCCESS;
326 	spin_unlock_irqrestore(&fdev->state_lock, flags);
327 	return err;
328 }
329 
330 int mlx5_fpga_init(struct mlx5_core_dev *mdev)
331 {
332 	struct mlx5_fpga_device *fdev = NULL;
333 	struct mlx5_fpga_client *client;
334 
335 	if (!MLX5_CAP_GEN(mdev, fpga)) {
336 		mlx5_core_dbg(mdev, "FPGA capability not present\n");
337 		return 0;
338 	}
339 
340 	mlx5_core_dbg(mdev, "Initializing FPGA\n");
341 
342 	fdev = mlx5_fpga_device_alloc();
343 	if (!fdev)
344 		return -ENOMEM;
345 
346 	fdev->mdev = mdev;
347 	mdev->fpga = fdev;
348 
349 	mutex_lock(&mlx5_fpga_mutex);
350 
351 	list_add_tail(&fdev->list, &mlx5_fpga_devices);
352 	list_for_each_entry(client, &mlx5_fpga_clients, list)
353 		client_context_create(fdev, client, NULL);
354 
355 	mutex_unlock(&mlx5_fpga_mutex);
356 	return 0;
357 }
358 
359 void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
360 {
361 	struct mlx5_fpga_client_data *client_context;
362 	struct mlx5_fpga_device *fdev = mdev->fpga;
363 	unsigned int max_num_qps;
364 	unsigned long flags;
365 	int err;
366 	u32 fpga_id;
367 
368 	if (!fdev)
369 		return;
370 
371 	fpga_id = MLX5_CAP_FPGA(mdev, fpga_id);
372 	if (fpga_id == MLX5_FPGA_MORSE || fpga_id == MLX5_FPGA_MORSEQ)
373 		return;
374 
375 	spin_lock_irqsave(&fdev->state_lock, flags);
376 
377 	if (fdev->fdev_state != MLX5_FDEV_STATE_SUCCESS) {
378 		spin_unlock_irqrestore(&fdev->state_lock, flags);
379 		return;
380 	}
381 	fdev->fdev_state = MLX5_FDEV_STATE_NONE;
382 	spin_unlock_irqrestore(&fdev->state_lock, flags);
383 
384 	if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
385 		err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
386 		if (err)
387 			mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
388 				      err);
389 	}
390 
391 	mutex_lock(&mlx5_fpga_mutex);
392 	list_for_each_entry(client_context, &fdev->client_data_list, list) {
393 		if (!client_context->added)
394 			continue;
395 		client_context->client->remove(fdev);
396 		client_context->added = false;
397 	}
398 	mutex_unlock(&mlx5_fpga_mutex);
399 
400 	if (fdev->shell_conn) {
401 #ifdef NOT_YET
402 		/* XXXKIB */
403 		mlx5_fpga_conn_destroy(fdev->shell_conn);
404 #endif
405 		fdev->shell_conn = NULL;
406 		mlx5_fpga_trans_device_cleanup(fdev);
407 	}
408 #ifdef NOT_YET
409 	/* XXXKIB */
410 	mlx5_fpga_conn_device_cleanup(fdev);
411 #endif
412 	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
413 	mlx5_core_unreserve_gids(mdev, max_num_qps);
414 }
415 
416 void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
417 {
418 	struct mlx5_fpga_client_data *context, *tmp;
419 	struct mlx5_fpga_device *fdev = mdev->fpga;
420 
421 	if (!fdev)
422 		return;
423 
424 	mutex_lock(&mlx5_fpga_mutex);
425 
426 	mlx5_fpga_device_stop(mdev);
427 
428 	list_for_each_entry_safe(context, tmp, &fdev->client_data_list, list)
429 		client_context_destroy(fdev, context);
430 
431 	list_del(&fdev->list);
432 	kfree(fdev);
433 	mdev->fpga = NULL;
434 
435 	mutex_unlock(&mlx5_fpga_mutex);
436 }
437 
438 static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
439 {
440 	if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
441 		return mlx5_fpga_error_strings[syndrome];
442 	return "Unknown";
443 }
444 
445 static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
446 {
447 	if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings))
448 		return mlx5_fpga_qp_error_strings[syndrome];
449 	return "Unknown";
450 }
451 
452 void mlx5_fpga_event(struct mlx5_core_dev *mdev, u8 event, void *data)
453 {
454 	struct mlx5_fpga_device *fdev = mdev->fpga;
455 	const char *event_name;
456 	bool teardown = false;
457 	unsigned long flags;
458 	u32 fpga_qpn;
459 	u8 syndrome;
460 
461 	switch (event) {
462 	case MLX5_EVENT_TYPE_FPGA_ERROR:
463 		syndrome = MLX5_GET(fpga_error_event, data, syndrome);
464 		event_name = mlx5_fpga_syndrome_to_string(syndrome);
465 		break;
466 	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
467 		syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
468 		event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
469 		fpga_qpn = MLX5_GET(fpga_qp_error_event, data, fpga_qpn);
470 		mlx5_fpga_err(fdev, "Error %u on QP %u: %s\n",
471 			      syndrome, fpga_qpn, event_name);
472 		break;
473 	default:
474 		mlx5_fpga_warn_ratelimited(fdev, "Unexpected event %u\n",
475 					   event);
476 		return;
477 	}
478 
479 	spin_lock_irqsave(&fdev->state_lock, flags);
480 	switch (fdev->fdev_state) {
481 	case MLX5_FDEV_STATE_SUCCESS:
482 		mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
483 		teardown = true;
484 		break;
485 	case MLX5_FDEV_STATE_IN_PROGRESS:
486 		if (syndrome != MLX5_FPGA_ERROR_EVENT_SYNDROME_IMAGE_CHANGED)
487 			mlx5_fpga_warn(fdev, "Error while loading %u: %s\n",
488 				       syndrome, event_name);
489 		complete(&fdev->load_event);
490 		break;
491 	default:
492 		mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
493 					   syndrome, event_name);
494 	}
495 	spin_unlock_irqrestore(&fdev->state_lock, flags);
496 	/* We tear-down the card's interfaces and functionality because
497 	 * the FPGA bump-on-the-wire is misbehaving and we lose ability
498 	 * to communicate with the network. User may still be able to
499 	 * recover by re-programming or debugging the FPGA
500 	 */
501 	if (teardown)
502 		mlx5_trigger_health_work(fdev->mdev);
503 }
504 
505 void mlx5_fpga_client_register(struct mlx5_fpga_client *client)
506 {
507 	struct mlx5_fpga_client_data *context;
508 	struct mlx5_fpga_device *fdev;
509 	bool call_add = false;
510 	unsigned long flags;
511 	u32 vid;
512 	u16 pid;
513 	int err;
514 
515 	pr_debug("Client register %s\n", client->name);
516 
517 	mutex_lock(&mlx5_fpga_mutex);
518 
519 	list_add_tail(&client->list, &mlx5_fpga_clients);
520 
521 	list_for_each_entry(fdev, &mlx5_fpga_devices, list) {
522 		err = client_context_create(fdev, client, &context);
523 		if (err)
524 			continue;
525 
526 		spin_lock_irqsave(&fdev->state_lock, flags);
527 		call_add = (fdev->fdev_state == MLX5_FDEV_STATE_SUCCESS);
528 		spin_unlock_irqrestore(&fdev->state_lock, flags);
529 
530 		if (call_add) {
531 			vid = MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id);
532 			pid = MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id);
533 			if (!client->add(fdev, vid, pid))
534 				context->added = true;
535 		}
536 	}
537 
538 	mutex_unlock(&mlx5_fpga_mutex);
539 }
540 EXPORT_SYMBOL(mlx5_fpga_client_register);
541 
542 void mlx5_fpga_client_unregister(struct mlx5_fpga_client *client)
543 {
544 	struct mlx5_fpga_client_data *context, *tmp_context;
545 	struct mlx5_fpga_device *fdev;
546 
547 	pr_debug("Client unregister %s\n", client->name);
548 
549 	mutex_lock(&mlx5_fpga_mutex);
550 
551 	list_for_each_entry(fdev, &mlx5_fpga_devices, list) {
552 		list_for_each_entry_safe(context, tmp_context,
553 					 &fdev->client_data_list,
554 					 list) {
555 			if (context->client != client)
556 				continue;
557 			if (context->added)
558 				client->remove(fdev);
559 			client_context_destroy(fdev, context);
560 			break;
561 		}
562 	}
563 
564 	list_del(&client->list);
565 	mutex_unlock(&mlx5_fpga_mutex);
566 }
567 EXPORT_SYMBOL(mlx5_fpga_client_unregister);
568 
569 MODULE_DEPEND(mlx5fpga, linuxkpi, 1, 1, 1);
570 MODULE_DEPEND(mlx5fpga, mlx5, 1, 1, 1);
571 MODULE_VERSION(mlx5fpga, 1);
572