1 /*
2  * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/module.h>
34 #include <linux/etherdevice.h>
35 #include <linux/mlx5/driver.h>
36 
37 #include "mlx5_core.h"
38 #include "lib/mlx5.h"
39 #include "lib/eq.h"
40 #include "fpga/core.h"
41 #include "fpga/conn.h"
42 
/*
 * Descriptions of FPGA error events. The position of each entry is the
 * syndrome value it describes; the table is indexed directly by syndrome.
 */
static const char *const mlx5_fpga_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Corrupted DDR",
	[2] = "Flash Timeout",
	[3] = "Internal Link Error",
	[4] = "Watchdog HW Failure",
	[5] = "I2C Failure",
	[6] = "Image Changed",
	[7] = "Temperature Critical",
};
53 
/*
 * Descriptions of FPGA QP error events. The position of each entry is the
 * syndrome value it describes; the table is indexed directly by syndrome.
 */
static const char *const mlx5_fpga_qp_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Retry Counter Expired",
	[2] = "RNR Expired",
};
59 static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
60 {
61 	struct mlx5_fpga_device *fdev = NULL;
62 
63 	fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
64 	if (!fdev)
65 		return NULL;
66 
67 	spin_lock_init(&fdev->state_lock);
68 	fdev->state = MLX5_FPGA_STATUS_NONE;
69 	return fdev;
70 }
71 
72 static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
73 {
74 	switch (image) {
75 	case MLX5_FPGA_IMAGE_USER:
76 		return "user";
77 	case MLX5_FPGA_IMAGE_FACTORY:
78 		return "factory";
79 	default:
80 		return "unknown";
81 	}
82 }
83 
84 static const char *mlx5_fpga_device_name(u32 device)
85 {
86 	switch (device) {
87 	case MLX5_FPGA_DEVICE_KU040:
88 		return "ku040";
89 	case MLX5_FPGA_DEVICE_KU060:
90 		return "ku060";
91 	case MLX5_FPGA_DEVICE_KU060_2:
92 		return "ku060_2";
93 	case MLX5_FPGA_DEVICE_UNKNOWN:
94 	default:
95 		return "unknown";
96 	}
97 }
98 
99 static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
100 {
101 	struct mlx5_fpga_query query;
102 	int err;
103 
104 	err = mlx5_fpga_query(fdev->mdev, &query);
105 	if (err) {
106 		mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
107 		return err;
108 	}
109 
110 	fdev->last_admin_image = query.admin_image;
111 	fdev->last_oper_image = query.oper_image;
112 
113 	mlx5_fpga_dbg(fdev, "Status %u; Admin image %u; Oper image %u\n",
114 		      query.status, query.admin_image, query.oper_image);
115 
116 	if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
117 		mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
118 			      mlx5_fpga_image_name(fdev->last_oper_image),
119 			      query.status);
120 		return -EIO;
121 	}
122 
123 	return 0;
124 }
125 
126 static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
127 {
128 	int err;
129 	struct mlx5_core_dev *mdev = fdev->mdev;
130 
131 	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
132 	if (err) {
133 		mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
134 		return err;
135 	}
136 	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
137 	if (err) {
138 		mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
139 		return err;
140 	}
141 	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
142 	if (err) {
143 		mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
144 		return err;
145 	}
146 	return 0;
147 }
148 
/* Defined further down; the EQ notifier callbacks below forward to it. */
static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
			   unsigned long event, void *eqe);
150 
151 static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
152 {
153 	struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
154 
155 	return mlx5_fpga_event(fdev, event, eqe);
156 }
157 
158 static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
159 {
160 	struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
161 
162 	return mlx5_fpga_event(fdev, event, eqe);
163 }
164 
165 int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
166 {
167 	struct mlx5_fpga_device *fdev = mdev->fpga;
168 	unsigned int max_num_qps;
169 	unsigned long flags;
170 	u32 fpga_device_id;
171 	int err;
172 
173 	if (!fdev)
174 		return 0;
175 
176 	err = mlx5_fpga_device_load_check(fdev);
177 	if (err)
178 		goto out;
179 
180 	err = mlx5_fpga_caps(fdev->mdev);
181 	if (err)
182 		goto out;
183 
184 	fpga_device_id = MLX5_CAP_FPGA(fdev->mdev, fpga_device);
185 	mlx5_fpga_info(fdev, "%s:%u; %s image, version %u; SBU %06x:%04x version %d\n",
186 		       mlx5_fpga_device_name(fpga_device_id),
187 		       fpga_device_id,
188 		       mlx5_fpga_image_name(fdev->last_oper_image),
189 		       MLX5_CAP_FPGA(fdev->mdev, image_version),
190 		       MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
191 		       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
192 		       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
193 
194 	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
195 	if (!max_num_qps) {
196 		mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n");
197 		err = -ENOTSUPP;
198 		goto out;
199 	}
200 
201 	err = mlx5_core_reserve_gids(mdev, max_num_qps);
202 	if (err)
203 		goto out;
204 
205 	MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
206 	MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
207 	mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
208 	mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
209 
210 	err = mlx5_fpga_conn_device_init(fdev);
211 	if (err)
212 		goto err_rsvd_gid;
213 
214 	if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
215 		err = mlx5_fpga_device_brb(fdev);
216 		if (err)
217 			goto err_conn_init;
218 	}
219 
220 	goto out;
221 
222 err_conn_init:
223 	mlx5_fpga_conn_device_cleanup(fdev);
224 
225 err_rsvd_gid:
226 	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
227 	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
228 	mlx5_core_unreserve_gids(mdev, max_num_qps);
229 out:
230 	spin_lock_irqsave(&fdev->state_lock, flags);
231 	fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
232 	spin_unlock_irqrestore(&fdev->state_lock, flags);
233 	return err;
234 }
235 
236 int mlx5_fpga_init(struct mlx5_core_dev *mdev)
237 {
238 	struct mlx5_fpga_device *fdev = NULL;
239 
240 	if (!MLX5_CAP_GEN(mdev, fpga)) {
241 		mlx5_core_dbg(mdev, "FPGA capability not present\n");
242 		return 0;
243 	}
244 
245 	mlx5_core_dbg(mdev, "Initializing FPGA\n");
246 
247 	fdev = mlx5_fpga_device_alloc();
248 	if (!fdev)
249 		return -ENOMEM;
250 
251 	fdev->mdev = mdev;
252 	mdev->fpga = fdev;
253 
254 	return 0;
255 }
256 
257 void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
258 {
259 	struct mlx5_fpga_device *fdev = mdev->fpga;
260 	unsigned int max_num_qps;
261 	unsigned long flags;
262 	int err;
263 
264 	if (!fdev)
265 		return;
266 
267 	spin_lock_irqsave(&fdev->state_lock, flags);
268 	if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
269 		spin_unlock_irqrestore(&fdev->state_lock, flags);
270 		return;
271 	}
272 	fdev->state = MLX5_FPGA_STATUS_NONE;
273 	spin_unlock_irqrestore(&fdev->state_lock, flags);
274 
275 	if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
276 		err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
277 		if (err)
278 			mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
279 				      err);
280 	}
281 
282 	mlx5_fpga_conn_device_cleanup(fdev);
283 	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
284 	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
285 
286 	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
287 	mlx5_core_unreserve_gids(mdev, max_num_qps);
288 }
289 
290 void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
291 {
292 	struct mlx5_fpga_device *fdev = mdev->fpga;
293 
294 	mlx5_fpga_device_stop(mdev);
295 	kfree(fdev);
296 	mdev->fpga = NULL;
297 }
298 
299 static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
300 {
301 	if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
302 		return mlx5_fpga_error_strings[syndrome];
303 	return "Unknown";
304 }
305 
306 static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
307 {
308 	if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings))
309 		return mlx5_fpga_qp_error_strings[syndrome];
310 	return "Unknown";
311 }
312 
313 static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
314 			   unsigned long event, void *eqe)
315 {
316 	void *data = ((struct mlx5_eqe *)eqe)->data.raw;
317 	const char *event_name;
318 	bool teardown = false;
319 	unsigned long flags;
320 	u8 syndrome;
321 
322 	switch (event) {
323 	case MLX5_EVENT_TYPE_FPGA_ERROR:
324 		syndrome = MLX5_GET(fpga_error_event, data, syndrome);
325 		event_name = mlx5_fpga_syndrome_to_string(syndrome);
326 		break;
327 	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
328 		syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
329 		event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
330 		break;
331 	default:
332 		return NOTIFY_DONE;
333 	}
334 
335 	spin_lock_irqsave(&fdev->state_lock, flags);
336 	switch (fdev->state) {
337 	case MLX5_FPGA_STATUS_SUCCESS:
338 		mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
339 		teardown = true;
340 		break;
341 	default:
342 		mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
343 					   syndrome, event_name);
344 	}
345 	spin_unlock_irqrestore(&fdev->state_lock, flags);
346 	/* We tear-down the card's interfaces and functionality because
347 	 * the FPGA bump-on-the-wire is misbehaving and we lose ability
348 	 * to communicate with the network. User may still be able to
349 	 * recover by re-programming or debugging the FPGA
350 	 */
351 	if (teardown)
352 		mlx5_trigger_health_work(fdev->mdev);
353 
354 	return NOTIFY_OK;
355 }
356