// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#define pr_fmt(fmt)			"habanalabs: " fmt

#include "habanalabs.h"

#include <linux/pci.h>
#include <linux/hwmon.h>
#include <uapi/misc/habanalabs.h>

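/*
 * hl_device_status - get the current status of the device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Derive the device status from the in_reset, needs_reset and disabled
 * flags. The checks are ordered, so a device in reset reports IN_RESET
 * even if it is also marked disabled.
 */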
enum hl_device_status hl_device_status(struct hl_device *hdev)
{
	enum hl_device_status status;

	if (atomic_read(&hdev->in_reset))
		status = HL_DEVICE_STATUS_IN_RESET;
	else if (hdev->needs_reset)
		status = HL_DEVICE_STATUS_NEEDS_RESET;
	else if (hdev->disabled)
		status = HL_DEVICE_STATUS_MALFUNCTION;
	else
		status = HL_DEVICE_STATUS_OPERATIONAL;

	return status;
}

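/*
 * hl_device_operational - check if the device can accept new work
 *
 * @hdev: pointer to habanalabs device structure
 * @status: optional pointer that receives the current device status
 *
 * Returns true if the device is operational, false if it is in reset,
 * malfunctioning or waiting for a reset.
 */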
bool hl_device_operational(struct hl_device *hdev,
		enum hl_device_status *status)
{
	enum hl_device_status current_status;

	current_status = hl_device_status(hdev);
	if (status)
		*status = current_status;

	switch (current_status) {
	case HL_DEVICE_STATUS_IN_RESET:
	case HL_DEVICE_STATUS_MALFUNCTION:
	case HL_DEVICE_STATUS_NEEDS_RESET:
		return false;
	case HL_DEVICE_STATUS_OPERATIONAL:
	default:
		return true;
	}
}

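/*
 * hpriv_release - release function for the hl_fpriv reference counter
 *
 * @ref: pointer to the refcount field of the hl_fpriv structure
 *
 * Called by kref_put() when the last reference to the file private data
 * is dropped. Frees the per-process data, detaches it from the device and,
 * if the device is configured for it, triggers a reset upon release.
 */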
static void hpriv_release(struct kref *ref)
{
	struct hl_fpriv *hpriv;
	struct hl_device *hdev;

	hpriv = container_of(ref, struct hl_fpriv, refcount);

	hdev = hpriv->hdev;

	put_pid(hpriv->taskpid);

	hl_debugfs_remove_file(hpriv);

	mutex_destroy(&hpriv->restore_phase_mutex);

	mutex_lock(&hdev->fpriv_list_lock);
	list_del(&hpriv->dev_node);
	hdev->compute_ctx = NULL;
	mutex_unlock(&hdev->fpriv_list_lock);

	kfree(hpriv);

	if (hdev->reset_upon_device_release)
		hl_device_reset(hdev, 0);
}

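/* Take a reference on the per-process file private data */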
void hl_hpriv_get(struct hl_fpriv *hpriv)
{
	kref_get(&hpriv->refcount);
}

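/* Drop a reference; returns 1 if it was the last one and hpriv was released */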
int hl_hpriv_put(struct hl_fpriv *hpriv)
{
	return kref_put(&hpriv->refcount, hpriv_release);
}

/*
 * hl_device_release - release function for habanalabs device
 *
 * @inode: pointer to inode structure
 * @filp: pointer to file structure
 *
 * Called when a process closes a habanalabs device
 */
static int hl_device_release(struct inode *inode, struct file *filp)
{
	struct hl_fpriv *hpriv = filp->private_data;
	struct hl_device *hdev = hpriv->hdev;

	filp->private_data = NULL;

	if (!hdev) {
		pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
		put_pid(hpriv->taskpid);
		return 0;
	}

	/* Each pending user interrupt holds the user's context, hence we
	 * must release them all before calling hl_ctx_mgr_fini().
	 */
	hl_release_pending_user_interrupts(hpriv->hdev);

	hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
	hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);

	if (!hl_hpriv_put(hpriv))
		dev_warn(hdev->dev,
			"Device is still in use because there are live CS and/or memory mappings\n");

	return 0;
}

static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
{
	struct hl_fpriv *hpriv = filp->private_data;
	struct hl_device *hdev = hpriv->hdev;

	filp->private_data = NULL;

	if (!hdev) {
		pr_err("Closing FD after device was removed\n");
		goto out;
	}

	mutex_lock(&hdev->fpriv_list_lock);
	list_del(&hpriv->dev_node);
	mutex_unlock(&hdev->fpriv_list_lock);
out:
	put_pid(hpriv->taskpid);

	kfree(hpriv);

	return 0;
}

/*
 * hl_mmap - mmap function for habanalabs device
 *
 * @filp: pointer to file structure
 * @vma: pointer to vm_area_struct of the process
 *
 * Called when a process does an mmap on a habanalabs device. Call the
 * device's mmap function at the end of the common code.
 */
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct hl_fpriv *hpriv = filp->private_data;
	struct hl_device *hdev = hpriv->hdev;
	unsigned long vm_pgoff;

	if (!hdev) {
		pr_err_ratelimited("Trying to mmap after device was removed! Please close FD\n");
		return -ENODEV;
	}

	vm_pgoff = vma->vm_pgoff;
	vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);

	switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
	case HL_MMAP_TYPE_CB:
		return hl_cb_mmap(hpriv, vma);

	case HL_MMAP_TYPE_BLOCK:
		return hl_hw_block_mmap(hpriv, vma);
	}

	return -EINVAL;
}

static const struct file_operations hl_ops = {
	.owner = THIS_MODULE,
	.open = hl_device_open,
	.release = hl_device_release,
	.mmap = hl_mmap,
	.unlocked_ioctl = hl_ioctl,
	.compat_ioctl = hl_ioctl
};

static const struct file_operations hl_ctrl_ops = {
	.owner = THIS_MODULE,
	.open = hl_device_open_ctrl,
	.release = hl_device_release_ctrl,
	.unlocked_ioctl = hl_ioctl_control,
	.compat_ioctl = hl_ioctl_control
};

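/* Release callback for the device objects allocated in device_init_cdev() */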
static void device_release_func(struct device *dev)
{
	kfree(dev);
}

/*
 * device_init_cdev - Initialize cdev and device for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 * @hclass: pointer to the class object of the device
 * @minor: minor number of the specific device
 * @fops: file operations to install for this device
 * @name: name of the device as it will appear in the filesystem
 * @cdev: pointer to the char device object that will be initialized
 * @dev: pointer to the device object that will be initialized
 *
 * Initialize a cdev and a Linux device for a habanalabs device.
 */
static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
				int minor, const struct file_operations *fops,
				char *name, struct cdev *cdev,
				struct device **dev)
{
	cdev_init(cdev, fops);
	cdev->owner = THIS_MODULE;

	*dev = kzalloc(sizeof(**dev), GFP_KERNEL);
	if (!*dev)
		return -ENOMEM;

	device_initialize(*dev);
	(*dev)->devt = MKDEV(hdev->major, minor);
	(*dev)->class = hclass;
	(*dev)->release = device_release_func;
	dev_set_drvdata(*dev, hdev);
	dev_set_name(*dev, "%s", name);

	return 0;
}

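/*
 * device_cdev_sysfs_add - add char devices and sysfs nodes to the system
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Register the main and control char devices and initialize sysfs. On any
 * failure, whatever was already registered is rolled back.
 */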
static int device_cdev_sysfs_add(struct hl_device *hdev)
{
	int rc;

	rc = cdev_device_add(&hdev->cdev, hdev->dev);
	if (rc) {
		dev_err(hdev->dev,
			"failed to add a char device to the system\n");
		return rc;
	}

	rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
	if (rc) {
		dev_err(hdev->dev,
			"failed to add a control char device to the system\n");
		goto delete_cdev_device;
	}

	/* hl_sysfs_init() must be done after adding the device to the system */
	rc = hl_sysfs_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize sysfs\n");
		goto delete_ctrl_cdev_device;
	}

	hdev->cdev_sysfs_created = true;

	return 0;

delete_ctrl_cdev_device:
	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
delete_cdev_device:
	cdev_device_del(&hdev->cdev, hdev->dev);
	return rc;
}

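/*
 * device_cdev_sysfs_del - remove the char devices and sysfs nodes
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Reverse of device_cdev_sysfs_add(). The device references are dropped in
 * any case, so the underlying device objects can be freed.
 */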
static void device_cdev_sysfs_del(struct hl_device *hdev)
{
	if (!hdev->cdev_sysfs_created)
		goto put_devices;

	hl_sysfs_fini(hdev);
	cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
	cdev_device_del(&hdev->cdev, hdev->dev);

put_devices:
	put_device(hdev->dev);
	put_device(hdev->dev_ctrl);
}

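/*
 * device_hard_reset_pending - work function for the delayed hard-reset work
 *
 * @work: pointer to the embedded work_struct
 *
 * Try to hard-reset the device. If the reset is blocked by open user
 * processes (-EBUSY) and the device is not being torn down, re-queue the
 * work to retry after HL_PENDING_RESET_PER_SEC seconds.
 */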
static void device_hard_reset_pending(struct work_struct *work)
{
	struct hl_device_reset_work *device_reset_work =
		container_of(work, struct hl_device_reset_work,
				reset_work.work);
	struct hl_device *hdev = device_reset_work->hdev;
	int rc;

	rc = hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD);
	if ((rc == -EBUSY) && !hdev->device_fini_pending) {
		dev_info(hdev->dev,
			"Could not reset device. Will try again in %u seconds\n",
			HL_PENDING_RESET_PER_SEC);

		queue_delayed_work(device_reset_work->wq,
			&device_reset_work->reset_work,
			msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
	}
}

/*
 * device_early_init - do some early initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Install the relevant function pointers and call the early_init function,
 * if such a function exists
 */
static int device_early_init(struct hl_device *hdev)
{
	int i, rc;
	char workq_name[32];

	switch (hdev->asic_type) {
	case ASIC_GOYA:
		goya_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI:
		gaudi_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
		break;
	case ASIC_GAUDI_SEC:
		gaudi_set_asic_funcs(hdev);
		strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
		break;
	default:
		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
			hdev->asic_type);
		return -EINVAL;
	}

	rc = hdev->asic_funcs->early_init(hdev);
	if (rc)
		return rc;

	rc = hl_asid_init(hdev);
	if (rc)
		goto early_fini;

	if (hdev->asic_prop.completion_queues_count) {
		hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
				sizeof(*hdev->cq_wq),
				GFP_KERNEL);
		if (!hdev->cq_wq) {
			rc = -ENOMEM;
			goto asid_fini;
		}
	}

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
		snprintf(workq_name, sizeof(workq_name), "hl-free-jobs-%u",
				(u32) i);
		hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
		if (!hdev->cq_wq[i]) {
			dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
			rc = -ENOMEM;
			goto free_cq_wq;
		}
	}

	hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
	if (!hdev->eq_wq) {
		dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
		rc = -ENOMEM;
		goto free_cq_wq;
	}

	hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0);
	if (!hdev->sob_reset_wq) {
		dev_err(hdev->dev,
			"Failed to allocate SOB reset workqueue\n");
		rc = -ENOMEM;
		goto free_eq_wq;
	}

	hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
					GFP_KERNEL);
	if (!hdev->hl_chip_info) {
		rc = -ENOMEM;
		goto free_sob_reset_wq;
	}

	rc = hl_mmu_if_set_funcs(hdev);
	if (rc)
		goto free_chip_info;

	hl_cb_mgr_init(&hdev->kernel_cb_mgr);

	hdev->device_reset_work.wq =
			create_singlethread_workqueue("hl_device_reset");
	if (!hdev->device_reset_work.wq) {
		rc = -ENOMEM;
		dev_err(hdev->dev, "Failed to create device reset WQ\n");
		goto free_cb_mgr;
	}

	INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work,
			device_hard_reset_pending);
	hdev->device_reset_work.hdev = hdev;
	hdev->device_fini_pending = 0;

	mutex_init(&hdev->send_cpu_message_lock);
	mutex_init(&hdev->debug_lock);
	INIT_LIST_HEAD(&hdev->cs_mirror_list);
	spin_lock_init(&hdev->cs_mirror_lock);
	INIT_LIST_HEAD(&hdev->fpriv_list);
	mutex_init(&hdev->fpriv_list_lock);
	atomic_set(&hdev->in_reset, 0);

	return 0;

free_cb_mgr:
	hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
free_chip_info:
	kfree(hdev->hl_chip_info);
free_sob_reset_wq:
	destroy_workqueue(hdev->sob_reset_wq);
free_eq_wq:
	destroy_workqueue(hdev->eq_wq);
free_cq_wq:
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		if (hdev->cq_wq[i])
			destroy_workqueue(hdev->cq_wq[i]);
	kfree(hdev->cq_wq);
asid_fini:
	hl_asid_fini(hdev);
early_fini:
	if (hdev->asic_funcs->early_fini)
		hdev->asic_funcs->early_fini(hdev);

	return rc;
}

/*
 * device_early_fini - finalize all that was done in device_early_init
 *
 * @hdev: pointer to habanalabs device structure
 *
 */
static void device_early_fini(struct hl_device *hdev)
{
	int i;

	mutex_destroy(&hdev->debug_lock);
	mutex_destroy(&hdev->send_cpu_message_lock);

	mutex_destroy(&hdev->fpriv_list_lock);

	hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);

	kfree(hdev->hl_chip_info);

	destroy_workqueue(hdev->sob_reset_wq);
	destroy_workqueue(hdev->eq_wq);
	destroy_workqueue(hdev->device_reset_work.wq);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		destroy_workqueue(hdev->cq_wq[i]);
	kfree(hdev->cq_wq);

	hl_asid_fini(hdev);

	if (hdev->asic_funcs->early_fini)
		hdev->asic_funcs->early_fini(hdev);
}

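/*
 * set_freq_to_low_job - periodic work that lowers the device frequency
 *
 * @work: pointer to the embedded work_struct
 *
 * If no compute context is currently active, drop the PLL profile to low
 * to save power, then reschedule the work.
 */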
static void set_freq_to_low_job(struct work_struct *work)
{
	struct hl_device *hdev = container_of(work, struct hl_device,
						work_freq.work);

	mutex_lock(&hdev->fpriv_list_lock);

	if (!hdev->compute_ctx)
		hl_device_set_frequency(hdev, PLL_LOW);

	mutex_unlock(&hdev->fpriv_list_lock);

	schedule_delayed_work(&hdev->work_freq,
			usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
}

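/*
 * hl_device_heartbeat - periodic work that checks the device F/W heartbeat
 *
 * @work: pointer to the embedded work_struct
 *
 * As long as the device answers the heartbeat message, reschedule the
 * check. On a missed heartbeat, trigger a hard reset of the device.
 */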
static void hl_device_heartbeat(struct work_struct *work)
{
	struct hl_device *hdev = container_of(work, struct hl_device,
						work_heartbeat.work);

	if (!hl_device_operational(hdev, NULL))
		goto reschedule;

	if (!hdev->asic_funcs->send_heartbeat(hdev))
		goto reschedule;

	dev_err(hdev->dev, "Device heartbeat failed!\n");
	hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_HEARTBEAT);

	return;

reschedule:
	schedule_delayed_work(&hdev->work_heartbeat,
			usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
}

/*
 * device_late_init - do late initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Do things that either need the device H/W queues to be active or need
 * to happen after all the rest of the initialization is finished
 */
static int device_late_init(struct hl_device *hdev)
{
	int rc;

	if (hdev->asic_funcs->late_init) {
		rc = hdev->asic_funcs->late_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"failed late initialization for the H/W\n");
			return rc;
		}
	}

	hdev->high_pll = hdev->asic_prop.high_pll;

	/* force setting to low frequency */
	hdev->curr_pll_profile = PLL_LOW;

	if (hdev->pm_mng_profile == PM_AUTO)
		hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
	else
		hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);

	INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
	schedule_delayed_work(&hdev->work_freq,
			usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));

	if (hdev->heartbeat) {
		INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
		schedule_delayed_work(&hdev->work_heartbeat,
				usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
	}

	hdev->late_init_done = true;

	return 0;
}

/*
 * device_late_fini - finalize all that was done in device_late_init
 *
 * @hdev: pointer to habanalabs device structure
 *
 */
static void device_late_fini(struct hl_device *hdev)
{
	if (!hdev->late_init_done)
		return;

	cancel_delayed_work_sync(&hdev->work_freq);
	if (hdev->heartbeat)
		cancel_delayed_work_sync(&hdev->work_heartbeat);

	if (hdev->asic_funcs->late_fini)
		hdev->asic_funcs->late_fini(hdev);

	hdev->late_init_done = false;
}

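/*
 * hl_device_utilization - get the device utilization as a percentage
 *
 * @hdev: pointer to habanalabs device structure
 * @utilization: pointer that receives the result, in the range 0..100
 *
 * Utilization is derived from the current power reading, scaled linearly
 * between the default DC power (0%) and the default maximum power (100%).
 */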
int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
{
	u64 max_power, curr_power, dc_power, dividend;
	int rc;

	max_power = hdev->asic_prop.max_power_default;
	dc_power = hdev->asic_prop.dc_power_default;
	rc = hl_fw_cpucp_power_get(hdev, &curr_power);
	if (rc)
		return rc;

	curr_power = clamp(curr_power, dc_power, max_power);

	dividend = (curr_power - dc_power) * 100;
	*utilization = (u32) div_u64(dividend, (max_power - dc_power));

	return 0;
}

/*
 * hl_device_set_frequency - set the frequency of the device
 *
 * @hdev: pointer to habanalabs device structure
 * @freq: the new frequency value
 *
 * Change the frequency if needed. This function has no protection against
 * concurrency, therefore it is assumed that the calling function has protected
 * itself against the case of calling this function from multiple threads with
 * different values
 *
 * Returns 0 if no change was done, otherwise returns 1
 */
int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
{
	if ((hdev->pm_mng_profile == PM_MANUAL) ||
			(hdev->curr_pll_profile == freq))
		return 0;

	dev_dbg(hdev->dev, "Changing device frequency to %s\n",
		freq == PLL_HIGH ? "high" : "low");

	hdev->asic_funcs->set_pll_profile(hdev, freq);

	hdev->curr_pll_profile = freq;

	return 1;
}

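/*
 * hl_device_set_debug_mode - enable or disable device debug mode
 *
 * @hdev: pointer to habanalabs device structure
 * @enable: true to enter debug mode, false to leave it
 *
 * Entering debug mode disables clock gating. Leaving it halts CoreSight and
 * restores the clock gating settings (unless a hard reset is pending).
 * A request that doesn't change the current mode fails with -EFAULT.
 */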
int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
{
	int rc = 0;

	mutex_lock(&hdev->debug_lock);

	if (!enable) {
		if (!hdev->in_debug) {
			dev_err(hdev->dev,
				"Failed to disable debug mode because device was not in debug mode\n");
			rc = -EFAULT;
			goto out;
		}

		if (!hdev->hard_reset_pending)
			hdev->asic_funcs->halt_coresight(hdev);

		hdev->in_debug = 0;

		if (!hdev->hard_reset_pending)
			hdev->asic_funcs->set_clock_gating(hdev);

		goto out;
	}

	if (hdev->in_debug) {
		dev_err(hdev->dev,
			"Failed to enable debug mode because device is already in debug mode\n");
		rc = -EFAULT;
		goto out;
	}

	hdev->asic_funcs->disable_clock_gating(hdev);
	hdev->in_debug = 1;

out:
	mutex_unlock(&hdev->debug_lock);

	return rc;
}

/*
 * hl_device_suspend - initiate device suspend
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Put the H/W in the suspend state (all ASICs).
 * Returns 0 for success or an error on failure.
 * Called at driver suspend.
 */
int hl_device_suspend(struct hl_device *hdev)
{
	int rc;

	pci_save_state(hdev->pdev);

	/* Block future CS/VM/JOB completion operations */
	rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
	if (rc) {
		dev_err(hdev->dev, "Can't suspend while in reset\n");
		return -EIO;
	}

	/* This blocks all other operations that are not blocked by in_reset */
	hdev->disabled = true;

	/*
	 * Flush anyone that is inside the critical section of enqueue
	 * jobs to the H/W
	 */
	hdev->asic_funcs->hw_queues_lock(hdev);
	hdev->asic_funcs->hw_queues_unlock(hdev);

	/* Flush processes that are sending message to CPU */
	mutex_lock(&hdev->send_cpu_message_lock);
	mutex_unlock(&hdev->send_cpu_message_lock);

	rc = hdev->asic_funcs->suspend(hdev);
	if (rc)
		dev_err(hdev->dev,
			"Failed to disable PCI access of device CPU\n");

	/* Shut down the device */
	pci_disable_device(hdev->pdev);
	pci_set_power_state(hdev->pdev, PCI_D3hot);

	return 0;
}

/*
 * hl_device_resume - initiate device resume
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Bring the H/W back to operating state (all ASICs).
 * Returns 0 for success or an error on failure.
 * Called at driver resume.
 */
int hl_device_resume(struct hl_device *hdev)
{
	int rc;

	pci_set_power_state(hdev->pdev, PCI_D0);
	pci_restore_state(hdev->pdev);
	rc = pci_enable_device_mem(hdev->pdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to enable PCI device in resume\n");
		return rc;
	}

	pci_set_master(hdev->pdev);

	rc = hdev->asic_funcs->resume(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to resume device after suspend\n");
		goto disable_device;
	}

	hdev->disabled = false;
	atomic_set(&hdev->in_reset, 0);

	rc = hl_device_reset(hdev, HL_RESET_HARD);
	if (rc) {
		dev_err(hdev->dev, "Failed to reset device during resume\n");
		goto disable_device;
	}

	return 0;

disable_device:
	pci_clear_master(hdev->pdev);
	pci_disable_device(hdev->pdev);

	return rc;
}

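/*
 * device_kill_open_processes - kill all user processes that hold an open
 *                              file descriptor of the device
 *
 * @hdev: pointer to habanalabs device structure
 * @timeout: number of seconds to wait for the processes to exit, or 0 to
 *           use the default kill-and-retry flow
 *
 * Send SIGKILL to every process on the fpriv list and then wait for the
 * list to drain. Returns 0 if all processes exited, -EBUSY if another
 * kill/wait round is needed and -ETIME once all retries are exhausted.
 */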
static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
{
	struct hl_fpriv	*hpriv;
	struct task_struct *task = NULL;
	u32 pending_cnt;

	/* Give time for the user to close the FD, and for processes that are
	 * inside hl_device_open to finish
	 */
	if (!list_empty(&hdev->fpriv_list))
		ssleep(1);

	if (timeout) {
		pending_cnt = timeout;
	} else {
		if (hdev->process_kill_trial_cnt) {
			/* Processes have already been killed */
			pending_cnt = 1;
			goto wait_for_processes;
		} else {
			/* Wait a small period after process kill */
			pending_cnt = HL_PENDING_RESET_PER_SEC;
		}
	}

	mutex_lock(&hdev->fpriv_list_lock);

	/* This section must be protected because we are dereferencing
	 * pointers that are freed if the process exits
	 */
	list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
		task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
		if (task) {
			dev_info(hdev->dev, "Killing user process pid=%d\n",
				task_pid_nr(task));
			send_sig(SIGKILL, task, 1);
			usleep_range(1000, 10000);

			put_task_struct(task);
		}
	}

	mutex_unlock(&hdev->fpriv_list_lock);

	/*
	 * We killed the open users, but that doesn't mean they are closed.
	 * It could be that they are running a long cleanup phase in the driver
	 * e.g. MMU unmappings, or running other long teardown flow even before
	 * our cleanup.
	 * Therefore we need to wait again to make sure they are closed before
	 * continuing with the reset.
	 */

wait_for_processes:
	while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
		dev_dbg(hdev->dev,
			"Waiting for all unmap operations to finish before hard reset\n");

		pending_cnt--;

		ssleep(1);
	}

	/* All processes exited successfully */
	if (list_empty(&hdev->fpriv_list))
		return 0;

	/* Give up waiting for processes to exit */
	if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS)
		return -ETIME;

	hdev->process_kill_trial_cnt++;

	return -EBUSY;
}

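/*
 * device_disable_open_processes - sever remaining user processes from the
 *                                 device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Clear the device pointer in every open file private data so subsequent
 * file operations see a removed device instead of touching freed state.
 */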
static void device_disable_open_processes(struct hl_device *hdev)
{
	struct hl_fpriv *hpriv;

	mutex_lock(&hdev->fpriv_list_lock);
	list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
		hpriv->hdev = NULL;
	mutex_unlock(&hdev->fpriv_list_lock);
}

/*
 * hl_device_reset - reset the device
 *
 * @hdev: pointer to habanalabs device structure
 * @flags: reset flags.
 *
 * Block future CS and wait for pending CS to be enqueued
 * Call ASIC H/W fini
 * Flush all completions
 * Re-initialize all internal data structures
 * Call ASIC H/W init, late_init
 * Test queues
 * Enable device
 *
 * Returns 0 for success or an error on failure.
 */
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	bool hard_reset, from_hard_reset_thread;
	int i, rc;

	if (!hdev->init_done) {
		dev_err(hdev->dev,
			"Can't reset before initialization is done\n");
		return 0;
	}

	hard_reset = (flags & HL_RESET_HARD) != 0;
	from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0;

	if (!hard_reset && !hdev->supports_soft_reset) {
		dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
		hard_reset = true;
	}

	/* Re-entry of reset thread */
	if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
		goto kill_processes;

	/*
	 * Prevent concurrency in this function - only one reset should be
	 * done at any given time. We only need to perform this if we didn't
	 * get here from the dedicated hard reset thread
	 */
	if (!from_hard_reset_thread) {
		/* Block future CS/VM/JOB completion operations */
		rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
		if (rc)
			return 0;

		/*
		 * If the reset is due to a heartbeat failure, the device CPU
		 * is not responsive, in which case there is no point in
		 * sending it the PCI disable message
		 */
		if (hard_reset && !(flags & HL_RESET_HEARTBEAT)) {
			/* Disable PCI access from the device F/W so it won't
			 * send us additional interrupts. We disable MSI/MSI-X
			 * at the halt_engines function and we can't have the
			 * F/W sending us interrupts after that. We need to
			 * disable the access here because if the device is
			 * marked disabled, the message won't be sent. Also,
			 * in case of heartbeat, the device CPU is marked as
			 * disabled so this message won't be sent
			 */
			if (hl_fw_send_pci_access_msg(hdev,
					CPUCP_PACKET_DISABLE_PCI_ACCESS))
				dev_warn(hdev->dev,
					"Failed to disable PCI access by F/W\n");
		}

		/* This also blocks future CS/VM/JOB completion operations */
		hdev->disabled = true;

		/* Flush anyone that is inside the critical section of enqueue
		 * jobs to the H/W
		 */
		hdev->asic_funcs->hw_queues_lock(hdev);
		hdev->asic_funcs->hw_queues_unlock(hdev);

		/* Flush anyone that is inside device open */
		mutex_lock(&hdev->fpriv_list_lock);
		mutex_unlock(&hdev->fpriv_list_lock);

		dev_err(hdev->dev, "Going to RESET device!\n");
	}

again:
	if (hard_reset && !from_hard_reset_thread) {
		hdev->hard_reset_pending = true;

		hdev->process_kill_trial_cnt = 0;

		/*
		 * Because the reset function can't run from interrupt or
		 * from heartbeat work, we need to call the reset function
		 * from a dedicated work item
		 */
		queue_delayed_work(hdev->device_reset_work.wq,
			&hdev->device_reset_work.reset_work, 0);

		return 0;
	}

	if (hard_reset) {
		device_late_fini(hdev);

		/*
		 * Now that the heartbeat thread is closed, flush processes
		 * which are sending messages to CPU
		 */
		mutex_lock(&hdev->send_cpu_message_lock);
		mutex_unlock(&hdev->send_cpu_message_lock);
	}

	/*
	 * Halt the engines and disable interrupts so we won't get any more
	 * completions from H/W and we won't have any accesses from the
	 * H/W to the host machine
	 */
	hdev->asic_funcs->halt_engines(hdev, hard_reset);

	/* Go over all the queues, release all CS and their jobs */
	hl_cs_rollback_all(hdev);

	/* Release all pending user interrupts, each pending user interrupt
	 * holds a reference to the user context
	 */
	hl_release_pending_user_interrupts(hdev);

kill_processes:
	if (hard_reset) {
		/* Kill processes here after CS rollback. This is because the
		 * process can't really exit until all its CSs are done, which
		 * is what we do in cs rollback
		 */
		rc = device_kill_open_processes(hdev, 0);

		if (rc == -EBUSY) {
			if (hdev->device_fini_pending) {
				dev_crit(hdev->dev,
					"Failed to kill all open processes, stopping hard reset\n");
				goto out_err;
			}

			/* signal reset thread to reschedule */
			return rc;
		}

		if (rc) {
			dev_crit(hdev->dev,
				"Failed to kill all open processes, stopping hard reset\n");
			goto out_err;
		}

		/* Flush the Event queue workers to make sure no other thread is
		 * reading or writing to registers during the reset
		 */
		flush_workqueue(hdev->eq_wq);
	}

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, hard_reset);

	if (hard_reset) {
		/* Release kernel context */
		if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
			hdev->kernel_ctx = NULL;
		hl_vm_fini(hdev);
		hl_mmu_fini(hdev);
		hl_eq_reset(hdev, &hdev->event_queue);
	}

	/* Re-initialize PI/CI to 0 in all queues (hw queue, cq) */
	hl_hw_queue_reset(hdev, hard_reset);
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_reset(hdev, &hdev->completion_queue[i]);

	mutex_lock(&hdev->fpriv_list_lock);

	/* Make sure the context switch phase will run again */
	if (hdev->compute_ctx) {
		atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1);
		hdev->compute_ctx->thread_ctx_switch_wait_token = 0;
	}

	mutex_unlock(&hdev->fpriv_list_lock);

	/* Finished tear-down, starting to re-initialize */

	if (hard_reset) {
		hdev->device_cpu_disabled = false;
		hdev->hard_reset_pending = false;

		if (hdev->kernel_ctx) {
			dev_crit(hdev->dev,
				"kernel ctx was alive during hard reset, something is terribly wrong\n");
			rc = -EBUSY;
			goto out_err;
		}

		rc = hl_mmu_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to initialize MMU S/W after hard reset\n");
			goto out_err;
		}

		/* Allocate the kernel context */
		hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
						GFP_KERNEL);
		if (!hdev->kernel_ctx) {
			rc = -ENOMEM;
			hl_mmu_fini(hdev);
			goto out_err;
		}

		hdev->compute_ctx = NULL;

		rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
		if (rc) {
			dev_err(hdev->dev,
				"failed to init kernel ctx in hard reset\n");
			kfree(hdev->kernel_ctx);
			hdev->kernel_ctx = NULL;
			hl_mmu_fini(hdev);
			goto out_err;
		}
	}

	/* Device is now enabled as part of the initialization requires
	 * communication with the device firmware to get information that
	 * is required for the initialization itself
	 */
	hdev->disabled = false;

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"failed to initialize the H/W after reset\n");
		goto out_err;
	}

	/* If the device is not idle, fail the reset process */
	if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
			HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
		dev_err(hdev->dev,
			"device is not idle (mask %#llx %#llx) after reset\n",
			idle_mask[0], idle_mask[1]);
		rc = -EIO;
		goto out_err;
	}

	/* Check that the communication with the device is working */
	rc = hdev->asic_funcs->test_queues(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to detect if device is alive after reset\n");
		goto out_err;
	}

	if (hard_reset) {
		rc = device_late_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed late init after hard reset\n");
			goto out_err;
		}

		rc = hl_vm_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to init memory module after hard reset\n");
			goto out_err;
		}

		hl_set_max_power(hdev);
	} else {
		rc = hdev->asic_funcs->soft_reset_late_init(hdev);
		if (rc) {
			dev_err(hdev->dev,
				"Failed late init after soft reset\n");
			goto out_err;
		}
	}

	atomic_set(&hdev->in_reset, 0);
	hdev->needs_reset = false;

	dev_notice(hdev->dev, "Successfully finished resetting the device\n");

	if (hard_reset) {
		hdev->hard_reset_cnt++;

		/* After reset is done, we are ready to receive events from
		 * the F/W. We can't do it before because we will ignore events
		 * and if those events are fatal, we won't know about it and
		 * the device will be operational although it shouldn't be
		 */
		hdev->asic_funcs->enable_events_from_fw(hdev);
	} else {
		hdev->soft_reset_cnt++;
	}

	return 0;

out_err:
	hdev->disabled = true;

	if (hard_reset) {
		dev_err(hdev->dev,
			"Failed to reset! Device is NOT usable\n");
		hdev->hard_reset_cnt++;
	} else {
		dev_err(hdev->dev,
			"Failed to do soft-reset, trying hard reset\n");
		hdev->soft_reset_cnt++;
		hard_reset = true;
		goto again;
	}

	atomic_set(&hdev->in_reset, 0);

	return rc;
}

/*
 * hl_device_init - main initialization function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 * @hclass: pointer to the class object of the device
 *
 * Allocate an id for the device, do early initialization and then call the
 * ASIC specific initialization functions. Finally, create the cdev and the
 * Linux device to expose it to the user
 */
int hl_device_init(struct hl_device *hdev, struct class *hclass)
{
	int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
	char *name;
	bool add_cdev_sysfs_on_err = false;

	name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
	if (!name) {
		rc = -ENOMEM;
		goto out_disabled;
	}

	/* Initialize cdev and device structures */
	rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
				&hdev->cdev, &hdev->dev);

	kfree(name);

	if (rc)
		goto out_disabled;

	name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
	if (!name) {
		rc = -ENOMEM;
		goto free_dev;
	}

	/* Initialize cdev and device structures for control device */
	rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
				name, &hdev->cdev_ctrl, &hdev->dev_ctrl);

	kfree(name);

	if (rc)
		goto free_dev;

	/* Initialize ASIC function pointers and perform early init */
	rc = device_early_init(hdev);
	if (rc)
		goto free_dev_ctrl;

	user_interrupt_cnt = hdev->asic_prop.user_interrupt_count;

	if (user_interrupt_cnt) {
		hdev->user_interrupt = kcalloc(user_interrupt_cnt,
				sizeof(*hdev->user_interrupt),
				GFP_KERNEL);

		if (!hdev->user_interrupt) {
			rc = -ENOMEM;
			goto early_fini;
		}
	}

	/*
	 * Start calling ASIC initialization. First S/W then H/W and finally
	 * late init
	 */
	rc = hdev->asic_funcs->sw_init(hdev);
	if (rc)
		goto user_interrupts_fini;

	/*
	 * Initialize the H/W queues. Must be done before hw_init, because
	 * there the addresses of the kernel queues are being written to the
	 * registers of the device
	 */
	rc = hl_hw_queues_create(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize kernel queues\n");
		goto sw_fini;
	}

	cq_cnt = hdev->asic_prop.completion_queues_count;

	/*
	 * Initialize the completion queues. Must be done before hw_init,
	 * because there the addresses of the completion queues are being
	 * passed as arguments to request_irq
	 */
	if (cq_cnt) {
		hdev->completion_queue = kcalloc(cq_cnt,
				sizeof(*hdev->completion_queue),
				GFP_KERNEL);

		if (!hdev->completion_queue) {
			dev_err(hdev->dev,
				"failed to allocate completion queues\n");
			rc = -ENOMEM;
			goto hw_queues_destroy;
		}
	}

	for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
		rc = hl_cq_init(hdev, &hdev->completion_queue[i],
				hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
		if (rc) {
			dev_err(hdev->dev,
				"failed to initialize completion queue\n");
			goto cq_fini;
		}
		hdev->completion_queue[i].cq_idx = i;
	}

	/*
	 * Initialize the event queue. Must be done before hw_init,
	 * because there the address of the event queue is being
	 * passed as argument to request_irq
	 */
	rc = hl_eq_init(hdev, &hdev->event_queue);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize event queue\n");
		goto cq_fini;
	}

	/* MMU S/W must be initialized before kernel context is created */
	rc = hl_mmu_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
		goto eq_fini;
	}

	/* Allocate the kernel context */
	hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
	if (!hdev->kernel_ctx) {
		rc = -ENOMEM;
		goto mmu_fini;
	}

	hdev->compute_ctx = NULL;

	hl_debugfs_add_device(hdev);

	/* debugfs nodes are created in hl_ctx_init so it must be called after
	 * hl_debugfs_add_device.
	 */
	rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize kernel context\n");
		kfree(hdev->kernel_ctx);
		goto remove_device_from_debugfs;
	}

	rc = hl_cb_pool_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CB pool\n");
		goto release_ctx;
	}

	/*
	 * From this point, in case of an error, add char devices and create
	 * sysfs nodes as part of the error flow, to allow debugging.
	 */
	add_cdev_sysfs_on_err = true;

	/* Device is now enabled as part of the initialization requires
	 * communication with the device firmware to get information that
	 * is required for the initialization itself
	 */
	hdev->disabled = false;

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize the H/W\n");
		rc = 0;
		goto out_disabled;
	}

	/* Check that the communication with the device is working */
	rc = hdev->asic_funcs->test_queues(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to detect if device is alive\n");
		rc = 0;
		goto out_disabled;
	}

	rc = device_late_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed late initialization\n");
		rc = 0;
		goto out_disabled;
	}

	dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
		hdev->asic_name,
		hdev->asic_prop.dram_size / 1024 / 1024 / 1024);

	rc = hl_vm_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize memory module\n");
		rc = 0;
		goto out_disabled;
	}

	/*
	 * Expose devices and sysfs nodes to user.
	 * From here there is no need to add char devices and create sysfs nodes
	 * in case of an error.
	 */
	add_cdev_sysfs_on_err = false;
	rc = device_cdev_sysfs_add(hdev);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add char devices and sysfs nodes\n");
		rc = 0;
		goto out_disabled;
	}

	/* Need to call this again because the max power might change,
	 * depending on card type for certain ASICs
	 */
	hl_set_max_power(hdev);

	/*
	 * hl_hwmon_init() must be called after device_late_init(), because only
	 * there we get the information from the device about which
	 * hwmon-related sensors the device supports.
	 * Furthermore, it must be done after adding the device to the system.
	 */
	rc = hl_hwmon_init(hdev);
	if (rc) {
		dev_err(hdev->dev, "Failed to initialize hwmon\n");
		rc = 0;
		goto out_disabled;
	}

	dev_notice(hdev->dev,
		"Successfully added device to habanalabs driver\n");

	hdev->init_done = true;

	/* After initialization is done, we are ready to receive events from
	 * the F/W. We can't do it before because we will ignore events and if
	 * those events are fatal, we won't know about it and the device will
	 * be operational although it shouldn't be
	 */
	hdev->asic_funcs->enable_events_from_fw(hdev);

	return 0;

release_ctx:
	if (hl_ctx_put(hdev->kernel_ctx) != 1)
		dev_err(hdev->dev,
			"kernel ctx is still alive on initialization failure\n");
remove_device_from_debugfs:
	hl_debugfs_remove_device(hdev);
mmu_fini:
	hl_mmu_fini(hdev);
eq_fini:
	hl_eq_fini(hdev, &hdev->event_queue);
cq_fini:
	for (i = 0 ; i < cq_ready_cnt ; i++)
		hl_cq_fini(hdev, &hdev->completion_queue[i]);
	kfree(hdev->completion_queue);
hw_queues_destroy:
	hl_hw_queues_destroy(hdev);
sw_fini:
	hdev->asic_funcs->sw_fini(hdev);
user_interrupts_fini:
	kfree(hdev->user_interrupt);
early_fini:
	device_early_fini(hdev);
free_dev_ctrl:
	put_device(hdev->dev_ctrl);
free_dev:
	put_device(hdev->dev);
out_disabled:
	hdev->disabled = true;
	if (add_cdev_sysfs_on_err)
		device_cdev_sysfs_add(hdev);
	if (hdev->pdev)
		dev_err(&hdev->pdev->dev,
			"Failed to initialize hl%d. Device is NOT usable!\n",
			hdev->id / 2);
	else
		pr_err("Failed to initialize hl%d. Device is NOT usable!\n",
			hdev->id / 2);

	return rc;
}

/*
 * hl_device_fini - main tear-down function for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Destroy the device, call ASIC fini functions and release the id
 */
void hl_device_fini(struct hl_device *hdev)
{
	ktime_t timeout;
	int i, rc;

	dev_info(hdev->dev, "Removing device\n");

	hdev->device_fini_pending = 1;
	flush_delayed_work(&hdev->device_reset_work.reset_work);

	/*
	 * This function is competing with the reset function, so try to
	 * take the reset atomic and if we are already in the middle of reset,
	 * wait until the reset function is finished. The reset function is
	 * designed to always finish. However, in Gaudi, because of all the
	 * network ports, the hard reset could take between 10-30 seconds
	 */

	timeout = ktime_add_us(ktime_get(),
				HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000);
	rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
	while (rc) {
		usleep_range(50, 200);
		rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
		if (ktime_compare(ktime_get(), timeout) > 0) {
			dev_crit(hdev->dev,
				"Failed to remove device because reset function did not finish\n");
			return;
		}
	}

	/* Disable PCI access from the device F/W so it won't send us
	 * additional interrupts. We disable MSI/MSI-X at the halt_engines
	 * function and we can't have the F/W sending us interrupts after
	 * that. We need to disable the access here because if the device is
	 * marked disabled, the message won't be sent. Also, in case of
	 * heartbeat, the device CPU is marked as disabled so this message
	 * won't be sent
	 */
	hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);

	/* Mark device as disabled */
	hdev->disabled = true;

	/* Flush anyone that is inside the critical section of enqueue
	 * jobs to the H/W
	 */
	hdev->asic_funcs->hw_queues_lock(hdev);
	hdev->asic_funcs->hw_queues_unlock(hdev);

	/* Flush anyone that is inside device open */
	mutex_lock(&hdev->fpriv_list_lock);
	mutex_unlock(&hdev->fpriv_list_lock);

	hdev->hard_reset_pending = true;

	hl_hwmon_fini(hdev);

	device_late_fini(hdev);

	/*
	 * Halt the engines and disable interrupts so we won't get any more
	 * completions from H/W and we won't have any accesses from the
	 * H/W to the host machine
	 */
	hdev->asic_funcs->halt_engines(hdev, true);

	/* Go over all the queues, release all CS and their jobs */
	hl_cs_rollback_all(hdev);

	/* Kill processes here after CS rollback. This is because the process
	 * can't really exit until all its CSs are done, which is what we
	 * do in cs rollback
	 */
	dev_info(hdev->dev,
		"Waiting for all processes to exit (timeout of %u seconds)\n",
		HL_PENDING_RESET_LONG_SEC);

	rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC);
	if (rc) {
		dev_crit(hdev->dev, "Failed to kill all open processes\n");
		device_disable_open_processes(hdev);
	}

	hl_cb_pool_fini(hdev);

	/* Reset the H/W. It will be in idle state after this returns */
	hdev->asic_funcs->hw_fini(hdev, true);

	/* Release kernel context */
	if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) != 1)
		dev_err(hdev->dev, "kernel ctx is still alive\n");

	hl_debugfs_remove_device(hdev);

	hl_vm_fini(hdev);

	hl_mmu_fini(hdev);

	hl_eq_fini(hdev, &hdev->event_queue);

	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_fini(hdev, &hdev->completion_queue[i]);
	kfree(hdev->completion_queue);
	kfree(hdev->user_interrupt);

	hl_hw_queues_destroy(hdev);

	/* Call ASIC S/W finalize function */
	hdev->asic_funcs->sw_fini(hdev);

	device_early_fini(hdev);

	/* Hide devices and sysfs nodes from user */
	device_cdev_sysfs_del(hdev);

	pr_info("removed device successfully\n");
}

/*
 * MMIO register access helper functions.
 */

/*
 * hl_rreg - Read an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 *
 * Returns the value of the MMIO register we are asked to read
 *
 */
inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
{
	return readl(hdev->rmmio + reg);
}

/*
 * hl_wreg - Write to an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes)
 * @val: 32-bit value
 *
 * Writes the 32-bit value into the MMIO register
 *
 */
inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
{
	writel(val, hdev->rmmio + reg);
}