1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved
3  * Copyright (c) 2016 Ivan Vecera <cera@cera.cz>
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/types.h>
8 #include <linux/device.h>
9 #include <linux/sysfs.h>
10 #include <linux/thermal.h>
11 #include <linux/err.h>
12 #include <linux/sfp.h>
13 
14 #include "core.h"
15 #include "core_env.h"
16 
/* Thermal zone polling intervals, in milliseconds. */
#define MLXSW_THERMAL_POLL_INT		1000
#define MLXSW_THERMAL_SLOW_POLL_INT	20000
/* Default ASIC trip temperatures, in millidegrees Celsius. */
#define MLXSW_THERMAL_ASIC_TEMP_NORM	75000	/* 75C */
#define MLXSW_THERMAL_ASIC_TEMP_HIGH	85000	/* 85C */
#define MLXSW_THERMAL_ASIC_TEMP_HOT	105000	/* 105C */
/* Default trip hysteresis, in millidegrees Celsius. */
#define MLXSW_THERMAL_HYSTERESIS_TEMP	5000	/* 5C */
/* Distance between a module's "normal" and "high" trips: twice the
 * hysteresis.
 */
#define MLXSW_THERMAL_MODULE_TEMP_SHIFT	(2 * MLXSW_THERMAL_HYSTERESIS_TEMP)
/* Size of the buffer used to format a thermal zone name. */
#define MLXSW_THERMAL_ZONE_MAX_NAME	16
/* Score given to a zone whose temperature is not below any trip point. */
#define MLXSW_THERMAL_TEMP_SCORE_MAX	GENMASK(31, 0)
/* Cooling state bounds and the PWM duty matching the maximum state. */
#define MLXSW_THERMAL_MAX_STATE	10
#define MLXSW_THERMAL_MIN_STATE	2
#define MLXSW_THERMAL_MAX_DUTY	255
29 
/* Type-name fragments of external cooling devices that mlxsw thermal zones
 * are allowed to bind to. They are matched as substrings of the cooling
 * device type.
 */
static char * const mlxsw_thermal_external_allowed_cdev[] = {
	[0] = "mlxreg_fan",
};
34 
/* Indices of the trip points kept in every mlxsw thermal zone. */
enum mlxsw_thermal_trips {
	MLXSW_THERMAL_TEMP_TRIP_NORM = 0,	/* first "active" trip */
	MLXSW_THERMAL_TEMP_TRIP_HIGH,		/* second "active" trip */
	MLXSW_THERMAL_TEMP_TRIP_HOT,		/* "hot" trip */
};
40 
/* Description of a single trip point of an mlxsw thermal zone. */
struct mlxsw_thermal_trip {
	int type;	/* trip type reported to the thermal core */
	int temp;	/* trip temperature */
	int hyst;	/* trip hysteresis */
	int min_state;	/* lowest cooling state bound to this trip */
	int max_state;	/* highest cooling state bound to this trip */
};
48 
49 static const struct mlxsw_thermal_trip default_thermal_trips[] = {
50 	{	/* In range - 0-40% PWM */
51 		.type		= THERMAL_TRIP_ACTIVE,
52 		.temp		= MLXSW_THERMAL_ASIC_TEMP_NORM,
53 		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
54 		.min_state	= 0,
55 		.max_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
56 	},
57 	{
58 		/* In range - 40-100% PWM */
59 		.type		= THERMAL_TRIP_ACTIVE,
60 		.temp		= MLXSW_THERMAL_ASIC_TEMP_HIGH,
61 		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
62 		.min_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
63 		.max_state	= MLXSW_THERMAL_MAX_STATE,
64 	},
65 	{	/* Warning */
66 		.type		= THERMAL_TRIP_HOT,
67 		.temp		= MLXSW_THERMAL_ASIC_TEMP_HOT,
68 		.min_state	= MLXSW_THERMAL_MAX_STATE,
69 		.max_state	= MLXSW_THERMAL_MAX_STATE,
70 	},
71 };
72 
/* Number of trip points in every mlxsw thermal zone. */
#define MLXSW_THERMAL_NUM_TRIPS		ARRAY_SIZE(default_thermal_trips)

/* One bit per trip point, so that every trip is marked writable. */
#define MLXSW_THERMAL_TRIP_MASK		(BIT(MLXSW_THERMAL_NUM_TRIPS) - 1)
77 
78 struct mlxsw_thermal;
79 
80 struct mlxsw_thermal_module {
81 	struct mlxsw_thermal *parent;
82 	struct thermal_zone_device *tzdev;
83 	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
84 	int module; /* Module or gearbox number */
85 };
86 
87 struct mlxsw_thermal {
88 	struct mlxsw_core *core;
89 	const struct mlxsw_bus_info *bus_info;
90 	struct thermal_zone_device *tzdev;
91 	int polling_delay;
92 	struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
93 	u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1];
94 	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
95 	struct mlxsw_thermal_module *tz_module_arr;
96 	u8 tz_module_num;
97 	struct mlxsw_thermal_module *tz_gearbox_arr;
98 	u8 tz_gearbox_num;
99 	unsigned int tz_highest_score;
100 	struct thermal_zone_device *tz_highest_dev;
101 };
102 
103 static inline u8 mlxsw_state_to_duty(int state)
104 {
105 	return DIV_ROUND_CLOSEST(state * MLXSW_THERMAL_MAX_DUTY,
106 				 MLXSW_THERMAL_MAX_STATE);
107 }
108 
109 static inline int mlxsw_duty_to_state(u8 duty)
110 {
111 	return DIV_ROUND_CLOSEST(duty * MLXSW_THERMAL_MAX_STATE,
112 				 MLXSW_THERMAL_MAX_DUTY);
113 }
114 
115 static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
116 					struct thermal_cooling_device *cdev)
117 {
118 	int i;
119 
120 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
121 		if (thermal->cdevs[i] == cdev)
122 			return i;
123 
124 	/* Allow mlxsw thermal zone binding to an external cooling device */
125 	for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) {
126 		if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
127 			    strlen(cdev->type)))
128 			return 0;
129 	}
130 
131 	return -ENODEV;
132 }
133 
134 static void
135 mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz)
136 {
137 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0;
138 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0;
139 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0;
140 }
141 
142 static int
143 mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
144 				  struct mlxsw_thermal_module *tz,
145 				  int crit_temp, int emerg_temp)
146 {
147 	int err;
148 
149 	/* Do not try to query temperature thresholds directly from the module's
150 	 * EEPROM if we got valid thresholds from MTMP.
151 	 */
152 	if (!emerg_temp || !crit_temp) {
153 		err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
154 							   SFP_TEMP_HIGH_WARN,
155 							   &crit_temp);
156 		if (err)
157 			return err;
158 
159 		err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
160 							   SFP_TEMP_HIGH_ALARM,
161 							   &emerg_temp);
162 		if (err)
163 			return err;
164 	}
165 
166 	if (crit_temp > emerg_temp) {
167 		dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n",
168 			 tz->tzdev->type, crit_temp, emerg_temp);
169 		return 0;
170 	}
171 
172 	/* According to the system thermal requirements, the thermal zones are
173 	 * defined with three trip points. The critical and emergency
174 	 * temperature thresholds, provided by QSFP module are set as "active"
175 	 * and "hot" trip points, "normal" trip point is derived from "active"
176 	 * by subtracting double hysteresis value.
177 	 */
178 	if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT)
179 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp -
180 					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
181 	else
182 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp;
183 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp;
184 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp;
185 
186 	return 0;
187 }
188 
189 static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal,
190 					  struct thermal_zone_device *tzdev,
191 					  struct mlxsw_thermal_trip *trips,
192 					  int temp)
193 {
194 	struct mlxsw_thermal_trip *trip = trips;
195 	unsigned int score, delta, i, shift = 1;
196 
197 	/* Calculate thermal zone score, if temperature is above the hot
198 	 * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX.
199 	 */
200 	score = MLXSW_THERMAL_TEMP_SCORE_MAX;
201 	for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS;
202 	     i++, trip++) {
203 		if (temp < trip->temp) {
204 			delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp);
205 			score = delta * shift;
206 			break;
207 		}
208 		shift *= 256;
209 	}
210 
211 	if (score > thermal->tz_highest_score) {
212 		thermal->tz_highest_score = score;
213 		thermal->tz_highest_dev = tzdev;
214 	}
215 }
216 
217 static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
218 			      struct thermal_cooling_device *cdev)
219 {
220 	struct mlxsw_thermal *thermal = tzdev->devdata;
221 	struct device *dev = thermal->bus_info->dev;
222 	int i, err;
223 
224 	/* If the cooling device is one of ours bind it */
225 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
226 		return 0;
227 
228 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
229 		const struct mlxsw_thermal_trip *trip = &thermal->trips[i];
230 
231 		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
232 						       trip->max_state,
233 						       trip->min_state,
234 						       THERMAL_WEIGHT_DEFAULT);
235 		if (err < 0) {
236 			dev_err(dev, "Failed to bind cooling device to trip %d\n", i);
237 			return err;
238 		}
239 	}
240 	return 0;
241 }
242 
243 static int mlxsw_thermal_unbind(struct thermal_zone_device *tzdev,
244 				struct thermal_cooling_device *cdev)
245 {
246 	struct mlxsw_thermal *thermal = tzdev->devdata;
247 	struct device *dev = thermal->bus_info->dev;
248 	int i;
249 	int err;
250 
251 	/* If the cooling device is our one unbind it */
252 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
253 		return 0;
254 
255 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
256 		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
257 		if (err < 0) {
258 			dev_err(dev, "Failed to unbind cooling device\n");
259 			return err;
260 		}
261 	}
262 	return 0;
263 }
264 
265 static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
266 				  int *p_temp)
267 {
268 	struct mlxsw_thermal *thermal = tzdev->devdata;
269 	struct device *dev = thermal->bus_info->dev;
270 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
271 	int temp;
272 	int err;
273 
274 	mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
275 
276 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
277 	if (err) {
278 		dev_err(dev, "Failed to query temp sensor\n");
279 		return err;
280 	}
281 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL);
282 	if (temp > 0)
283 		mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips,
284 					      temp);
285 
286 	*p_temp = temp;
287 	return 0;
288 }
289 
290 static int mlxsw_thermal_get_trip_type(struct thermal_zone_device *tzdev,
291 				       int trip,
292 				       enum thermal_trip_type *p_type)
293 {
294 	struct mlxsw_thermal *thermal = tzdev->devdata;
295 
296 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
297 		return -EINVAL;
298 
299 	*p_type = thermal->trips[trip].type;
300 	return 0;
301 }
302 
303 static int mlxsw_thermal_get_trip_temp(struct thermal_zone_device *tzdev,
304 				       int trip, int *p_temp)
305 {
306 	struct mlxsw_thermal *thermal = tzdev->devdata;
307 
308 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
309 		return -EINVAL;
310 
311 	*p_temp = thermal->trips[trip].temp;
312 	return 0;
313 }
314 
315 static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev,
316 				       int trip, int temp)
317 {
318 	struct mlxsw_thermal *thermal = tzdev->devdata;
319 
320 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
321 		return -EINVAL;
322 
323 	thermal->trips[trip].temp = temp;
324 	return 0;
325 }
326 
327 static int mlxsw_thermal_get_trip_hyst(struct thermal_zone_device *tzdev,
328 				       int trip, int *p_hyst)
329 {
330 	struct mlxsw_thermal *thermal = tzdev->devdata;
331 
332 	*p_hyst = thermal->trips[trip].hyst;
333 	return 0;
334 }
335 
336 static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev,
337 				       int trip, int hyst)
338 {
339 	struct mlxsw_thermal *thermal = tzdev->devdata;
340 
341 	thermal->trips[trip].hyst = hyst;
342 	return 0;
343 }
344 
345 static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev,
346 				   int trip, enum thermal_trend *trend)
347 {
348 	struct mlxsw_thermal *thermal = tzdev->devdata;
349 
350 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
351 		return -EINVAL;
352 
353 	if (tzdev == thermal->tz_highest_dev)
354 		return 1;
355 
356 	*trend = THERMAL_TREND_STABLE;
357 	return 0;
358 }
359 
360 static struct thermal_zone_params mlxsw_thermal_params = {
361 	.no_hwmon = true,
362 };
363 
364 static struct thermal_zone_device_ops mlxsw_thermal_ops = {
365 	.bind = mlxsw_thermal_bind,
366 	.unbind = mlxsw_thermal_unbind,
367 	.get_temp = mlxsw_thermal_get_temp,
368 	.get_trip_type	= mlxsw_thermal_get_trip_type,
369 	.get_trip_temp	= mlxsw_thermal_get_trip_temp,
370 	.set_trip_temp	= mlxsw_thermal_set_trip_temp,
371 	.get_trip_hyst	= mlxsw_thermal_get_trip_hyst,
372 	.set_trip_hyst	= mlxsw_thermal_set_trip_hyst,
373 	.get_trend	= mlxsw_thermal_trend_get,
374 };
375 
376 static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev,
377 				     struct thermal_cooling_device *cdev)
378 {
379 	struct mlxsw_thermal_module *tz = tzdev->devdata;
380 	struct mlxsw_thermal *thermal = tz->parent;
381 	int i, j, err;
382 
383 	/* If the cooling device is one of ours bind it */
384 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
385 		return 0;
386 
387 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
388 		const struct mlxsw_thermal_trip *trip = &tz->trips[i];
389 
390 		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
391 						       trip->max_state,
392 						       trip->min_state,
393 						       THERMAL_WEIGHT_DEFAULT);
394 		if (err < 0)
395 			goto err_thermal_zone_bind_cooling_device;
396 	}
397 	return 0;
398 
399 err_thermal_zone_bind_cooling_device:
400 	for (j = i - 1; j >= 0; j--)
401 		thermal_zone_unbind_cooling_device(tzdev, j, cdev);
402 	return err;
403 }
404 
405 static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev,
406 				       struct thermal_cooling_device *cdev)
407 {
408 	struct mlxsw_thermal_module *tz = tzdev->devdata;
409 	struct mlxsw_thermal *thermal = tz->parent;
410 	int i;
411 	int err;
412 
413 	/* If the cooling device is one of ours unbind it */
414 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
415 		return 0;
416 
417 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
418 		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
419 		WARN_ON(err);
420 	}
421 	return err;
422 }
423 
424 static void
425 mlxsw_thermal_module_temp_and_thresholds_get(struct mlxsw_core *core,
426 					     u16 sensor_index, int *p_temp,
427 					     int *p_crit_temp,
428 					     int *p_emerg_temp)
429 {
430 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
431 	int err;
432 
433 	/* Read module temperature and thresholds. */
434 	mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, false, false);
435 	err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
436 	if (err) {
437 		/* Set temperature and thresholds to zero to avoid passing
438 		 * uninitialized data back to the caller.
439 		 */
440 		*p_temp = 0;
441 		*p_crit_temp = 0;
442 		*p_emerg_temp = 0;
443 
444 		return;
445 	}
446 	mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, p_crit_temp, p_emerg_temp,
447 			      NULL);
448 }
449 
450 static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
451 					 int *p_temp)
452 {
453 	struct mlxsw_thermal_module *tz = tzdev->devdata;
454 	struct mlxsw_thermal *thermal = tz->parent;
455 	int temp, crit_temp, emerg_temp;
456 	struct device *dev;
457 	u16 sensor_index;
458 	int err;
459 
460 	dev = thermal->bus_info->dev;
461 	sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + tz->module;
462 
463 	/* Read module temperature and thresholds. */
464 	mlxsw_thermal_module_temp_and_thresholds_get(thermal->core,
465 						     sensor_index, &temp,
466 						     &crit_temp, &emerg_temp);
467 	*p_temp = temp;
468 
469 	if (!temp)
470 		return 0;
471 
472 	/* Update trip points. */
473 	err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz,
474 						crit_temp, emerg_temp);
475 	if (!err && temp > 0)
476 		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
477 
478 	return 0;
479 }
480 
481 static int
482 mlxsw_thermal_module_trip_type_get(struct thermal_zone_device *tzdev, int trip,
483 				   enum thermal_trip_type *p_type)
484 {
485 	struct mlxsw_thermal_module *tz = tzdev->devdata;
486 
487 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
488 		return -EINVAL;
489 
490 	*p_type = tz->trips[trip].type;
491 	return 0;
492 }
493 
494 static int
495 mlxsw_thermal_module_trip_temp_get(struct thermal_zone_device *tzdev,
496 				   int trip, int *p_temp)
497 {
498 	struct mlxsw_thermal_module *tz = tzdev->devdata;
499 
500 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
501 		return -EINVAL;
502 
503 	*p_temp = tz->trips[trip].temp;
504 	return 0;
505 }
506 
507 static int
508 mlxsw_thermal_module_trip_temp_set(struct thermal_zone_device *tzdev,
509 				   int trip, int temp)
510 {
511 	struct mlxsw_thermal_module *tz = tzdev->devdata;
512 
513 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
514 		return -EINVAL;
515 
516 	tz->trips[trip].temp = temp;
517 	return 0;
518 }
519 
520 static int
521 mlxsw_thermal_module_trip_hyst_get(struct thermal_zone_device *tzdev, int trip,
522 				   int *p_hyst)
523 {
524 	struct mlxsw_thermal_module *tz = tzdev->devdata;
525 
526 	*p_hyst = tz->trips[trip].hyst;
527 	return 0;
528 }
529 
530 static int
531 mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip,
532 				   int hyst)
533 {
534 	struct mlxsw_thermal_module *tz = tzdev->devdata;
535 
536 	tz->trips[trip].hyst = hyst;
537 	return 0;
538 }
539 
540 static int mlxsw_thermal_module_trend_get(struct thermal_zone_device *tzdev,
541 					  int trip, enum thermal_trend *trend)
542 {
543 	struct mlxsw_thermal_module *tz = tzdev->devdata;
544 	struct mlxsw_thermal *thermal = tz->parent;
545 
546 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
547 		return -EINVAL;
548 
549 	if (tzdev == thermal->tz_highest_dev)
550 		return 1;
551 
552 	*trend = THERMAL_TREND_STABLE;
553 	return 0;
554 }
555 
556 static struct thermal_zone_device_ops mlxsw_thermal_module_ops = {
557 	.bind		= mlxsw_thermal_module_bind,
558 	.unbind		= mlxsw_thermal_module_unbind,
559 	.get_temp	= mlxsw_thermal_module_temp_get,
560 	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
561 	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
562 	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
563 	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
564 	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
565 	.get_trend	= mlxsw_thermal_module_trend_get,
566 };
567 
568 static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev,
569 					  int *p_temp)
570 {
571 	struct mlxsw_thermal_module *tz = tzdev->devdata;
572 	struct mlxsw_thermal *thermal = tz->parent;
573 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
574 	u16 index;
575 	int temp;
576 	int err;
577 
578 	index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module;
579 	mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
580 
581 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
582 	if (err)
583 		return err;
584 
585 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL);
586 	if (temp > 0)
587 		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
588 
589 	*p_temp = temp;
590 	return 0;
591 }
592 
593 static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = {
594 	.bind		= mlxsw_thermal_module_bind,
595 	.unbind		= mlxsw_thermal_module_unbind,
596 	.get_temp	= mlxsw_thermal_gearbox_temp_get,
597 	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
598 	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
599 	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
600 	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
601 	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
602 	.get_trend	= mlxsw_thermal_module_trend_get,
603 };
604 
605 static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
606 				       unsigned long *p_state)
607 {
608 	*p_state = MLXSW_THERMAL_MAX_STATE;
609 	return 0;
610 }
611 
612 static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev,
613 				       unsigned long *p_state)
614 
615 {
616 	struct mlxsw_thermal *thermal = cdev->devdata;
617 	struct device *dev = thermal->bus_info->dev;
618 	char mfsc_pl[MLXSW_REG_MFSC_LEN];
619 	int err, idx;
620 	u8 duty;
621 
622 	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
623 	if (idx < 0)
624 		return idx;
625 
626 	mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
627 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
628 	if (err) {
629 		dev_err(dev, "Failed to query PWM duty\n");
630 		return err;
631 	}
632 
633 	duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
634 	*p_state = mlxsw_duty_to_state(duty);
635 	return 0;
636 }
637 
638 static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev,
639 				       unsigned long state)
640 
641 {
642 	struct mlxsw_thermal *thermal = cdev->devdata;
643 	struct device *dev = thermal->bus_info->dev;
644 	char mfsc_pl[MLXSW_REG_MFSC_LEN];
645 	int idx;
646 	int err;
647 
648 	if (state > MLXSW_THERMAL_MAX_STATE)
649 		return -EINVAL;
650 
651 	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
652 	if (idx < 0)
653 		return idx;
654 
655 	/* Normalize the state to the valid speed range. */
656 	state = thermal->cooling_levels[state];
657 	mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
658 	err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
659 	if (err) {
660 		dev_err(dev, "Failed to write PWM duty\n");
661 		return err;
662 	}
663 	return 0;
664 }
665 
666 static const struct thermal_cooling_device_ops mlxsw_cooling_ops = {
667 	.get_max_state	= mlxsw_thermal_get_max_state,
668 	.get_cur_state	= mlxsw_thermal_get_cur_state,
669 	.set_cur_state	= mlxsw_thermal_set_cur_state,
670 };
671 
672 static int
673 mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
674 {
675 	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
676 	int err;
677 
678 	snprintf(tz_name, sizeof(tz_name), "mlxsw-module%d",
679 		 module_tz->module + 1);
680 	module_tz->tzdev = thermal_zone_device_register(tz_name,
681 							MLXSW_THERMAL_NUM_TRIPS,
682 							MLXSW_THERMAL_TRIP_MASK,
683 							module_tz,
684 							&mlxsw_thermal_module_ops,
685 							&mlxsw_thermal_params,
686 							0,
687 							module_tz->parent->polling_delay);
688 	if (IS_ERR(module_tz->tzdev)) {
689 		err = PTR_ERR(module_tz->tzdev);
690 		return err;
691 	}
692 
693 	err = thermal_zone_device_enable(module_tz->tzdev);
694 	if (err)
695 		thermal_zone_device_unregister(module_tz->tzdev);
696 
697 	return err;
698 }
699 
/* Tear down a module thermal zone by unregistering its zone device. */
static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev)
{
	struct thermal_zone_device *zone = tzdev;

	thermal_zone_device_unregister(zone);
}
704 
705 static int
706 mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
707 			  struct mlxsw_thermal *thermal, u8 module)
708 {
709 	struct mlxsw_thermal_module *module_tz;
710 	int dummy_temp, crit_temp, emerg_temp;
711 	u16 sensor_index;
712 
713 	sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + module;
714 	module_tz = &thermal->tz_module_arr[module];
715 	/* Skip if parent is already set (case of port split). */
716 	if (module_tz->parent)
717 		return 0;
718 	module_tz->module = module;
719 	module_tz->parent = thermal;
720 	memcpy(module_tz->trips, default_thermal_trips,
721 	       sizeof(thermal->trips));
722 	/* Initialize all trip point. */
723 	mlxsw_thermal_module_trips_reset(module_tz);
724 	/* Read module temperature and thresholds. */
725 	mlxsw_thermal_module_temp_and_thresholds_get(core, sensor_index, &dummy_temp,
726 						     &crit_temp, &emerg_temp);
727 	/* Update trip point according to the module data. */
728 	return mlxsw_thermal_module_trips_update(dev, core, module_tz,
729 						 crit_temp, emerg_temp);
730 }
731 
732 static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz)
733 {
734 	if (module_tz && module_tz->tzdev) {
735 		mlxsw_thermal_module_tz_fini(module_tz->tzdev);
736 		module_tz->tzdev = NULL;
737 		module_tz->parent = NULL;
738 	}
739 }
740 
741 static int
742 mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
743 			   struct mlxsw_thermal *thermal)
744 {
745 	struct mlxsw_thermal_module *module_tz;
746 	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
747 	int i, err;
748 
749 	mlxsw_reg_mgpir_pack(mgpir_pl);
750 	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
751 	if (err)
752 		return err;
753 
754 	mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL,
755 			       &thermal->tz_module_num);
756 
757 	thermal->tz_module_arr = kcalloc(thermal->tz_module_num,
758 					 sizeof(*thermal->tz_module_arr),
759 					 GFP_KERNEL);
760 	if (!thermal->tz_module_arr)
761 		return -ENOMEM;
762 
763 	for (i = 0; i < thermal->tz_module_num; i++) {
764 		err = mlxsw_thermal_module_init(dev, core, thermal, i);
765 		if (err)
766 			goto err_thermal_module_init;
767 	}
768 
769 	for (i = 0; i < thermal->tz_module_num; i++) {
770 		module_tz = &thermal->tz_module_arr[i];
771 		if (!module_tz->parent)
772 			continue;
773 		err = mlxsw_thermal_module_tz_init(module_tz);
774 		if (err)
775 			goto err_thermal_module_tz_init;
776 	}
777 
778 	return 0;
779 
780 err_thermal_module_tz_init:
781 err_thermal_module_init:
782 	for (i = thermal->tz_module_num - 1; i >= 0; i--)
783 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
784 	kfree(thermal->tz_module_arr);
785 	return err;
786 }
787 
788 static void
789 mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal)
790 {
791 	int i;
792 
793 	for (i = thermal->tz_module_num - 1; i >= 0; i--)
794 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
795 	kfree(thermal->tz_module_arr);
796 }
797 
798 static int
799 mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
800 {
801 	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
802 	int ret;
803 
804 	snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d",
805 		 gearbox_tz->module + 1);
806 	gearbox_tz->tzdev = thermal_zone_device_register(tz_name,
807 						MLXSW_THERMAL_NUM_TRIPS,
808 						MLXSW_THERMAL_TRIP_MASK,
809 						gearbox_tz,
810 						&mlxsw_thermal_gearbox_ops,
811 						&mlxsw_thermal_params, 0,
812 						gearbox_tz->parent->polling_delay);
813 	if (IS_ERR(gearbox_tz->tzdev))
814 		return PTR_ERR(gearbox_tz->tzdev);
815 
816 	ret = thermal_zone_device_enable(gearbox_tz->tzdev);
817 	if (ret)
818 		thermal_zone_device_unregister(gearbox_tz->tzdev);
819 
820 	return ret;
821 }
822 
823 static void
824 mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz)
825 {
826 	thermal_zone_device_unregister(gearbox_tz->tzdev);
827 }
828 
829 static int
830 mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core,
831 			     struct mlxsw_thermal *thermal)
832 {
833 	enum mlxsw_reg_mgpir_device_type device_type;
834 	struct mlxsw_thermal_module *gearbox_tz;
835 	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
836 	u8 gbox_num;
837 	int i;
838 	int err;
839 
840 	mlxsw_reg_mgpir_pack(mgpir_pl);
841 	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
842 	if (err)
843 		return err;
844 
845 	mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, &device_type, NULL,
846 			       NULL);
847 	if (device_type != MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE ||
848 	    !gbox_num)
849 		return 0;
850 
851 	thermal->tz_gearbox_num = gbox_num;
852 	thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num,
853 					  sizeof(*thermal->tz_gearbox_arr),
854 					  GFP_KERNEL);
855 	if (!thermal->tz_gearbox_arr)
856 		return -ENOMEM;
857 
858 	for (i = 0; i < thermal->tz_gearbox_num; i++) {
859 		gearbox_tz = &thermal->tz_gearbox_arr[i];
860 		memcpy(gearbox_tz->trips, default_thermal_trips,
861 		       sizeof(thermal->trips));
862 		gearbox_tz->module = i;
863 		gearbox_tz->parent = thermal;
864 		err = mlxsw_thermal_gearbox_tz_init(gearbox_tz);
865 		if (err)
866 			goto err_thermal_gearbox_tz_init;
867 	}
868 
869 	return 0;
870 
871 err_thermal_gearbox_tz_init:
872 	for (i--; i >= 0; i--)
873 		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
874 	kfree(thermal->tz_gearbox_arr);
875 	return err;
876 }
877 
878 static void
879 mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal)
880 {
881 	int i;
882 
883 	for (i = thermal->tz_gearbox_num - 1; i >= 0; i--)
884 		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
885 	kfree(thermal->tz_gearbox_arr);
886 }
887 
888 int mlxsw_thermal_init(struct mlxsw_core *core,
889 		       const struct mlxsw_bus_info *bus_info,
890 		       struct mlxsw_thermal **p_thermal)
891 {
892 	char mfcr_pl[MLXSW_REG_MFCR_LEN] = { 0 };
893 	enum mlxsw_reg_mfcr_pwm_frequency freq;
894 	struct device *dev = bus_info->dev;
895 	struct mlxsw_thermal *thermal;
896 	u16 tacho_active;
897 	u8 pwm_active;
898 	int err, i;
899 
900 	thermal = devm_kzalloc(dev, sizeof(*thermal),
901 			       GFP_KERNEL);
902 	if (!thermal)
903 		return -ENOMEM;
904 
905 	thermal->core = core;
906 	thermal->bus_info = bus_info;
907 	memcpy(thermal->trips, default_thermal_trips, sizeof(thermal->trips));
908 
909 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl);
910 	if (err) {
911 		dev_err(dev, "Failed to probe PWMs\n");
912 		goto err_reg_query;
913 	}
914 	mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active);
915 
916 	for (i = 0; i < MLXSW_MFCR_TACHOS_MAX; i++) {
917 		if (tacho_active & BIT(i)) {
918 			char mfsl_pl[MLXSW_REG_MFSL_LEN];
919 
920 			mlxsw_reg_mfsl_pack(mfsl_pl, i, 0, 0);
921 
922 			/* We need to query the register to preserve maximum */
923 			err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsl),
924 					      mfsl_pl);
925 			if (err)
926 				goto err_reg_query;
927 
928 			/* set the minimal RPMs to 0 */
929 			mlxsw_reg_mfsl_tach_min_set(mfsl_pl, 0);
930 			err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsl),
931 					      mfsl_pl);
932 			if (err)
933 				goto err_reg_write;
934 		}
935 	}
936 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
937 		if (pwm_active & BIT(i)) {
938 			struct thermal_cooling_device *cdev;
939 
940 			cdev = thermal_cooling_device_register("mlxsw_fan",
941 							       thermal,
942 							       &mlxsw_cooling_ops);
943 			if (IS_ERR(cdev)) {
944 				err = PTR_ERR(cdev);
945 				dev_err(dev, "Failed to register cooling device\n");
946 				goto err_thermal_cooling_device_register;
947 			}
948 			thermal->cdevs[i] = cdev;
949 		}
950 	}
951 
952 	/* Initialize cooling levels per PWM state. */
953 	for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++)
954 		thermal->cooling_levels[i] = max(MLXSW_THERMAL_MIN_STATE, i);
955 
956 	thermal->polling_delay = bus_info->low_frequency ?
957 				 MLXSW_THERMAL_SLOW_POLL_INT :
958 				 MLXSW_THERMAL_POLL_INT;
959 
960 	thermal->tzdev = thermal_zone_device_register("mlxsw",
961 						      MLXSW_THERMAL_NUM_TRIPS,
962 						      MLXSW_THERMAL_TRIP_MASK,
963 						      thermal,
964 						      &mlxsw_thermal_ops,
965 						      &mlxsw_thermal_params, 0,
966 						      thermal->polling_delay);
967 	if (IS_ERR(thermal->tzdev)) {
968 		err = PTR_ERR(thermal->tzdev);
969 		dev_err(dev, "Failed to register thermal zone\n");
970 		goto err_thermal_zone_device_register;
971 	}
972 
973 	err = mlxsw_thermal_modules_init(dev, core, thermal);
974 	if (err)
975 		goto err_thermal_modules_init;
976 
977 	err = mlxsw_thermal_gearboxes_init(dev, core, thermal);
978 	if (err)
979 		goto err_thermal_gearboxes_init;
980 
981 	err = thermal_zone_device_enable(thermal->tzdev);
982 	if (err)
983 		goto err_thermal_zone_device_enable;
984 
985 	*p_thermal = thermal;
986 	return 0;
987 
988 err_thermal_zone_device_enable:
989 	mlxsw_thermal_gearboxes_fini(thermal);
990 err_thermal_gearboxes_init:
991 	mlxsw_thermal_modules_fini(thermal);
992 err_thermal_modules_init:
993 	if (thermal->tzdev) {
994 		thermal_zone_device_unregister(thermal->tzdev);
995 		thermal->tzdev = NULL;
996 	}
997 err_thermal_zone_device_register:
998 err_thermal_cooling_device_register:
999 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
1000 		if (thermal->cdevs[i])
1001 			thermal_cooling_device_unregister(thermal->cdevs[i]);
1002 err_reg_write:
1003 err_reg_query:
1004 	devm_kfree(dev, thermal);
1005 	return err;
1006 }
1007 
1008 void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
1009 {
1010 	int i;
1011 
1012 	mlxsw_thermal_gearboxes_fini(thermal);
1013 	mlxsw_thermal_modules_fini(thermal);
1014 	if (thermal->tzdev) {
1015 		thermal_zone_device_unregister(thermal->tzdev);
1016 		thermal->tzdev = NULL;
1017 	}
1018 
1019 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
1020 		if (thermal->cdevs[i]) {
1021 			thermal_cooling_device_unregister(thermal->cdevs[i]);
1022 			thermal->cdevs[i] = NULL;
1023 		}
1024 	}
1025 
1026 	devm_kfree(thermal->bus_info->dev, thermal);
1027 }
1028