1 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2 /* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved
3  * Copyright (c) 2016 Ivan Vecera <cera@cera.cz>
4  */
5 
6 #include <linux/kernel.h>
7 #include <linux/types.h>
8 #include <linux/device.h>
9 #include <linux/sysfs.h>
10 #include <linux/thermal.h>
11 #include <linux/err.h>
12 #include <linux/sfp.h>
13 
14 #include "core.h"
15 #include "core_env.h"
16 
/* Thermal zone polling intervals. mlxsw_thermal_init() picks the slow one
 * when bus_info->low_frequency is set, the regular one otherwise.
 */
#define MLXSW_THERMAL_POLL_INT	1000	/* ms */
#define MLXSW_THERMAL_SLOW_POLL_INT	20000	/* ms */
/* Default ASIC trip point temperatures, in millidegrees Celsius. */
#define MLXSW_THERMAL_ASIC_TEMP_NORM	75000	/* 75C */
#define MLXSW_THERMAL_ASIC_TEMP_HIGH	85000	/* 85C */
#define MLXSW_THERMAL_ASIC_TEMP_HOT	105000	/* 105C */
#define MLXSW_THERMAL_ASIC_TEMP_CRIT	110000	/* 110C */
#define MLXSW_THERMAL_HYSTERESIS_TEMP	5000	/* 5C */
/* Offset (double hysteresis) used to derive the module "normal" and
 * "critical" trip points from the thresholds reported by the module.
 */
#define MLXSW_THERMAL_MODULE_TEMP_SHIFT	(MLXSW_THERMAL_HYSTERESIS_TEMP * 2)
/* Size of the buffer used to format thermal zone names. */
#define MLXSW_THERMAL_ZONE_MAX_NAME	16
/* Score given to a zone whose temperature is not below any trip point. */
#define MLXSW_THERMAL_TEMP_SCORE_MAX	GENMASK(31, 0)
/* Highest cooling state and the PWM duty value it corresponds to. */
#define MLXSW_THERMAL_MAX_STATE	10
#define MLXSW_THERMAL_MAX_DUTY	255
/* Minimum and maximum allowed fan speed in percent: from 20% to 100%. Values
 * MLXSW_THERMAL_MAX_STATE + x, where x is between 2 and 10, are used for
 * setting the dynamic fan speed minimum. For example, if the value is set to
 * 14 (40%), the cooling levels vector will be set to 4, 4, 4, 4, 4, 5, 6, 7,
 * 8, 9, 10 to introduce PWM speed in percent: 40, 40, 40, 40, 40, 50, 60, 70,
 * 80, 90, 100.
 */
#define MLXSW_THERMAL_SPEED_MIN		(MLXSW_THERMAL_MAX_STATE + 2)
#define MLXSW_THERMAL_SPEED_MAX		(MLXSW_THERMAL_MAX_STATE * 2)
#define MLXSW_THERMAL_SPEED_MIN_LEVEL	2		/* 20% */
38 
/* External cooling devices, allowed for binding to mlxsw thermal zones.
 * mlxsw_get_cooling_device_idx() accepts a cooling device whose type string
 * contains one of these names as a substring.
 */
static char * const mlxsw_thermal_external_allowed_cdev[] = {
	"mlxreg_fan",
};
43 
/* Indices of the trip points inside the per-zone trips[] arrays; the order
 * matches the entries of default_thermal_trips[].
 */
enum mlxsw_thermal_trips {
	MLXSW_THERMAL_TEMP_TRIP_NORM,
	MLXSW_THERMAL_TEMP_TRIP_HIGH,
	MLXSW_THERMAL_TEMP_TRIP_HOT,
	MLXSW_THERMAL_TEMP_TRIP_CRIT,
};
50 
/* Description of a single trip point of a thermal zone. */
struct mlxsw_thermal_trip {
	int	type;		/* THERMAL_TRIP_* value reported to the core */
	int	temp;		/* trip temperature */
	int	hyst;		/* trip hysteresis */
	int	min_state;	/* lower cooling state limit used when binding */
	int	max_state;	/* upper cooling state limit used when binding */
};
58 
/* Default trip point table. It is copied into the ASIC zone and into every
 * module and gearbox zone at initialization time; entries are indexed by
 * enum mlxsw_thermal_trips.
 */
static const struct mlxsw_thermal_trip default_thermal_trips[] = {
	{	/* In range - 0-40% PWM */
		.type		= THERMAL_TRIP_ACTIVE,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_NORM,
		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
		.min_state	= 0,
		.max_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
	},
	{
		/* In range - 40-100% PWM */
		.type		= THERMAL_TRIP_ACTIVE,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_HIGH,
		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
		.min_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
		.max_state	= MLXSW_THERMAL_MAX_STATE,
	},
	{	/* Warning */
		.type		= THERMAL_TRIP_HOT,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_HOT,
		.hyst		= MLXSW_THERMAL_HYSTERESIS_TEMP,
		.min_state	= MLXSW_THERMAL_MAX_STATE,
		.max_state	= MLXSW_THERMAL_MAX_STATE,
	},
	{	/* Critical - soft poweroff (no hysteresis is configured) */
		.type		= THERMAL_TRIP_CRITICAL,
		.temp		= MLXSW_THERMAL_ASIC_TEMP_CRIT,
		.min_state	= MLXSW_THERMAL_MAX_STATE,
		.max_state	= MLXSW_THERMAL_MAX_STATE,
	}
};
89 
/* Number of trip points of every mlxsw thermal zone. */
#define MLXSW_THERMAL_NUM_TRIPS	ARRAY_SIZE(default_thermal_trips)

/* Make sure all trips are writable: one bit set per trip point. */
#define MLXSW_THERMAL_TRIP_MASK	(BIT(MLXSW_THERMAL_NUM_TRIPS) - 1)
94 
95 struct mlxsw_thermal;
96 
/* Per-module or per-gearbox thermal zone state. An instance is passed as
 * devdata when the corresponding thermal zone is registered.
 */
struct mlxsw_thermal_module {
	struct mlxsw_thermal *parent;	/* owning thermal object */
	struct thermal_zone_device *tzdev;	/* registered zone */
	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
	enum thermal_device_mode mode;	/* mode reported by the mode callbacks */
	int module; /* Module or gearbox number */
};
104 
/* Top-level thermal state of one mlxsw device. An instance is passed as
 * devdata for the ASIC thermal zone and for the PWM cooling devices.
 */
struct mlxsw_thermal {
	struct mlxsw_core *core;	/* used for register queries and writes */
	const struct mlxsw_bus_info *bus_info;	/* provides the struct device */
	struct thermal_zone_device *tzdev;	/* ASIC ("mlxsw") thermal zone */
	int polling_delay;	/* polling interval chosen at init, in ms */
	struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
	/* Lookup table applied to a requested cooling state before it is
	 * converted to a PWM duty value.
	 */
	u8 cooling_levels[MLXSW_THERMAL_MAX_STATE + 1];
	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
	enum thermal_device_mode mode;
	struct mlxsw_thermal_module *tz_module_arr;	/* tz_module_num entries */
	u8 tz_module_num;
	struct mlxsw_thermal_module *tz_gearbox_arr;	/* tz_gearbox_num entries */
	u8 tz_gearbox_num;
	/* Highest score seen by mlxsw_thermal_tz_score_update() and the zone
	 * that produced it.
	 */
	unsigned int tz_highest_score;
	struct thermal_zone_device *tz_highest_dev;
};
121 
122 static inline u8 mlxsw_state_to_duty(int state)
123 {
124 	return DIV_ROUND_CLOSEST(state * MLXSW_THERMAL_MAX_DUTY,
125 				 MLXSW_THERMAL_MAX_STATE);
126 }
127 
128 static inline int mlxsw_duty_to_state(u8 duty)
129 {
130 	return DIV_ROUND_CLOSEST(duty * MLXSW_THERMAL_MAX_STATE,
131 				 MLXSW_THERMAL_MAX_DUTY);
132 }
133 
134 static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
135 					struct thermal_cooling_device *cdev)
136 {
137 	int i;
138 
139 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
140 		if (thermal->cdevs[i] == cdev)
141 			return i;
142 
143 	/* Allow mlxsw thermal zone binding to an external cooling device */
144 	for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) {
145 		if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
146 			    sizeof(cdev->type)))
147 			return 0;
148 	}
149 
150 	return -ENODEV;
151 }
152 
153 static void
154 mlxsw_thermal_module_trips_reset(struct mlxsw_thermal_module *tz)
155 {
156 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = 0;
157 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = 0;
158 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = 0;
159 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = 0;
160 }
161 
162 static int
163 mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core,
164 				  struct mlxsw_thermal_module *tz)
165 {
166 	int crit_temp, emerg_temp;
167 	int err;
168 
169 	err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
170 						   SFP_TEMP_HIGH_WARN,
171 						   &crit_temp);
172 	if (err)
173 		return err;
174 
175 	err = mlxsw_env_module_temp_thresholds_get(core, tz->module,
176 						   SFP_TEMP_HIGH_ALARM,
177 						   &emerg_temp);
178 	if (err)
179 		return err;
180 
181 	/* According to the system thermal requirements, the thermal zones are
182 	 * defined with four trip points. The critical and emergency
183 	 * temperature thresholds, provided by QSFP module are set as "active"
184 	 * and "hot" trip points, "normal" and "critical" trip points are
185 	 * derived from "active" and "hot" by subtracting or adding double
186 	 * hysteresis value.
187 	 */
188 	if (crit_temp >= MLXSW_THERMAL_MODULE_TEMP_SHIFT)
189 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp -
190 					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
191 	else
192 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_NORM].temp = crit_temp;
193 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HIGH].temp = crit_temp;
194 	tz->trips[MLXSW_THERMAL_TEMP_TRIP_HOT].temp = emerg_temp;
195 	if (emerg_temp > crit_temp)
196 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp +
197 					MLXSW_THERMAL_MODULE_TEMP_SHIFT;
198 	else
199 		tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp = emerg_temp;
200 
201 	return 0;
202 }
203 
204 static void mlxsw_thermal_tz_score_update(struct mlxsw_thermal *thermal,
205 					  struct thermal_zone_device *tzdev,
206 					  struct mlxsw_thermal_trip *trips,
207 					  int temp)
208 {
209 	struct mlxsw_thermal_trip *trip = trips;
210 	unsigned int score, delta, i, shift = 1;
211 
212 	/* Calculate thermal zone score, if temperature is above the critical
213 	 * threshold score is set to MLXSW_THERMAL_TEMP_SCORE_MAX.
214 	 */
215 	score = MLXSW_THERMAL_TEMP_SCORE_MAX;
216 	for (i = MLXSW_THERMAL_TEMP_TRIP_NORM; i < MLXSW_THERMAL_NUM_TRIPS;
217 	     i++, trip++) {
218 		if (temp < trip->temp) {
219 			delta = DIV_ROUND_CLOSEST(temp, trip->temp - temp);
220 			score = delta * shift;
221 			break;
222 		}
223 		shift *= 256;
224 	}
225 
226 	if (score > thermal->tz_highest_score) {
227 		thermal->tz_highest_score = score;
228 		thermal->tz_highest_dev = tzdev;
229 	}
230 }
231 
232 static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
233 			      struct thermal_cooling_device *cdev)
234 {
235 	struct mlxsw_thermal *thermal = tzdev->devdata;
236 	struct device *dev = thermal->bus_info->dev;
237 	int i, err;
238 
239 	/* If the cooling device is one of ours bind it */
240 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
241 		return 0;
242 
243 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
244 		const struct mlxsw_thermal_trip *trip = &thermal->trips[i];
245 
246 		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
247 						       trip->max_state,
248 						       trip->min_state,
249 						       THERMAL_WEIGHT_DEFAULT);
250 		if (err < 0) {
251 			dev_err(dev, "Failed to bind cooling device to trip %d\n", i);
252 			return err;
253 		}
254 	}
255 	return 0;
256 }
257 
258 static int mlxsw_thermal_unbind(struct thermal_zone_device *tzdev,
259 				struct thermal_cooling_device *cdev)
260 {
261 	struct mlxsw_thermal *thermal = tzdev->devdata;
262 	struct device *dev = thermal->bus_info->dev;
263 	int i;
264 	int err;
265 
266 	/* If the cooling device is our one unbind it */
267 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
268 		return 0;
269 
270 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
271 		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
272 		if (err < 0) {
273 			dev_err(dev, "Failed to unbind cooling device\n");
274 			return err;
275 		}
276 	}
277 	return 0;
278 }
279 
280 static int mlxsw_thermal_get_mode(struct thermal_zone_device *tzdev,
281 				  enum thermal_device_mode *mode)
282 {
283 	struct mlxsw_thermal *thermal = tzdev->devdata;
284 
285 	*mode = thermal->mode;
286 
287 	return 0;
288 }
289 
290 static int mlxsw_thermal_set_mode(struct thermal_zone_device *tzdev,
291 				  enum thermal_device_mode mode)
292 {
293 	struct mlxsw_thermal *thermal = tzdev->devdata;
294 
295 	mutex_lock(&tzdev->lock);
296 
297 	if (mode == THERMAL_DEVICE_ENABLED)
298 		tzdev->polling_delay = thermal->polling_delay;
299 	else
300 		tzdev->polling_delay = 0;
301 
302 	mutex_unlock(&tzdev->lock);
303 
304 	thermal->mode = mode;
305 	thermal_zone_device_update(tzdev, THERMAL_EVENT_UNSPECIFIED);
306 
307 	return 0;
308 }
309 
310 static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
311 				  int *p_temp)
312 {
313 	struct mlxsw_thermal *thermal = tzdev->devdata;
314 	struct device *dev = thermal->bus_info->dev;
315 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
316 	int temp;
317 	int err;
318 
319 	mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
320 
321 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
322 	if (err) {
323 		dev_err(dev, "Failed to query temp sensor\n");
324 		return err;
325 	}
326 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
327 	if (temp > 0)
328 		mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips,
329 					      temp);
330 
331 	*p_temp = temp;
332 	return 0;
333 }
334 
335 static int mlxsw_thermal_get_trip_type(struct thermal_zone_device *tzdev,
336 				       int trip,
337 				       enum thermal_trip_type *p_type)
338 {
339 	struct mlxsw_thermal *thermal = tzdev->devdata;
340 
341 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
342 		return -EINVAL;
343 
344 	*p_type = thermal->trips[trip].type;
345 	return 0;
346 }
347 
348 static int mlxsw_thermal_get_trip_temp(struct thermal_zone_device *tzdev,
349 				       int trip, int *p_temp)
350 {
351 	struct mlxsw_thermal *thermal = tzdev->devdata;
352 
353 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
354 		return -EINVAL;
355 
356 	*p_temp = thermal->trips[trip].temp;
357 	return 0;
358 }
359 
360 static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev,
361 				       int trip, int temp)
362 {
363 	struct mlxsw_thermal *thermal = tzdev->devdata;
364 
365 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
366 	    temp > MLXSW_THERMAL_ASIC_TEMP_CRIT)
367 		return -EINVAL;
368 
369 	thermal->trips[trip].temp = temp;
370 	return 0;
371 }
372 
373 static int mlxsw_thermal_get_trip_hyst(struct thermal_zone_device *tzdev,
374 				       int trip, int *p_hyst)
375 {
376 	struct mlxsw_thermal *thermal = tzdev->devdata;
377 
378 	*p_hyst = thermal->trips[trip].hyst;
379 	return 0;
380 }
381 
382 static int mlxsw_thermal_set_trip_hyst(struct thermal_zone_device *tzdev,
383 				       int trip, int hyst)
384 {
385 	struct mlxsw_thermal *thermal = tzdev->devdata;
386 
387 	thermal->trips[trip].hyst = hyst;
388 	return 0;
389 }
390 
391 static int mlxsw_thermal_trend_get(struct thermal_zone_device *tzdev,
392 				   int trip, enum thermal_trend *trend)
393 {
394 	struct mlxsw_thermal_module *tz = tzdev->devdata;
395 	struct mlxsw_thermal *thermal = tz->parent;
396 
397 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
398 		return -EINVAL;
399 
400 	if (tzdev == thermal->tz_highest_dev)
401 		return 1;
402 
403 	*trend = THERMAL_TREND_STABLE;
404 	return 0;
405 }
406 
407 static struct thermal_zone_device_ops mlxsw_thermal_ops = {
408 	.bind = mlxsw_thermal_bind,
409 	.unbind = mlxsw_thermal_unbind,
410 	.get_mode = mlxsw_thermal_get_mode,
411 	.set_mode = mlxsw_thermal_set_mode,
412 	.get_temp = mlxsw_thermal_get_temp,
413 	.get_trip_type	= mlxsw_thermal_get_trip_type,
414 	.get_trip_temp	= mlxsw_thermal_get_trip_temp,
415 	.set_trip_temp	= mlxsw_thermal_set_trip_temp,
416 	.get_trip_hyst	= mlxsw_thermal_get_trip_hyst,
417 	.set_trip_hyst	= mlxsw_thermal_set_trip_hyst,
418 	.get_trend	= mlxsw_thermal_trend_get,
419 };
420 
421 static int mlxsw_thermal_module_bind(struct thermal_zone_device *tzdev,
422 				     struct thermal_cooling_device *cdev)
423 {
424 	struct mlxsw_thermal_module *tz = tzdev->devdata;
425 	struct mlxsw_thermal *thermal = tz->parent;
426 	int i, j, err;
427 
428 	/* If the cooling device is one of ours bind it */
429 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
430 		return 0;
431 
432 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
433 		const struct mlxsw_thermal_trip *trip = &tz->trips[i];
434 
435 		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
436 						       trip->max_state,
437 						       trip->min_state,
438 						       THERMAL_WEIGHT_DEFAULT);
439 		if (err < 0)
440 			goto err_bind_cooling_device;
441 	}
442 	return 0;
443 
444 err_bind_cooling_device:
445 	for (j = i - 1; j >= 0; j--)
446 		thermal_zone_unbind_cooling_device(tzdev, j, cdev);
447 	return err;
448 }
449 
450 static int mlxsw_thermal_module_unbind(struct thermal_zone_device *tzdev,
451 				       struct thermal_cooling_device *cdev)
452 {
453 	struct mlxsw_thermal_module *tz = tzdev->devdata;
454 	struct mlxsw_thermal *thermal = tz->parent;
455 	int i;
456 	int err;
457 
458 	/* If the cooling device is one of ours unbind it */
459 	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
460 		return 0;
461 
462 	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
463 		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
464 		WARN_ON(err);
465 	}
466 	return err;
467 }
468 
469 static int mlxsw_thermal_module_mode_get(struct thermal_zone_device *tzdev,
470 					 enum thermal_device_mode *mode)
471 {
472 	struct mlxsw_thermal_module *tz = tzdev->devdata;
473 
474 	*mode = tz->mode;
475 
476 	return 0;
477 }
478 
479 static int mlxsw_thermal_module_mode_set(struct thermal_zone_device *tzdev,
480 					 enum thermal_device_mode mode)
481 {
482 	struct mlxsw_thermal_module *tz = tzdev->devdata;
483 	struct mlxsw_thermal *thermal = tz->parent;
484 
485 	mutex_lock(&tzdev->lock);
486 
487 	if (mode == THERMAL_DEVICE_ENABLED)
488 		tzdev->polling_delay = thermal->polling_delay;
489 	else
490 		tzdev->polling_delay = 0;
491 
492 	mutex_unlock(&tzdev->lock);
493 
494 	tz->mode = mode;
495 	thermal_zone_device_update(tzdev, THERMAL_EVENT_UNSPECIFIED);
496 
497 	return 0;
498 }
499 
500 static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
501 					 int *p_temp)
502 {
503 	struct mlxsw_thermal_module *tz = tzdev->devdata;
504 	struct mlxsw_thermal *thermal = tz->parent;
505 	struct device *dev = thermal->bus_info->dev;
506 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
507 	int temp;
508 	int err;
509 
510 	/* Read module temperature. */
511 	mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN +
512 			    tz->module, false, false);
513 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
514 	if (err) {
515 		/* Do not return error - in case of broken module's sensor
516 		 * it will cause error message flooding.
517 		 */
518 		temp = 0;
519 		*p_temp = (int) temp;
520 		return 0;
521 	}
522 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
523 	*p_temp = temp;
524 
525 	if (!temp)
526 		return 0;
527 
528 	/* Update trip points. */
529 	err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz);
530 	if (!err && temp > 0)
531 		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
532 
533 	return 0;
534 }
535 
536 static int
537 mlxsw_thermal_module_trip_type_get(struct thermal_zone_device *tzdev, int trip,
538 				   enum thermal_trip_type *p_type)
539 {
540 	struct mlxsw_thermal_module *tz = tzdev->devdata;
541 
542 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
543 		return -EINVAL;
544 
545 	*p_type = tz->trips[trip].type;
546 	return 0;
547 }
548 
549 static int
550 mlxsw_thermal_module_trip_temp_get(struct thermal_zone_device *tzdev,
551 				   int trip, int *p_temp)
552 {
553 	struct mlxsw_thermal_module *tz = tzdev->devdata;
554 
555 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
556 		return -EINVAL;
557 
558 	*p_temp = tz->trips[trip].temp;
559 	return 0;
560 }
561 
562 static int
563 mlxsw_thermal_module_trip_temp_set(struct thermal_zone_device *tzdev,
564 				   int trip, int temp)
565 {
566 	struct mlxsw_thermal_module *tz = tzdev->devdata;
567 
568 	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
569 	    temp > tz->trips[MLXSW_THERMAL_TEMP_TRIP_CRIT].temp)
570 		return -EINVAL;
571 
572 	tz->trips[trip].temp = temp;
573 	return 0;
574 }
575 
576 static int
577 mlxsw_thermal_module_trip_hyst_get(struct thermal_zone_device *tzdev, int trip,
578 				   int *p_hyst)
579 {
580 	struct mlxsw_thermal_module *tz = tzdev->devdata;
581 
582 	*p_hyst = tz->trips[trip].hyst;
583 	return 0;
584 }
585 
586 static int
587 mlxsw_thermal_module_trip_hyst_set(struct thermal_zone_device *tzdev, int trip,
588 				   int hyst)
589 {
590 	struct mlxsw_thermal_module *tz = tzdev->devdata;
591 
592 	tz->trips[trip].hyst = hyst;
593 	return 0;
594 }
595 
/* Callbacks of the per-module thermal zones; these zones are registered
 * with a struct mlxsw_thermal_module as devdata.
 */
static struct thermal_zone_device_ops mlxsw_thermal_module_ops = {
	.bind		= mlxsw_thermal_module_bind,
	.unbind		= mlxsw_thermal_module_unbind,
	.get_mode	= mlxsw_thermal_module_mode_get,
	.set_mode	= mlxsw_thermal_module_mode_set,
	.get_temp	= mlxsw_thermal_module_temp_get,
	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
	.get_trend	= mlxsw_thermal_trend_get,
};
609 
610 static int mlxsw_thermal_gearbox_temp_get(struct thermal_zone_device *tzdev,
611 					  int *p_temp)
612 {
613 	struct mlxsw_thermal_module *tz = tzdev->devdata;
614 	struct mlxsw_thermal *thermal = tz->parent;
615 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
616 	u16 index;
617 	int temp;
618 	int err;
619 
620 	index = MLXSW_REG_MTMP_GBOX_INDEX_MIN + tz->module;
621 	mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
622 
623 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
624 	if (err)
625 		return err;
626 
627 	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
628 	if (temp > 0)
629 		mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
630 
631 	*p_temp = temp;
632 	return 0;
633 }
634 
/* Callbacks of the per-gearbox thermal zones. They reuse the module
 * callbacks except for the temperature read, which uses the gearbox sensor
 * index range.
 */
static struct thermal_zone_device_ops mlxsw_thermal_gearbox_ops = {
	.bind		= mlxsw_thermal_module_bind,
	.unbind		= mlxsw_thermal_module_unbind,
	.get_mode	= mlxsw_thermal_module_mode_get,
	.set_mode	= mlxsw_thermal_module_mode_set,
	.get_temp	= mlxsw_thermal_gearbox_temp_get,
	.get_trip_type	= mlxsw_thermal_module_trip_type_get,
	.get_trip_temp	= mlxsw_thermal_module_trip_temp_get,
	.set_trip_temp	= mlxsw_thermal_module_trip_temp_set,
	.get_trip_hyst	= mlxsw_thermal_module_trip_hyst_get,
	.set_trip_hyst	= mlxsw_thermal_module_trip_hyst_set,
	.get_trend	= mlxsw_thermal_trend_get,
};
648 
649 static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
650 				       unsigned long *p_state)
651 {
652 	*p_state = MLXSW_THERMAL_MAX_STATE;
653 	return 0;
654 }
655 
656 static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev,
657 				       unsigned long *p_state)
658 
659 {
660 	struct mlxsw_thermal *thermal = cdev->devdata;
661 	struct device *dev = thermal->bus_info->dev;
662 	char mfsc_pl[MLXSW_REG_MFSC_LEN];
663 	int err, idx;
664 	u8 duty;
665 
666 	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
667 	if (idx < 0)
668 		return idx;
669 
670 	mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
671 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
672 	if (err) {
673 		dev_err(dev, "Failed to query PWM duty\n");
674 		return err;
675 	}
676 
677 	duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
678 	*p_state = mlxsw_duty_to_state(duty);
679 	return 0;
680 }
681 
/* .set_cur_state callback of the PWM cooling devices.
 *
 * @state is either a regular cooling state (0..MLXSW_THERMAL_MAX_STATE) or a
 * value in MLXSW_THERMAL_SPEED_MIN..MLXSW_THERMAL_SPEED_MAX, which requests
 * a new dynamic fan speed minimum of (@state - MLXSW_THERMAL_MAX_STATE).
 *
 * Returns 0 on success, a negative value when @cdev is not recognized,
 * -EINVAL for a state outside both ranges, or a register access error.
 */
static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev,
				       unsigned long state)

{
	struct mlxsw_thermal *thermal = cdev->devdata;
	struct device *dev = thermal->bus_info->dev;
	char mfsc_pl[MLXSW_REG_MFSC_LEN];
	unsigned long cur_state, i;
	int idx;
	u8 duty;
	int err;

	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
	if (idx < 0)
		return idx;

	/* Verify if this request is for changing the allowed dynamic fan
	 * minimum. If it is, rebuild the cooling levels so that no level is
	 * below the new minimum, and raise the fan speed if the current state
	 * is below the newly requested minimum.
	 * For example, if the minimum is changed from 4 to 6,
	 * thermal->cooling_levels[0 to 5] all become 6, so a fan currently
	 * at state 5 has to be reprogrammed to 6.
	 */
	if (state >= MLXSW_THERMAL_SPEED_MIN &&
	    state <= MLXSW_THERMAL_SPEED_MAX) {
		/* Translate the request into the minimum state itself. */
		state -= MLXSW_THERMAL_MAX_STATE;
		for (i = 0; i <= MLXSW_THERMAL_MAX_STATE; i++)
			thermal->cooling_levels[i] = max(state, i);

		/* Read back the current PWM duty to learn the current state. */
		mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
		err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
		if (err)
			return err;

		duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
		cur_state = mlxsw_duty_to_state(duty);

		/* Nothing to write when the fan already runs above the new
		 * dynamic minimum. Otherwise re-apply the current state, so
		 * that the cooling levels lookup below lifts it to the new
		 * minimum.
		 */
		if (state < cur_state)
			return 0;

		state = cur_state;
	}

	if (state > MLXSW_THERMAL_MAX_STATE)
		return -EINVAL;

	/* Normalize the state to the valid speed range. */
	state = thermal->cooling_levels[state];
	mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
	err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
	if (err) {
		dev_err(dev, "Failed to write PWM duty\n");
		return err;
	}
	return 0;
}
742 
/* Callbacks of the PWM cooling devices registered by mlxsw_thermal_init(). */
static const struct thermal_cooling_device_ops mlxsw_cooling_ops = {
	.get_max_state	= mlxsw_thermal_get_max_state,
	.get_cur_state	= mlxsw_thermal_get_cur_state,
	.set_cur_state	= mlxsw_thermal_set_cur_state,
};
748 
749 static int
750 mlxsw_thermal_module_tz_init(struct mlxsw_thermal_module *module_tz)
751 {
752 	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
753 	int err;
754 
755 	snprintf(tz_name, sizeof(tz_name), "mlxsw-module%d",
756 		 module_tz->module + 1);
757 	module_tz->tzdev = thermal_zone_device_register(tz_name,
758 							MLXSW_THERMAL_NUM_TRIPS,
759 							MLXSW_THERMAL_TRIP_MASK,
760 							module_tz,
761 							&mlxsw_thermal_module_ops,
762 							NULL, 0, 0);
763 	if (IS_ERR(module_tz->tzdev)) {
764 		err = PTR_ERR(module_tz->tzdev);
765 		return err;
766 	}
767 
768 	module_tz->mode = THERMAL_DEVICE_ENABLED;
769 	return 0;
770 }
771 
/* Unregister the thermal zone of a module. */
static void mlxsw_thermal_module_tz_fini(struct thermal_zone_device *tzdev)
{
	struct thermal_zone_device *zone = tzdev;

	thermal_zone_device_unregister(zone);
}
776 
777 static int
778 mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core,
779 			  struct mlxsw_thermal *thermal, u8 module)
780 {
781 	struct mlxsw_thermal_module *module_tz;
782 
783 	module_tz = &thermal->tz_module_arr[module];
784 	/* Skip if parent is already set (case of port split). */
785 	if (module_tz->parent)
786 		return 0;
787 	module_tz->module = module;
788 	module_tz->parent = thermal;
789 	memcpy(module_tz->trips, default_thermal_trips,
790 	       sizeof(thermal->trips));
791 	/* Initialize all trip point. */
792 	mlxsw_thermal_module_trips_reset(module_tz);
793 	/* Update trip point according to the module data. */
794 	return mlxsw_thermal_module_trips_update(dev, core, module_tz);
795 }
796 
797 static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz)
798 {
799 	if (module_tz && module_tz->tzdev) {
800 		mlxsw_thermal_module_tz_fini(module_tz->tzdev);
801 		module_tz->tzdev = NULL;
802 		module_tz->parent = NULL;
803 	}
804 }
805 
806 static int
807 mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core,
808 			   struct mlxsw_thermal *thermal)
809 {
810 	struct mlxsw_thermal_module *module_tz;
811 	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
812 	int i, err;
813 
814 	if (!mlxsw_core_res_query_enabled(core))
815 		return 0;
816 
817 	mlxsw_reg_mgpir_pack(mgpir_pl);
818 	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
819 	if (err)
820 		return err;
821 
822 	mlxsw_reg_mgpir_unpack(mgpir_pl, NULL, NULL, NULL,
823 			       &thermal->tz_module_num);
824 
825 	thermal->tz_module_arr = kcalloc(thermal->tz_module_num,
826 					 sizeof(*thermal->tz_module_arr),
827 					 GFP_KERNEL);
828 	if (!thermal->tz_module_arr)
829 		return -ENOMEM;
830 
831 	for (i = 0; i < thermal->tz_module_num; i++) {
832 		err = mlxsw_thermal_module_init(dev, core, thermal, i);
833 		if (err)
834 			goto err_unreg_tz_module_arr;
835 	}
836 
837 	for (i = 0; i < thermal->tz_module_num; i++) {
838 		module_tz = &thermal->tz_module_arr[i];
839 		if (!module_tz->parent)
840 			continue;
841 		err = mlxsw_thermal_module_tz_init(module_tz);
842 		if (err)
843 			goto err_unreg_tz_module_arr;
844 	}
845 
846 	return 0;
847 
848 err_unreg_tz_module_arr:
849 	for (i = thermal->tz_module_num - 1; i >= 0; i--)
850 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
851 	kfree(thermal->tz_module_arr);
852 	return err;
853 }
854 
855 static void
856 mlxsw_thermal_modules_fini(struct mlxsw_thermal *thermal)
857 {
858 	int i;
859 
860 	if (!mlxsw_core_res_query_enabled(thermal->core))
861 		return;
862 
863 	for (i = thermal->tz_module_num - 1; i >= 0; i--)
864 		mlxsw_thermal_module_fini(&thermal->tz_module_arr[i]);
865 	kfree(thermal->tz_module_arr);
866 }
867 
868 static int
869 mlxsw_thermal_gearbox_tz_init(struct mlxsw_thermal_module *gearbox_tz)
870 {
871 	char tz_name[MLXSW_THERMAL_ZONE_MAX_NAME];
872 
873 	snprintf(tz_name, sizeof(tz_name), "mlxsw-gearbox%d",
874 		 gearbox_tz->module + 1);
875 	gearbox_tz->tzdev = thermal_zone_device_register(tz_name,
876 						MLXSW_THERMAL_NUM_TRIPS,
877 						MLXSW_THERMAL_TRIP_MASK,
878 						gearbox_tz,
879 						&mlxsw_thermal_gearbox_ops,
880 						NULL, 0, 0);
881 	if (IS_ERR(gearbox_tz->tzdev))
882 		return PTR_ERR(gearbox_tz->tzdev);
883 
884 	gearbox_tz->mode = THERMAL_DEVICE_ENABLED;
885 	return 0;
886 }
887 
888 static void
889 mlxsw_thermal_gearbox_tz_fini(struct mlxsw_thermal_module *gearbox_tz)
890 {
891 	thermal_zone_device_unregister(gearbox_tz->tzdev);
892 }
893 
894 static int
895 mlxsw_thermal_gearboxes_init(struct device *dev, struct mlxsw_core *core,
896 			     struct mlxsw_thermal *thermal)
897 {
898 	struct mlxsw_thermal_module *gearbox_tz;
899 	char mgpir_pl[MLXSW_REG_MGPIR_LEN];
900 	int i;
901 	int err;
902 
903 	if (!mlxsw_core_res_query_enabled(core))
904 		return 0;
905 
906 	mlxsw_reg_mgpir_pack(mgpir_pl);
907 	err = mlxsw_reg_query(core, MLXSW_REG(mgpir), mgpir_pl);
908 	if (err)
909 		return err;
910 
911 	mlxsw_reg_mgpir_unpack(mgpir_pl, &thermal->tz_gearbox_num, NULL, NULL,
912 			       NULL);
913 	if (!thermal->tz_gearbox_num)
914 		return 0;
915 
916 	thermal->tz_gearbox_arr = kcalloc(thermal->tz_gearbox_num,
917 					  sizeof(*thermal->tz_gearbox_arr),
918 					  GFP_KERNEL);
919 	if (!thermal->tz_gearbox_arr)
920 		return -ENOMEM;
921 
922 	for (i = 0; i < thermal->tz_gearbox_num; i++) {
923 		gearbox_tz = &thermal->tz_gearbox_arr[i];
924 		memcpy(gearbox_tz->trips, default_thermal_trips,
925 		       sizeof(thermal->trips));
926 		gearbox_tz->module = i;
927 		gearbox_tz->parent = thermal;
928 		err = mlxsw_thermal_gearbox_tz_init(gearbox_tz);
929 		if (err)
930 			goto err_unreg_tz_gearbox;
931 	}
932 
933 	return 0;
934 
935 err_unreg_tz_gearbox:
936 	for (i--; i >= 0; i--)
937 		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
938 	kfree(thermal->tz_gearbox_arr);
939 	return err;
940 }
941 
942 static void
943 mlxsw_thermal_gearboxes_fini(struct mlxsw_thermal *thermal)
944 {
945 	int i;
946 
947 	if (!mlxsw_core_res_query_enabled(thermal->core))
948 		return;
949 
950 	for (i = thermal->tz_gearbox_num - 1; i >= 0; i--)
951 		mlxsw_thermal_gearbox_tz_fini(&thermal->tz_gearbox_arr[i]);
952 	kfree(thermal->tz_gearbox_arr);
953 }
954 
955 int mlxsw_thermal_init(struct mlxsw_core *core,
956 		       const struct mlxsw_bus_info *bus_info,
957 		       struct mlxsw_thermal **p_thermal)
958 {
959 	char mfcr_pl[MLXSW_REG_MFCR_LEN] = { 0 };
960 	enum mlxsw_reg_mfcr_pwm_frequency freq;
961 	struct device *dev = bus_info->dev;
962 	struct mlxsw_thermal *thermal;
963 	u16 tacho_active;
964 	u8 pwm_active;
965 	int err, i;
966 
967 	thermal = devm_kzalloc(dev, sizeof(*thermal),
968 			       GFP_KERNEL);
969 	if (!thermal)
970 		return -ENOMEM;
971 
972 	thermal->core = core;
973 	thermal->bus_info = bus_info;
974 	memcpy(thermal->trips, default_thermal_trips, sizeof(thermal->trips));
975 
976 	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl);
977 	if (err) {
978 		dev_err(dev, "Failed to probe PWMs\n");
979 		goto err_free_thermal;
980 	}
981 	mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active);
982 
983 	for (i = 0; i < MLXSW_MFCR_TACHOS_MAX; i++) {
984 		if (tacho_active & BIT(i)) {
985 			char mfsl_pl[MLXSW_REG_MFSL_LEN];
986 
987 			mlxsw_reg_mfsl_pack(mfsl_pl, i, 0, 0);
988 
989 			/* We need to query the register to preserve maximum */
990 			err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsl),
991 					      mfsl_pl);
992 			if (err)
993 				goto err_free_thermal;
994 
995 			/* set the minimal RPMs to 0 */
996 			mlxsw_reg_mfsl_tach_min_set(mfsl_pl, 0);
997 			err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsl),
998 					      mfsl_pl);
999 			if (err)
1000 				goto err_free_thermal;
1001 		}
1002 	}
1003 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
1004 		if (pwm_active & BIT(i)) {
1005 			struct thermal_cooling_device *cdev;
1006 
1007 			cdev = thermal_cooling_device_register("mlxsw_fan",
1008 							       thermal,
1009 							       &mlxsw_cooling_ops);
1010 			if (IS_ERR(cdev)) {
1011 				err = PTR_ERR(cdev);
1012 				dev_err(dev, "Failed to register cooling device\n");
1013 				goto err_unreg_cdevs;
1014 			}
1015 			thermal->cdevs[i] = cdev;
1016 		}
1017 	}
1018 
1019 	/* Initialize cooling levels per PWM state. */
1020 	for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++)
1021 		thermal->cooling_levels[i] = max(MLXSW_THERMAL_SPEED_MIN_LEVEL,
1022 						 i);
1023 
1024 	thermal->polling_delay = bus_info->low_frequency ?
1025 				 MLXSW_THERMAL_SLOW_POLL_INT :
1026 				 MLXSW_THERMAL_POLL_INT;
1027 
1028 	thermal->tzdev = thermal_zone_device_register("mlxsw",
1029 						      MLXSW_THERMAL_NUM_TRIPS,
1030 						      MLXSW_THERMAL_TRIP_MASK,
1031 						      thermal,
1032 						      &mlxsw_thermal_ops,
1033 						      NULL, 0,
1034 						      thermal->polling_delay);
1035 	if (IS_ERR(thermal->tzdev)) {
1036 		err = PTR_ERR(thermal->tzdev);
1037 		dev_err(dev, "Failed to register thermal zone\n");
1038 		goto err_unreg_cdevs;
1039 	}
1040 
1041 	err = mlxsw_thermal_modules_init(dev, core, thermal);
1042 	if (err)
1043 		goto err_unreg_tzdev;
1044 
1045 	err = mlxsw_thermal_gearboxes_init(dev, core, thermal);
1046 	if (err)
1047 		goto err_unreg_modules_tzdev;
1048 
1049 	thermal->mode = THERMAL_DEVICE_ENABLED;
1050 	*p_thermal = thermal;
1051 	return 0;
1052 
1053 err_unreg_modules_tzdev:
1054 	mlxsw_thermal_modules_fini(thermal);
1055 err_unreg_tzdev:
1056 	if (thermal->tzdev) {
1057 		thermal_zone_device_unregister(thermal->tzdev);
1058 		thermal->tzdev = NULL;
1059 	}
1060 err_unreg_cdevs:
1061 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
1062 		if (thermal->cdevs[i])
1063 			thermal_cooling_device_unregister(thermal->cdevs[i]);
1064 err_free_thermal:
1065 	devm_kfree(dev, thermal);
1066 	return err;
1067 }
1068 
1069 void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
1070 {
1071 	int i;
1072 
1073 	mlxsw_thermal_gearboxes_fini(thermal);
1074 	mlxsw_thermal_modules_fini(thermal);
1075 	if (thermal->tzdev) {
1076 		thermal_zone_device_unregister(thermal->tzdev);
1077 		thermal->tzdev = NULL;
1078 	}
1079 
1080 	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
1081 		if (thermal->cdevs[i]) {
1082 			thermal_cooling_device_unregister(thermal->cdevs[i]);
1083 			thermal->cdevs[i] = NULL;
1084 		}
1085 	}
1086 
1087 	devm_kfree(thermal->bus_info->dev, thermal);
1088 }
1089