1 // SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
2 /*
3 * Interface with the On Chip Controller,
4 * which enforces power and thermal management
5 *
6 * Copyright 2013-2019 IBM Corp.
7 */
8
9 #include <skiboot.h>
10 #include <xscom.h>
11 #include <xscom-p8-regs.h>
12 #include <io.h>
13 #include <cpu.h>
14 #include <chip.h>
15 #include <mem_region.h>
16 #include <timebase.h>
17 #include <errorlog.h>
18 #include <opal-api.h>
19 #include <opal-msg.h>
20 #include <timer.h>
21 #include <i2c.h>
22 #include <powercap.h>
23 #include <psr.h>
24 #include <sensor.h>
25 #include <occ.h>
26 #include <psi.h>
27
28 /* OCC Communication Area for PStates */
29
/*
 * Candidate offsets of the OCC-OPAL shared data.
 * NOTE(review): presumably one of these is stored in
 * homer_opal_data_offset depending on the processor generation; the
 * assignment is not in this part of the file - confirm.
 */
#define P8_HOMER_OPAL_DATA_OFFSET 0x1F8000
#define P9_HOMER_OPAL_DATA_OFFSET 0x0E2000

/* Added on top of the pstate table address to reach struct occ_dynamic_data */
#define OPAL_DYNAMIC_DATA_OFFSET 0x0B80
/* relative to HOMER_OPAL_DATA_OFFSET */

/* Number of entries in every pstates[] array of struct occ_pstate_table */
#define MAX_PSTATES 256
/* Sizes of the per-layout core_max[] arrays (v2, v9 and v10 respectively) */
#define MAX_P8_CORES 12
#define MAX_P9_CORES 24
#define MAX_P10_CORES 32

/* Sizes of data[] in struct opal_command_buffer / struct occ_response_buffer */
#define MAX_OPAL_CMD_DATA_LENGTH 4090
#define MAX_OCC_RSP_DATA_LENGTH 8698

/* NOTE(review): PIR masks and FREQ_* selectors are not used in this part of the file */
#define P8_PIR_CORE_MASK 0xFFF8
#define P9_PIR_QUAD_MASK 0xFFF0
#define P10_PIR_CHIP_MASK 0x0000
#define FREQ_MAX_IN_DOMAIN 0
#define FREQ_MOST_RECENTLY_SET 1
49
50 /**
51 * OCC-OPAL Shared Memory Region
52 *
53 * Reference document :
54 * https://github.com/open-power/docs/blob/master/occ/OCC_OpenPwr_FW_Interfaces.pdf
55 *
56 * Supported layout versions:
57 * - 0x01, 0x02 : P8
58 * https://github.com/open-power/occ/blob/master_p8/src/occ/proc/proc_pstate.h
59 *
60 * - 0x90 : P9
61 * https://github.com/open-power/occ/blob/master/src/occ_405/proc/proc_pstate.h
62 * In 0x90 the data is separated into :-
63 * -- Static Data (struct occ_pstate_table): Data is written once by OCC
64 * -- Dynamic Data (struct occ_dynamic_data): Data is updated at runtime
65 *
66 * struct occ_pstate_table - Pstate table layout
67 * @valid: Indicates if data is valid
68 * @version: Layout version [Major/Minor]
69 * @v2.throttle: Reason for limiting the max pstate
70 * @v9.occ_role: OCC role (Master/Slave)
71 * @v#.pstate_min: Minimum pstate ever allowed
72 * @v#.pstate_nom: Nominal pstate
73 * @v#.pstate_turbo: Maximum turbo pstate
74 * @v#.pstate_ultra_turbo: Maximum ultra turbo pstate and the maximum
75 * pstate ever allowed
76 * @v#.pstates: Pstate-id and frequency list from Pmax to Pmin
77 * @v#.pstates.id: Pstate-id
78 * @v#.pstates.flags: Pstate-flag(reserved)
79 * @v2.pstates.vdd: Voltage Identifier
80 * @v2.pstates.vcs: Voltage Identifier
81 * @v#.pstates.freq_khz: Frequency in KHz
82 * @v#.core_max[1..N]: Max pstate with N active cores
83 * @spare/reserved/pad: Unused data
84 */
struct occ_pstate_table {
	u8 valid;	/* checked against 1 for layouts 0x01/0x02 at OCC init */
	u8 version;	/* high nibble selects the layout (0x0, 0x9 or 0xA) */
	/* Exactly one of the following layouts is meaningful, chosen by @version */
	union __packed {
		struct __packed { /* Version 0x01 and 0x02 */
			u8 throttle;
			/* P8 pstate ids are signed: 0 (Pmax) down to -N (Pmin) */
			s8 pstate_min;
			s8 pstate_nom;
			s8 pstate_turbo;
			s8 pstate_ultra_turbo;
			u8 spare;
			u64 reserved;
			struct __packed {
				s8 id;
				u8 flags;
				u8 vdd;		/* exported as "ibm,pstate-vdds" */
				u8 vcs;		/* exported as "ibm,pstate-vcss" */
				__be32 freq_khz; /* big-endian, converted to MHz for the DT */
			} pstates[MAX_PSTATES];
			s8 core_max[MAX_P8_CORES];
			u8 pad[100];
		} v2;
		struct __packed { /* Version 0x90 */
			u8 occ_role;
			/* P9 pstate ids are unsigned: 0 (Pmax) up to +N (Pmin) */
			u8 pstate_min;
			u8 pstate_nom;
			u8 pstate_turbo;
			u8 pstate_ultra_turbo;
			u8 spare;
			u64 reserved1;
			u64 reserved2;
			struct __packed {
				u8 id;
				u8 flags;
				u16 reserved;
				__be32 freq_khz; /* big-endian, converted to MHz for the DT */
			} pstates[MAX_PSTATES];
			u8 core_max[MAX_P9_CORES];
			u8 pad[56];
		} v9;
		struct __packed { /* Version 0xA0 */
			u8 occ_role;
			u8 pstate_min;
			u8 pstate_fixed_freq;	/* used as the nominal pstate */
			u8 pstate_base;		/* exported as turbo and "ibm,pstate-base" */
			u8 pstate_ultra_turbo;	/* Pmax when WOF is enabled */
			u8 pstate_fmax;		/* Pmax when WOF is disabled */
			u8 minor;
			u8 pstate_bottom_throttle;
			u8 spare;
			u8 spare1;
			u32 reserved_32;
			u64 reserved_64;
			struct __packed {
				u8 id;
				u8 valid;	/* entries with valid == 0 are skipped */
				u16 reserved;
				__be32 freq_khz; /* big-endian, converted to MHz for the DT */
			} pstates[MAX_PSTATES];
			u8 core_max[MAX_P10_CORES];
			u8 pad[48];
		} v10;
	};
} __packed;
149
150 /**
151 * OPAL-OCC Command Response Interface
152 *
153 * OPAL-OCC Command Buffer
154 *
155 * ---------------------------------------------------------------------
156 * | OPAL | Cmd | OPAL | | Cmd Data | Cmd Data | OPAL |
157 * | Cmd | Request | OCC | Reserved | Length | Length | Cmd |
158 * | Flags | ID | Cmd | | (MSB) | (LSB) | Data... |
159 * ---------------------------------------------------------------------
160 * | ….OPAL Command Data up to max of Cmd Data Length 4090 bytes |
161 * | |
162 * ---------------------------------------------------------------------
163 *
164 * OPAL Command Flag
165 *
166 * -----------------------------------------------------------------
167 * | Bit 7 | Bit 6 | Bit 5 | Bit 4 | Bit 3 | Bit 2 | Bit 1 | Bit 0 |
168 * | (msb) | | | | | | | (lsb) |
169 * -----------------------------------------------------------------
170 * |Cmd | | | | | | | |
171 * |Ready | | | | | | | |
172 * -----------------------------------------------------------------
173 *
174 * struct opal_command_buffer - Defines the layout of OPAL command buffer
175 * @flag: Provides general status of the command
176 * @request_id: Token to identify request
177 * @cmd: Command sent
178 * @data_size: Command data length
179 * @data: Command specific data
180 * @spare: Unused byte
181 */
struct opal_command_buffer {
	u8 flag;	/* OPAL command flag byte; bit 7 is "Cmd Ready" */
	u8 request_id;
	u8 cmd;
	u8 spare;
	u16 data_size;	/* laid out MSB first, then LSB, in the shared buffer */
	u8 data[MAX_OPAL_CMD_DATA_LENGTH];
} __packed;
190
191 /**
192 * OPAL-OCC Response Buffer
193 *
194 * ---------------------------------------------------------------------
195 * | OCC | Cmd | OPAL | Response | Rsp Data | Rsp Data | OPAL |
196 * | Rsp | Request | OCC | Status | Length | Length | Rsp |
197 * | Flags | ID | Cmd | | (MSB) | (LSB) | Data... |
198 * ---------------------------------------------------------------------
199 * | ….OPAL Response Data up to max of Rsp Data Length 8698 bytes |
200 * | |
201 * ---------------------------------------------------------------------
202 *
203 * OCC Response Flag
204 *
205 * -----------------------------------------------------------------
206 * | Bit 7 | Bit 6 | Bit 5 | Bit 4 | Bit 3 | Bit 2 | Bit 1 | Bit 0 |
207 * | (msb) | | | | | | | (lsb) |
208 * -----------------------------------------------------------------
209 * | | | | | | |OCC in | Rsp |
210 * | | | | | | |progress|Ready |
211 * -----------------------------------------------------------------
212 *
213 * struct occ_response_buffer - Defines the layout of OCC response buffer
214 * @flag: Provides general status of the response
215 * @request_id: Token to identify request
216 * @cmd: Command requested
217 * @status: Indicates success/failure status of
218 * the command
219 * @data_size: Response data length
220 * @data: Response specific data
221 */
struct occ_response_buffer {
	u8 flag;	/* OCC response flag byte; bit 0 "Rsp Ready", bit 1 "OCC in progress" */
	u8 request_id;
	u8 cmd;
	u8 status;
	u16 data_size;	/* laid out MSB first, then LSB, in the shared buffer */
	u8 data[MAX_OCC_RSP_DATA_LENGTH];
} __packed;
230
231 /**
232 * OCC-OPAL Shared Memory Interface Dynamic Data Vx90
233 *
234 * struct occ_dynamic_data - Contains runtime attributes
235 * @occ_state: Current state of OCC
236 * @major_version: Major version number
237 * @minor_version: Minor version number (backwards compatible)
238 * Version 1 indicates GPU presence populated
239 * @gpus_present: Bitmask of GPUs present (on systems where GPU
240 * presence is detected through APSS)
241 * @cpu_throttle: Reason for limiting the max pstate
242 * @mem_throttle: Reason for throttling memory
243 * @quick_pwr_drop: Indicates if QPD is asserted
244 * @pwr_shifting_ratio: Indicates the current percentage of power to
245 * take away from the CPU vs GPU when shifting
246 * power to maintain a power cap. Value of 100
247 * means take all power from CPU.
248 * @pwr_cap_type: Indicates type of power cap in effect
249 * @hard_min_pwr_cap: Hard minimum system power cap in Watts.
250 * Guaranteed unless hardware failure
251 * @max_pwr_cap: Maximum allowed system power cap in Watts
252 * @cur_pwr_cap: Current system power cap
253 * @soft_min_pwr_cap: Soft powercap minimum. OCC may or may not be
254 * able to maintain this
255 * @spare/reserved: Unused data
256 * @cmd: Opal Command Buffer
257 * @rsp: OCC Response Buffer
258 */
259 struct occ_dynamic_data {
260 u8 occ_state;
261 u8 major_version;
262 u8 minor_version;
263 u8 gpus_present;
264 struct __packed { /* Version 0x90 */
265 u8 spare1;
266 } v9;
267 struct __packed { /* Version 0xA0 */
268 u8 wof_enabled;
269 } v10;
270 u8 cpu_throttle;
271 u8 mem_throttle;
272 u8 quick_pwr_drop;
273 u8 pwr_shifting_ratio;
274 u8 pwr_cap_type;
275 u16 hard_min_pwr_cap;
276 u16 max_pwr_cap;
277 u16 cur_pwr_cap;
278 u16 soft_min_pwr_cap;
279 u8 pad[110];
280 struct opal_command_buffer cmd;
281 struct occ_response_buffer rsp;
282 } __packed;
283
/* NOTE(review): occ_reset and occ_lock are not used in this part of the file */
static bool occ_reset;
static struct lock occ_lock = LOCK_UNLOCKED;
/*
 * Byte offset added to a chip's homer_base to locate its
 * struct occ_pstate_table (see get_occ_pstate_table()).
 */
static unsigned long homer_opal_data_offset;

/* Error-log descriptor used when programming pstate registers fails */
DEFINE_LOG_ENTRY(OPAL_RC_OCC_PSTATE_INIT, OPAL_PLATFORM_ERR_EVT, OPAL_OCC,
		OPAL_CEC_HARDWARE, OPAL_INFO,
		OPAL_NA);

/* NOTE(review): presumably logged when the OCC does not respond in time - confirm */
DEFINE_LOG_ENTRY(OPAL_RC_OCC_TIMEOUT, OPAL_PLATFORM_ERR_EVT, OPAL_OCC,
		OPAL_CEC_HARDWARE, OPAL_UNRECOVERABLE_ERR_GENERAL,
		OPAL_NA);
295
/*
 * POWER9 and newer platforms have pstate values which are unsigned
 * positive values. They form a continuous set of unsigned integers
 * [0 to +N] where Pmax is 0 and Pmin is N. The linear ordering of
 * pstates changed with P9: P8 advertises negative pstate values
 * [0 to -N], where Pmax is 0 and Pmin is -N. The following routine
 * helps to abstract pstate comparison with pmax and perform sanity
 * checks on pstate limits.
 */
305
306 /**
307 * cmp_pstates: Compares the given two pstates and determines which
308 * among them is associated with a higher pstate.
309 *
310 * @a,@b: The pstate ids of the pstates being compared.
311 *
312 * Returns: -1 : If pstate associated with @a is smaller than
313 * the pstate associated with @b.
314 * 0 : If pstates associated with @a and @b are equal.
315 * 1 : If pstate associated with @a is greater than
316 * the pstate associated with @b.
317 */
cmp_pstates(int a,int b)318 static int cmp_pstates(int a, int b)
319 {
320 /* P8 has 0 to -N (pmax to pmin), P9 has 0 to +N (pmax to pmin) */
321 if (a > b)
322 return (proc_gen == proc_gen_p8)? 1 : -1;
323 else if (a < b)
324 return (proc_gen == proc_gen_p8)? -1 : 1;
325
326 return 0;
327 }
328
329 static inline
get_occ_pstate_table(struct proc_chip * chip)330 struct occ_pstate_table *get_occ_pstate_table(struct proc_chip *chip)
331 {
332 return (struct occ_pstate_table *)
333 (chip->homer_base + homer_opal_data_offset);
334 }
335
336 static inline
get_occ_dynamic_data(struct proc_chip * chip)337 struct occ_dynamic_data *get_occ_dynamic_data(struct proc_chip *chip)
338 {
339 return (struct occ_dynamic_data *)
340 (chip->homer_base + homer_opal_data_offset +
341 OPAL_DYNAMIC_DATA_OFFSET);
342 }
343
344 /*
345 * On Chips which have at least one active EX unit, check the
346 * HOMER area for pstate-table valid bit on versions 0x1 and 0x2, or
347 * HOMER dynamic area occ_state on version 0x90.
348 */
wait_for_all_occ_init(void)349 static bool wait_for_all_occ_init(void)
350 {
351 struct proc_chip *chip;
352 struct dt_node *xn;
353 struct occ_pstate_table *occ_data;
354 struct occ_dynamic_data *occ_dyn_data;
355 int tries;
356 uint64_t start_time, end_time;
357 uint32_t timeout = 0;
358
359 if (platform.occ_timeout)
360 timeout = platform.occ_timeout();
361
362 start_time = mftb();
363 for_each_chip(chip) {
364 u8 version;
365
366 /*
367 * If the chip doesn't any EX unit present, then OCC
368 * will not update the pstate-table. So, skip the
369 * check.
370 */
371 if (!chip->ex_present) {
372 prlog(PR_DEBUG, "OCC: Chip %02x has no active EX units. Skipping check\n",
373 chip->id);
374 continue;
375 }
376
377 /* Check for valid homer address */
378 if (!chip->homer_base) {
379 /**
380 * @fwts-label OCCInvalidHomerBase
381 * @fwts-advice The HOMER base address for a chip
382 * was not valid. This means that OCC (On Chip
383 * Controller) will be non-functional and CPU
384 * frequency scaling will not be functional. CPU may
385 * be set to a safe, low frequency. Power savings in
386 * CPU idle or CPU hotplug may be impacted.
387 */
388 prlog(PR_ERR,"OCC: Chip: %x homer_base is not valid\n",
389 chip->id);
390 return false;
391 }
392
393 /* Get PState table address */
394 occ_data = get_occ_pstate_table(chip);
395
396 /*
397 * Wait for the OCC to set an appropriate version bit.
398 * The wait is needed since on some platforms (such P8
399 * Tuletta), OCC is not loaded before OPAL boot. Hence
400 * initialization can take a while.
401 *
402 * Note: Checking for occ_data->version == (0x01/0x02/0x90/0xA0)
403 * is ok because we clear all of
404 * homer_base+size before passing memory to host
405 * services. This ensures occ_data->version == 0x0
406 * before OCC load.
407 */
408 tries = timeout * 10;
409 while (tries--) {
410 version = occ_data->version;
411
412 if (version == 0x01 || version == 0x02 ||
413 version == 0x90 || version == 0xA0)
414 break;
415
416 time_wait_ms(100);
417 }
418
419 version = occ_data->version;
420 switch (version) {
421 case 0x1:
422 case 0x2:
423 /*
424 * OCC-OPAL interface version 0x1 and 0x2 do not have
425 * the dynamic data. Hence the the only way to figure out
426 * if the OCC is up or not is to check the valid-bit
427 * in the pstate table.
428 */
429 if (occ_data->valid != 1) {
430 /**
431 * @fwts-label OCCInvalidPStateTable
432 * @fwts-advice The pstate table for a chip
433 * was not valid. This means that OCC (On Chip
434 * Controller) will be non-functional and CPU
435 * frequency scaling will not be functional. CPU may
436 * be set to a low, safe frequency. This means
437 * that CPU idle states and CPU frequency scaling
438 * may not be functional.
439 */
440 prlog(PR_ERR, "OCC: Chip: %x PState table is not valid\n",
441 chip->id);
442 return false;
443 }
444 break;
445
446 case 0x90:
447 /*
448 * OCC-OPAL interface version 0x90 has a
449 * dynamic data section. This has an
450 * occ_state field whose values inform about
451 * the state of the OCC.
452 *
453 * 0x00 = OCC not running. No communication
454 * allowed.
455 *
456 * 0x01 = Standby. No communication allowed.
457 *
458 * 0x02 = Observation State. Communication
459 * allowed and is command dependent.
460 *
461 * 0x03 = Active State. Communication allowed
462 * and is command dependent.
463 *
464 * 0x04 = Safe State. No communication
465 * allowed. Just like CPU throttle
466 * status, some failures will not allow
467 * for OCC to update state to safe.
468 *
469 * 0x05 = Characterization State.
470 * Communication allowed and is command
471 * dependent.
472 *
473 * We will error out if OCC is not in the
474 * Active State.
475 *
476 * XXX : Should we error out only if no
477 * communication is allowed with the
478 * OCC ?
479 */
480 occ_dyn_data = get_occ_dynamic_data(chip);
481 if (occ_dyn_data->occ_state != 0x3) {
482 /**
483 * @fwts-label OCCInactive
484 * @fwts-advice The OCC for a chip was not active.
485 * This means that CPU frequency scaling will
486 * not be functional. CPU may be set to a low,
487 * safe frequency. This means that CPU idle
488 * states and CPU frequency scaling may not be
489 * functional.
490 */
491 prlog(PR_ERR, "OCC: Chip: %x: OCC not active\n",
492 chip->id);
493 return false;
494 }
495 break;
496
497 case 0xA0:
498 /*
499 * OCC-OPAL interface version 0x90 has a
500 * dynamic data section. This has an
501 * occ_state field whose values inform about
502 * the state of the OCC.
503 *
504 * 0x00 = OCC not running. No communication
505 * allowed.
506 *
507 * 0x01 = Standby. No communication allowed.
508 *
509 * 0x02 = Observation State. Communication
510 * allowed and is command dependent.
511 *
512 * 0x03 = Active State. Communication allowed
513 * and is command dependent.
514 *
515 * 0x04 = Safe State. No communication
516 * allowed. Just like CPU throttle
517 * status, some failures will not allow
518 * for OCC to update state to safe.
519 *
520 * 0x05 = Characterization State.
521 * Communication allowed and is command
522 * dependent.
523 *
524 * We will error out if OCC is not in the
525 * Active State.
526 *
527 * XXX : Should we error out only if no
528 * communication is allowed with the
529 * OCC ?
530 */
531 occ_dyn_data = get_occ_dynamic_data(chip);
532 if (occ_dyn_data->occ_state != 0x3) {
533 /**
534 * @fwts-label OCCInactive
535 * @fwts-advice The OCC for a chip was not active.
536 * This means that CPU frequency scaling will
537 * not be functional. CPU may be set to a low,
538 * safe frequency. This means that CPU idle
539 * states and CPU frequency scaling may not be
540 * functional.
541 */
542 prlog(PR_ERR, "OCC: Chip: %x: OCC not active\n",
543 chip->id);
544 return false;
545 }
546 break;
547
548 default:
549 prlog(PR_ERR, "OCC: Unknown OCC-OPAL interface version.\n");
550 return false;
551 }
552
553 if (!chip->occ_functional)
554 chip->occ_functional = true;
555
556 prlog(PR_DEBUG, "OCC: Chip %02x Data (%016llx) = %016llx\n",
557 chip->id, (uint64_t)occ_data, be64_to_cpu(*(__be64 *)occ_data));
558
559 if (version == 0x90 || version == 0xA0) {
560 occ_dyn_data = get_occ_dynamic_data(chip);
561 prlog(PR_DEBUG, "OCC: Chip %02x Dynamic Data (%016llx) = %016llx\n",
562 chip->id, (uint64_t)occ_dyn_data,
563 be64_to_cpu(*(__be64 *)occ_dyn_data));
564 }
565 }
566
567 end_time = mftb();
568 prlog(PR_NOTICE, "OCC: All Chip Rdy after %lu ms\n",
569 tb_to_msecs(end_time - start_time));
570
571 dt_for_each_compatible(dt_root, xn, "ibm,xscom") {
572 const struct dt_property *p;
573 p = dt_find_property(xn, "ibm,occ-functional-state");
574 if (!p)
575 dt_add_property_cells(xn, "ibm,occ-functional-state",
576 0x1);
577 }
578 return true;
579 }
580
581 /*
582 * OCC provides pstate table entries in continuous descending order.
583 * Parse the pstate table to skip pstate_ids that are greater
584 * than Pmax. If a pstate_id is equal to Pmin then add it to
585 * the list and break from the loop as this is the last valid
586 * element in the pstate table.
587 */
/*
 * Copy the version 0x01/0x02 pstates between Pmax and Pmin into the
 * device tree arrays: ids as-is, frequencies converted from kHz to MHz.
 * At most @nr_pstates entries are written to @dt_id and @dt_freq.
 */
static void parse_pstates_v2(struct occ_pstate_table *data, __be32 *dt_id,
			     __be32 *dt_freq, int nr_pstates, int pmax, int pmin)
{
	int src, dst = 0;

	for (src = 0; src < MAX_PSTATES; src++) {
		int id;

		/* Output arrays are full */
		if (dst >= nr_pstates)
			break;

		id = data->v2.pstates[src].id;

		/* Entries above Pmax are not exported */
		if (cmp_pstates(id, pmax) > 0)
			continue;

		dt_id[dst] = cpu_to_be32(id);
		dt_freq[dst] = cpu_to_be32(be32_to_cpu(data->v2.pstates[src].freq_khz) / 1000);
		dst++;

		/* Pmin is the last valid element of the table */
		if (id == pmin)
			break;
	}

	if (dst != nr_pstates)
		prerror("OCC: Expected pstates(%d) is not equal to parsed pstates(%d)\n",
			nr_pstates, dst);
}
609
/*
 * Copy the version 0x90 pstates between Pmax and Pmin into the device
 * tree arrays: ids as-is, frequencies converted from kHz to MHz.
 * At most @nr_pstates entries are written to @dt_id and @dt_freq.
 */
static void parse_pstates_v9(struct occ_pstate_table *data, __be32 *dt_id,
			     __be32 *dt_freq, int nr_pstates, int pmax, int pmin)
{
	int src, dst = 0;

	for (src = 0; src < MAX_PSTATES; src++) {
		int id;

		/* Output arrays are full */
		if (dst >= nr_pstates)
			break;

		id = data->v9.pstates[src].id;

		/* Entries above Pmax are not exported */
		if (cmp_pstates(id, pmax) > 0)
			continue;

		dt_id[dst] = cpu_to_be32(id);
		dt_freq[dst] = cpu_to_be32(be32_to_cpu(data->v9.pstates[src].freq_khz) / 1000);
		dst++;

		/* Pmin is the last valid element of the table */
		if (id == pmin)
			break;
	}

	if (dst != nr_pstates)
		prerror("OCC: Expected pstates(%d) is not equal to parsed pstates(%d)\n",
			nr_pstates, dst);
}
631
/*
 * Copy the version 0xA0 pstates between Pmax and Pmin into the device
 * tree arrays: ids as-is, frequencies converted from kHz to MHz.
 * Entries whose valid byte is zero are counted and left out.
 * At most @nr_pstates entries are written to @dt_id and @dt_freq.
 */
static void parse_pstates_v10(struct occ_pstate_table *data, __be32 *dt_id,
			      __be32 *dt_freq, int nr_pstates, int pmax, int pmin)
{
	int src, dst = 0;
	int skipped = 0;

	for (src = 0; src < MAX_PSTATES; src++) {
		int id;

		/* Output arrays are full */
		if (dst >= nr_pstates)
			break;

		id = data->v10.pstates[src].id;

		/* Entries above Pmax are not exported */
		if (cmp_pstates(id, pmax) > 0)
			continue;

		if (!data->v10.pstates[src].valid) {
			prlog(PR_WARNING, "OCC: Found Invalid pstate with index %d. Skipping it.\n", src);
			skipped++;
			continue;
		}

		dt_id[dst] = cpu_to_be32(id);
		dt_freq[dst] = cpu_to_be32(be32_to_cpu(data->v10.pstates[src].freq_khz) / 1000);
		dst++;

		/* Pmin is the last valid element of the table */
		if (id == pmin)
			break;
	}

	/* Exported plus skipped entries should account for the whole range */
	if ((dst + skipped) != nr_pstates) {
		prerror("OCC: Expected pstates(%d) not equal to (Parsed pstates(%d) + Invalid Pstates (%d))\n",
			nr_pstates, dst, skipped);
	}
}
661
/*
 * Collect the vdd/vcs voltage identifiers of the version 0x01/0x02
 * pstates between Pmax and Pmin and attach them to @node as
 * "ibm,pstate-vdds" and "ibm,pstate-vcss" (@nr_pstates bytes each).
 */
static void parse_vid(struct occ_pstate_table *occ_data,
		      struct dt_node *node, u8 nr_pstates,
		      int pmax, int pmin)
{
	u8 *vdd_ids, *vcs_ids;
	int src, dst = 0;

	vdd_ids = malloc(nr_pstates);
	assert(vdd_ids);
	vcs_ids = malloc(nr_pstates);
	assert(vcs_ids);

	for (src = 0; src < MAX_PSTATES; src++) {
		int id;

		/* Output arrays are full */
		if (dst >= nr_pstates)
			break;

		id = occ_data->v2.pstates[src].id;

		/* Entries above Pmax are not exported */
		if (cmp_pstates(id, pmax) > 0)
			continue;

		vdd_ids[dst] = occ_data->v2.pstates[src].vdd;
		vcs_ids[dst] = occ_data->v2.pstates[src].vcs;
		dst++;

		/* Pmin is the last valid element of the table */
		if (id == pmin)
			break;
	}

	dt_add_property(node, "ibm,pstate-vdds", vdd_ids, nr_pstates);
	dt_add_property(node, "ibm,pstate-vcss", vcs_ids, nr_pstates);

	/* The temporary arrays are released once both properties are added */
	free(vdd_ids);
	free(vcs_ids);
}
692
693 /* Add device tree properties to describe pstates states */
694 /* Return nominal pstate to set in each core */
add_cpu_pstate_properties(struct dt_node * power_mgt,int * pstate_nom)695 static bool add_cpu_pstate_properties(struct dt_node *power_mgt,
696 int *pstate_nom)
697 {
698 struct proc_chip *chip;
699 uint64_t occ_data_area;
700 struct occ_pstate_table *occ_data = NULL;
701 struct occ_dynamic_data *occ_dyn_data;
702 /* Arrays for device tree */
703 __be32 *dt_id, *dt_freq;
704 int pmax, pmin, pnom;
705 u8 nr_pstates;
706 bool ultra_turbo_supported;
707 int i, major, minor;
708
709 prlog(PR_DEBUG, "OCC: CPU pstate state device tree init\n");
710
711 /*
712 * Find first chip with an OCC which has as a valid
713 * pstate-table
714 */
715 for_each_chip(chip) {
716 occ_data = get_occ_pstate_table(chip);
717
718 /* Dump first 16 bytes of PState table */
719 occ_data_area = (uint64_t)occ_data;
720 prlog(PR_DEBUG, "OCC: Chip %02d :Data (%16llx) = %16llx %16llx\n",
721 chip->id, occ_data_area,
722 be64_to_cpu(*(__be64 *)occ_data_area),
723 be64_to_cpu(*(__be64 *)(occ_data_area + 8)));
724
725 if (occ_data->valid)
726 break;
727 /*
728 * XXX : Error out if !occ_data->valid but Chip has at
729 * least one EX Unit?
730 */
731 }
732
733 assert(occ_data);
734 if (!occ_data->valid) {
735 /**
736 * @fwts-label OCCInvalidPStateTableDT
737 * @fwts-advice The pstate tables for none of the chips
738 * are valid. This means that OCC (On Chip
739 * Controller) will be non-functional. This means
740 * that CPU idle states and CPU frequency scaling
741 * will not be functional as OPAL doesn't populate
742 * the device tree with pstates in this case.
743 */
744 prlog(PR_ERR, "OCC: PState table is not valid\n");
745 return false;
746 }
747
748 /*
749 * Workload-Optimized-Frequency(WOF) or Ultra-Turbo is supported
750 * from version 0x02 onwards. If WOF is disabled then, the max
751 * ultra_turbo pstate will be equal to max turbo pstate.
752 */
753 ultra_turbo_supported = true;
754
755 major = occ_data->version >> 4;
756 minor = occ_data->version & 0xF;
757
758 /* Parse Pmax, Pmin and Pnominal */
759 switch (major) {
760 case 0:
761 if (proc_gen >= proc_gen_p9) {
762 /**
763 * @fwts-label OCCInvalidVersion02
764 * @fwts-advice The PState table layout version is not
765 * supported in P9. So OPAL will not parse the PState
766 * table. CPU frequency scaling will not be functional
767 * as frequency and pstate-ids are not added to DT.
768 */
769 prerror("OCC: Version %x is not supported in P9\n",
770 occ_data->version);
771 return false;
772 }
773 if (minor == 0x1)
774 ultra_turbo_supported = false;
775 pmin = occ_data->v2.pstate_min;
776 pnom = occ_data->v2.pstate_nom;
777 if (ultra_turbo_supported)
778 pmax = occ_data->v2.pstate_ultra_turbo;
779 else
780 pmax = occ_data->v2.pstate_turbo;
781 break;
782 case 0x9:
783 if (proc_gen == proc_gen_p8) {
784 /**
785 * @fwts-label OCCInvalidVersion90
786 * @fwts-advice The PState table layout version is not
787 * supported in P8. So OPAL will not parse the PState
788 * table. CPU frequency scaling will not be functional
789 * as frequency and pstate-ids are not added to DT.
790 */
791 prerror("OCC: Version %x is not supported in P8\n",
792 occ_data->version);
793 return false;
794 }
795 pmin = occ_data->v9.pstate_min;
796 pnom = occ_data->v9.pstate_nom;
797 pmax = occ_data->v9.pstate_ultra_turbo;
798 break;
799 case 0xA:
800 pmin = occ_data->v10.pstate_min;
801 pnom = occ_data->v10.pstate_fixed_freq;
802 occ_dyn_data = get_occ_dynamic_data(chip);
803 if (occ_dyn_data->v10.wof_enabled)
804 pmax = occ_data->v10.pstate_ultra_turbo;
805 else
806 pmax = occ_data->v10.pstate_fmax;
807 break;
808 default:
809 /**
810 * @fwts-label OCCUnsupportedVersion
811 * @fwts-advice The PState table layout version is not
812 * supported. So OPAL will not parse the PState table.
813 * CPU frequency scaling will not be functional as OPAL
814 * doesn't populate the device tree with pstates.
815 */
816 prerror("OCC: Unsupported pstate table layout version %d\n",
817 occ_data->version);
818 return false;
819 }
820
821 /* Sanity check for pstate limits */
822 if (cmp_pstates(pmin, pmax) > 0) {
823 /**
824 * @fwts-label OCCInvalidPStateLimits
825 * @fwts-advice The min pstate is greater than the
826 * max pstate, this could be due to corrupted/invalid
827 * data in OCC-OPAL shared memory region. So OPAL has
828 * not added pstates to device tree. This means that
829 * CPU Frequency management will not be functional in
830 * the host.
831 */
832 prerror("OCC: Invalid pstate limits. Pmin(%d) > Pmax (%d)\n",
833 pmin, pmax);
834 return false;
835 }
836
837 if (cmp_pstates(pnom, pmax) > 0) {
838 /**
839 * @fwts-label OCCInvalidNominalPState
840 * @fwts-advice The nominal pstate is greater than the
841 * max pstate, this could be due to corrupted/invalid
842 * data in OCC-OPAL shared memory region. So OPAL has
843 * limited the nominal pstate to max pstate.
844 */
845 prerror("OCC: Clipping nominal pstate(%d) to Pmax(%d)\n",
846 pnom, pmax);
847 pnom = pmax;
848 }
849
850 nr_pstates = labs(pmax - pmin) + 1;
851 prlog(PR_DEBUG, "OCC: Version %x Min %d Nom %d Max %d Nr States %d\n",
852 occ_data->version, pmin, pnom, pmax, nr_pstates);
853 if (((major == 0x9 || major == 0xA) && nr_pstates <= 1) ||
854 (major == 0 && (nr_pstates <= 1 || nr_pstates > 128))) {
855 /**
856 * @fwts-label OCCInvalidPStateRange
857 * @fwts-advice The number of pstates is outside the valid
858 * range (currently <=1 or > 128 on p8, >255 on P9), so OPAL
859 * has not added pstates to the device tree. This means that
860 * OCC (On Chip Controller) will be non-functional. This means
861 * that CPU idle states and CPU frequency scaling
862 * will not be functional.
863 */
864 prerror("OCC: OCC range is not valid; No of pstates = %d\n",
865 nr_pstates);
866 return false;
867 }
868
869 dt_id = malloc(nr_pstates * sizeof(__be32));
870 assert(dt_id);
871 dt_freq = malloc(nr_pstates * sizeof(__be32));
872 assert(dt_freq);
873
874 switch (major) {
875 case 0:
876 parse_pstates_v2(occ_data, dt_id, dt_freq, nr_pstates,
877 pmax, pmin);
878 break;
879 case 0x9:
880 parse_pstates_v9(occ_data, dt_id, dt_freq, nr_pstates,
881 pmax, pmin);
882 break;
883 case 0xA:
884 parse_pstates_v10(occ_data, dt_id, dt_freq, nr_pstates,
885 pmax, pmin);
886 break;
887 default:
888 return false;
889 }
890
891 /* Add the device-tree entries */
892 dt_add_property(power_mgt, "ibm,pstate-ids", dt_id,
893 nr_pstates * sizeof(__be32));
894 dt_add_property(power_mgt, "ibm,pstate-frequencies-mhz", dt_freq,
895 nr_pstates * sizeof(__be32));
896 dt_add_property_cells(power_mgt, "ibm,pstate-min", pmin);
897 dt_add_property_cells(power_mgt, "ibm,pstate-nominal", pnom);
898 dt_add_property_cells(power_mgt, "ibm,pstate-max", pmax);
899
900 free(dt_freq);
901 free(dt_id);
902
903 /*
904 * Parse and add WOF properties: turbo, ultra-turbo and core_max array.
905 * core_max[1..n] array provides the max sustainable pstate that can be
906 * achieved with i active cores in the chip.
907 */
908 if (ultra_turbo_supported) {
909 int pturbo, pultra_turbo;
910 u8 nr_cores = get_available_nr_cores_in_chip(chip->id);
911 __be32 *dt_cmax;
912
913 dt_cmax = malloc(nr_cores * sizeof(u32));
914 assert(dt_cmax);
915 switch (major) {
916 case 0:
917 pturbo = occ_data->v2.pstate_turbo;
918 pultra_turbo = occ_data->v2.pstate_ultra_turbo;
919 for (i = 0; i < nr_cores; i++)
920 dt_cmax[i] = cpu_to_be32(occ_data->v2.core_max[i]);
921 break;
922 case 0x9:
923 pturbo = occ_data->v9.pstate_turbo;
924 pultra_turbo = occ_data->v9.pstate_ultra_turbo;
925 for (i = 0; i < nr_cores; i++)
926 dt_cmax[i] = cpu_to_be32(occ_data->v9.core_max[i]);
927 break;
928 case 0xA:
929 pturbo = occ_data->v10.pstate_base;
930 pultra_turbo = occ_data->v10.pstate_ultra_turbo;
931 for (i = 0; i < nr_cores; i++)
932 dt_cmax[i] = cpu_to_be32(occ_data->v10.core_max[i]);
933 break;
934 default:
935 return false;
936 }
937
938 if (cmp_pstates(pturbo, pmax) > 0) {
939 prerror("OCC: Clipping turbo pstate(%d) to Pmax(%d)\n",
940 pturbo, pmax);
941 dt_add_property_cells(power_mgt, "ibm,pstate-turbo",
942 pmax);
943 } else {
944 dt_add_property_cells(power_mgt, "ibm,pstate-turbo",
945 pturbo);
946 }
947
948 dt_add_property_cells(power_mgt, "ibm,pstate-ultra-turbo",
949 pultra_turbo);
950 dt_add_property(power_mgt, "ibm,pstate-core-max", dt_cmax,
951 nr_cores * sizeof(u32));
952
953 dt_add_property_cells(power_mgt, "ibm,pstate-base", pturbo);
954 free(dt_cmax);
955 }
956
957 if (major == 0x9 || major == 0xA)
958 goto out;
959
960 dt_add_property_cells(power_mgt, "#address-cells", 2);
961 dt_add_property_cells(power_mgt, "#size-cells", 1);
962
963 /* Add chip specific pstate properties */
964 for_each_chip(chip) {
965 struct dt_node *occ_node;
966
967 occ_data = get_occ_pstate_table(chip);
968 occ_node = dt_new_addr(power_mgt, "occ", (uint64_t)occ_data);
969 if (!occ_node) {
970 /**
971 * @fwts-label OCCDTFailedNodeCreation
972 * @fwts-advice Failed to create
973 * /ibm,opal/power-mgt/occ. Per-chip pstate properties
974 * are not added to Device Tree.
975 */
976 prerror("OCC: Failed to create /ibm,opal/power-mgt/occ@%llx\n",
977 (uint64_t)occ_data);
978 return false;
979 }
980
981 dt_add_property_cells(occ_node, "reg",
982 hi32((uint64_t)occ_data),
983 lo32((uint64_t)occ_data),
984 OPAL_DYNAMIC_DATA_OFFSET +
985 sizeof(struct occ_dynamic_data));
986 dt_add_property_cells(occ_node, "ibm,chip-id", chip->id);
987
988 /*
989 * Parse and add pstate Voltage Identifiers (VID) to DT which
990 * are provided by OCC in version 0x01 and 0x02
991 */
992 parse_vid(occ_data, occ_node, nr_pstates, pmax, pmin);
993 }
994 out:
995 /* Return pstate to set for each core */
996 *pstate_nom = pnom;
997 return true;
998 }
999
1000 /*
1001 * Prepare chip for pstate transitions
1002 */
1003
/*
 * Program one core with the nominal pstate through SCOM and then release
 * the SPR override again.
 *
 * @chip:       chip whose id is used for the XSCOM accesses
 * @c:          thread whose PIR selects the core (via pir_to_core_id())
 * @pstate_nom: pstate to program; only the low 8 bits are used
 *
 * Returns true when every XSCOM access succeeded. On the first failing
 * access an OPAL_RC_OCC_PSTATE_INIT error is logged and false is returned,
 * leaving the remaining steps undone.
 */
static bool cpu_pstates_prepare_core(struct proc_chip *chip,
				     struct cpu_thread *c,
				     int pstate_nom)
{
	uint32_t core = pir_to_core_id(c->pir);
	uint64_t tmp, pstate;
	int rc;

	/*
	 * Currently Fastsleep init clears EX_PM_SPR_OVERRIDE_EN.
	 * Need to ensure only relevant bits are inited
	 */

	/* Init PM GP1 for SCOM based PSTATE control to set nominal freq
	 *
	 * Use the OR SCOM to set the required bits in PM_GP1 register
	 * since the OCC might be manipulating the PM_GP1 register as well.
	 */
	rc = xscom_write(chip->id, XSCOM_ADDR_P8_EX_SLAVE(core, EX_PM_SET_GP1),
			 EX_PM_SETUP_GP1_PM_SPR_OVERRIDE_EN);
	if (rc) {
		log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
			"OCC: Failed to write PM_GP1 in pstates init\n");
		return false;
	}

	/* Set new pstate to core */
	rc = xscom_read(chip->id, XSCOM_ADDR_P8_EX_SLAVE(core, EX_PM_PPMCR), &tmp);
	if (rc) {
		log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
			"OCC: Failed to read PM_PPMCR from OCC in pstates init\n");
		return false;
	}
	/*
	 * Read-modify-write: clear the top 16 bits of PPMCR and place the
	 * 8-bit pstate in each of the two top bytes.
	 */
	tmp = tmp & ~0xFFFF000000000000ULL;
	pstate = ((uint64_t) pstate_nom) & 0xFF;
	tmp = tmp | (pstate << 56) | (pstate << 48);
	rc = xscom_write(chip->id, XSCOM_ADDR_P8_EX_SLAVE(core, EX_PM_PPMCR), tmp);
	if (rc) {
		log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
			"OCC: Failed to write PM_PPMCR in pstates init\n");
		return false;
	}
	time_wait_ms(1); /* Wait for PState to change */
	/*
	 * Init PM GP1 for SPR based PSTATE control.
	 * Once OCC is active EX_PM_SETUP_GP1_DPLL_FREQ_OVERRIDE_EN will be
	 * cleared by OCC.  Sapphire need not clear.
	 * However wait for DVFS state machine to become idle after min->nominal
	 * transition initiated above.  If not switch over to SPR control could fail.
	 *
	 * Use the AND SCOM to clear the required bits in PM_GP1 register
	 * since the OCC might be manipulating the PM_GP1 register as well.
	 */
	tmp = ~EX_PM_SETUP_GP1_PM_SPR_OVERRIDE_EN;
	rc = xscom_write(chip->id, XSCOM_ADDR_P8_EX_SLAVE(core, EX_PM_CLEAR_GP1),
			tmp);
	if (rc) {
		log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
			"OCC: Failed to write PM_GP1 in pstates init\n");
		return false;
	}

	/* Just debug: read PPMSR back and print it */
	rc = xscom_read(chip->id, XSCOM_ADDR_P8_EX_SLAVE(core, EX_PM_PPMSR), &tmp);
	if (rc) {
		log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
			"OCC: Failed to read PM_PPMSR from OCC"
			"in pstates init\n");
		return false;
	}
	prlog(PR_DEBUG, "OCC: Chip %x Core %x PPMSR %016llx\n",
	      chip->id, core, tmp);

	/*
	 * If PMSR is still in transition at this point due to PState change
	 * initiated above, then the switchover to SPR may not work.
	 * ToDo: Check for DVFS state machine idle before change.
	 */

	return true;
}
1085
/*
 * True while an OCC_THROTTLE message queued by occ_throttle_poll() with
 * occ_msg_consumed() as its callback has not been consumed yet.
 */
static bool occ_opal_msg_outstanding = false;

/*
 * Consumption callback for throttle messages: clears the outstanding flag
 * under occ_lock so the poller may queue the next throttle update.
 * Both arguments are ignored.
 */
static void occ_msg_consumed(void *data __unused, int status __unused)
{
	lock(&occ_lock);
	occ_opal_msg_outstanding = false;
	unlock(&occ_lock);
}
1093
get_cpu_throttle(struct proc_chip * chip)1094 static inline u8 get_cpu_throttle(struct proc_chip *chip)
1095 {
1096 struct occ_pstate_table *pdata = get_occ_pstate_table(chip);
1097 struct occ_dynamic_data *data;
1098
1099 switch (pdata->version >> 4) {
1100 case 0:
1101 return pdata->v2.throttle;
1102 case 0x9:
1103 case 0xA:
1104 data = get_occ_dynamic_data(chip);
1105 return data->cpu_throttle;
1106 default:
1107 return 0;
1108 };
1109 }
1110
/*
 * Report the current value of the occ_reset flag. The flag is set by
 * occ_msg_queue_occ_reset() and cleared by occ_throttle_poll() once every
 * chip's pstate table is valid again. Read without taking occ_lock.
 */
bool is_occ_reset(void)
{
	return occ_reset;
}
1115
/*
 * OPAL poller: forward OCC throttle changes to the host as OPAL_MSG_OCC
 * messages.
 *
 * Bails out immediately when occ_lock is contended. While an OCC reset is
 * pending it only watches for every chip's pstate table to become valid
 * and then queues one OCC_THROTTLE message with chip and status 0.
 * Otherwise, provided no earlier throttle message is still outstanding, it
 * queues a message for the first chip whose published throttle status
 * differs from the cached chip->throttle.
 */
static void occ_throttle_poll(void *data __unused)
{
	struct opal_occ_msg msg;
	struct occ_pstate_table *table;
	struct proc_chip *chip;
	int rc;

	if (!try_lock(&occ_lock))
		return;

	if (occ_reset) {
		bool all_active = true;

		for_each_chip(chip) {
			table = get_occ_pstate_table(chip);
			if (table->valid != 1) {
				all_active = false;
				break;
			}
		}

		if (all_active) {
			/*
			 * Queue OCC_THROTTLE with throttle status as 0 to
			 * indicate all OCCs are active after a reset.
			 */
			msg.type = cpu_to_be64(OCC_THROTTLE);
			msg.chip = 0;
			msg.throttle_status = 0;
			rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL,
					     sizeof(struct opal_occ_msg),
					     &msg);
			if (!rc)
				occ_reset = false;
		}
		goto done;
	}

	/* Only one throttle message may be in flight at a time */
	if (occ_opal_msg_outstanding)
		goto done;

	for_each_chip(chip) {
		u8 throttle;

		table = get_occ_pstate_table(chip);
		throttle = get_cpu_throttle(chip);

		if (table->valid != 1)
			continue;
		if (chip->throttle == throttle)
			continue;
		if (throttle > OCC_MAX_THROTTLE_STATUS)
			continue;

		msg.type = cpu_to_be64(OCC_THROTTLE);
		msg.chip = cpu_to_be64(chip->id);
		msg.throttle_status = cpu_to_be64(throttle);
		rc = _opal_queue_msg(OPAL_MSG_OCC, NULL,
				     occ_msg_consumed,
				     sizeof(struct opal_occ_msg),
				     &msg);
		if (rc)
			continue;

		/* Remember what was reported and wait for consumption */
		chip->throttle = throttle;
		occ_opal_msg_outstanding = true;
		break;
	}
done:
	unlock(&occ_lock);
}
1178
1179 /* OPAL-OCC Command/Response Interface */
1180
/*
 * OCC states. The numeric value doubles as a bit index: opal_occ_command()
 * tests PPC_BIT16(state) against opal_occ_cmd_info.state_mask.
 */
enum occ_state {
	OCC_STATE_NOT_RUNNING		= 0x00,
	OCC_STATE_STANDBY		= 0x01,
	OCC_STATE_OBSERVATION		= 0x02,
	OCC_STATE_ACTIVE		= 0x03,
	OCC_STATE_SAFE			= 0x04,
	OCC_STATE_CHARACTERIZATION	= 0x05,
};
1189
/*
 * Role of an OCC. The numeric value doubles as a bit index:
 * opal_occ_command() tests PPC_BIT8(role) against
 * opal_occ_cmd_info.role_mask.
 */
enum occ_role {
	OCC_ROLE_SLAVE		= 0x0,
	OCC_ROLE_MASTER		= 0x1,
};
1194
/*
 * Commands OPAL can send to the OCC. The values are used directly as
 * indices into occ_cmds[], so the order here must match that table.
 */
enum occ_cmd {
	OCC_CMD_CLEAR_SENSOR_DATA,
	OCC_CMD_SET_POWER_CAP,
	OCC_CMD_SET_POWER_SHIFTING_RATIO,
	OCC_CMD_SELECT_SENSOR_GROUP,
};
1201
/* Static description of one OPAL->OCC command (one entry of occ_cmds[]). */
struct opal_occ_cmd_info {
	enum occ_cmd cmd;	/* index of this entry */
	u8 cmd_value;		/* value written to the command buffer's cmd field */
	u16 cmd_size;		/* payload bytes copied into the command buffer */
	u16 rsp_size;		/* not read anywhere in the code visible here */
	int timeout_ms;		/* command timeout handed to schedule_timer() */
	u16 state_mask;		/* PPC_BIT16() set of OCC states accepting the command */
	u8 role_mask;		/* PPC_BIT8() set of OCC roles accepting the command */
};
1211
/*
 * Command table, indexed by enum occ_cmd.
 *
 * Positional fields per entry: cmd, cmd_value, cmd_size, rsp_size,
 * timeout_ms, state_mask, role_mask. Every command is accepted in the
 * observation, active and characterization states; only SET_POWER_CAP is
 * restricted to the master OCC.
 */
static struct opal_occ_cmd_info occ_cmds[] = {
	{	OCC_CMD_CLEAR_SENSOR_DATA,
		0xD0, 4, 4, 1000,
		PPC_BIT16(OCC_STATE_OBSERVATION) |
		PPC_BIT16(OCC_STATE_ACTIVE) |
		PPC_BIT16(OCC_STATE_CHARACTERIZATION),
		PPC_BIT8(OCC_ROLE_MASTER) | PPC_BIT8(OCC_ROLE_SLAVE)
	},
	{	OCC_CMD_SET_POWER_CAP,
		0xD1, 2, 2, 1000,
		PPC_BIT16(OCC_STATE_OBSERVATION) |
		PPC_BIT16(OCC_STATE_ACTIVE) |
		PPC_BIT16(OCC_STATE_CHARACTERIZATION),
		PPC_BIT8(OCC_ROLE_MASTER)
	},
	{	OCC_CMD_SET_POWER_SHIFTING_RATIO,
		0xD2, 1, 1, 1000,
		PPC_BIT16(OCC_STATE_OBSERVATION) |
		PPC_BIT16(OCC_STATE_ACTIVE) |
		PPC_BIT16(OCC_STATE_CHARACTERIZATION),
		PPC_BIT8(OCC_ROLE_MASTER) | PPC_BIT8(OCC_ROLE_SLAVE)
	},
	{	OCC_CMD_SELECT_SENSOR_GROUP,
		0xD3, 2, 2, 1000,
		PPC_BIT16(OCC_STATE_OBSERVATION) |
		PPC_BIT16(OCC_STATE_ACTIVE) |
		PPC_BIT16(OCC_STATE_CHARACTERIZATION),
		PPC_BIT8(OCC_ROLE_MASTER) | PPC_BIT8(OCC_ROLE_SLAVE)
	},
};
1242
/*
 * Status values found in the OCC response buffer. read_occ_rsp() maps
 * OCC_RSP_SUCCESS to OPAL_SUCCESS and everything else to
 * OPAL_INTERNAL_ERROR.
 */
enum occ_response_status {
	OCC_RSP_SUCCESS			= 0x00,
	OCC_RSP_INVALID_COMMAND		= 0x11,
	OCC_RSP_INVALID_CMD_DATA_LENGTH	= 0x12,
	OCC_RSP_INVALID_DATA		= 0x13,
	OCC_RSP_INTERNAL_ERROR		= 0x15,
};
1250
/*
 * Handshake flag values. The two OCC_FLAG_* values are compared against
 * the response buffer's flag (handle_occ_rsp(), occ_in_progress());
 * OPAL_FLAG_CMD_READY is written to the command buffer's flag by
 * write_occ_cmd() once the command has been filled in.
 */
#define OCC_FLAG_RSP_READY		0x01
#define OCC_FLAG_CMD_IN_PROGRESS	0x02
#define OPAL_FLAG_CMD_READY		0x80
1254
/* A command request: which command to send and where its payload lives. */
struct opal_occ_cmd_data {
	u8 *data;		/* payload; occ_cmds[cmd].cmd_size bytes are copied from here */
	enum occ_cmd cmd;	/* index into occ_cmds[] */
};
1259
/*
 * Per-chip state of the OPAL-OCC command/response interface. The array is
 * allocated by occ_cmd_interface_init() and stays NULL until then.
 */
static struct cmd_interface {
	struct lock queue_lock;			/* guards command issue and cmd_in_progress */
	struct timer timeout;			/* runs occ_cmd_timeout_handler() */
	struct opal_occ_cmd_data *cdata;	/* command currently being processed */
	struct opal_command_buffer *cmd;	/* command buffer in the OCC dynamic data */
	struct occ_response_buffer *rsp;	/* response buffer in the OCC dynamic data */
	u8 *occ_state;				/* points at the dynamic data's occ_state */
	u8 *valid;				/* points at the pstate table's valid byte */
	u32 chip_id;
	u32 token;				/* async token completed via queue_occ_rsp_msg() */
	u16 enabled_sensor_mask;		/* last sensor group mask accepted by the OCC */
	u8 occ_role;				/* enum occ_role of this chip's OCC */
	u8 request_id;				/* id for the next command; post-incremented on write */
	bool cmd_in_progress;			/* a command is awaiting its response */
	bool retry;				/* the current command has already been reissued */
} *chips;

/* Number of entries in chips[] */
static int nr_occs;
1278
get_chip_cmd_interface(int chip_id)1279 static inline struct cmd_interface *get_chip_cmd_interface(int chip_id)
1280 {
1281 int i;
1282
1283 for (i = 0; i < nr_occs; i++)
1284 if (chips[i].chip_id == chip_id)
1285 return &chips[i];
1286
1287 return NULL;
1288 }
1289
/*
 * True when the response buffer's flag equals OCC_FLAG_CMD_IN_PROGRESS
 * exactly (not merely has that bit set).
 */
static inline bool occ_in_progress(struct cmd_interface *chip)
{
	return (chip->rsp->flag == OCC_FLAG_CMD_IN_PROGRESS);
}
1294
/*
 * Write the command described by chip->cdata into the chip's command
 * buffer and arm the command timeout.
 *
 * All callers in this file invoke it with chip->queue_lock held.
 *
 * Returns OPAL_BUSY, after clearing chip->cmd_in_progress, when this is a
 * first attempt (!retry) and the OCC reports a command in progress.
 * Otherwise returns OPAL_ASYNC_COMPLETION.
 */
static int write_occ_cmd(struct cmd_interface *chip)
{
	struct opal_command_buffer *cmd = chip->cmd;
	enum occ_cmd ocmd = chip->cdata->cmd;

	if (!chip->retry && occ_in_progress(chip)) {
		chip->cmd_in_progress = false;
		return OPAL_BUSY;
	}

	/* Clear both handshake flags before filling in the command */
	cmd->flag = chip->rsp->flag = 0;
	cmd->cmd = occ_cmds[ocmd].cmd_value;
	cmd->request_id = chip->request_id++;
	cmd->data_size = occ_cmds[ocmd].cmd_size;
	memcpy(&cmd->data, chip->cdata->data, cmd->data_size);
	/* The ready flag is written last, after the payload */
	cmd->flag = OPAL_FLAG_CMD_READY;

	schedule_timer(&chip->timeout,
		       msecs_to_tb(occ_cmds[ocmd].timeout_ms));

	return OPAL_ASYNC_COMPLETION;
}
1317
opal_occ_command(struct cmd_interface * chip,int token,struct opal_occ_cmd_data * cdata)1318 static int64_t opal_occ_command(struct cmd_interface *chip, int token,
1319 struct opal_occ_cmd_data *cdata)
1320 {
1321 int rc;
1322
1323 if (!(*chip->valid) ||
1324 (!(PPC_BIT16(*chip->occ_state) & occ_cmds[cdata->cmd].state_mask)))
1325 return OPAL_HARDWARE;
1326
1327 if (!(PPC_BIT8(chip->occ_role) & occ_cmds[cdata->cmd].role_mask))
1328 return OPAL_PERMISSION;
1329
1330 lock(&chip->queue_lock);
1331 if (chip->cmd_in_progress) {
1332 rc = OPAL_BUSY;
1333 goto out;
1334 }
1335
1336 chip->cdata = cdata;
1337 chip->token = token;
1338 chip->cmd_in_progress = true;
1339 chip->retry = false;
1340 rc = write_occ_cmd(chip);
1341 out:
1342 unlock(&chip->queue_lock);
1343 return rc;
1344 }
1345
sanity_check_opal_cmd(struct opal_command_buffer * cmd,struct cmd_interface * chip)1346 static inline bool sanity_check_opal_cmd(struct opal_command_buffer *cmd,
1347 struct cmd_interface *chip)
1348 {
1349 return ((cmd->cmd == occ_cmds[chip->cdata->cmd].cmd_value) &&
1350 (cmd->request_id == chip->request_id - 1) &&
1351 (cmd->data_size == occ_cmds[chip->cdata->cmd].cmd_size));
1352 }
1353
check_occ_rsp(struct opal_command_buffer * cmd,struct occ_response_buffer * rsp)1354 static inline bool check_occ_rsp(struct opal_command_buffer *cmd,
1355 struct occ_response_buffer *rsp)
1356 {
1357 if (cmd->cmd != rsp->cmd) {
1358 prlog(PR_DEBUG, "OCC: Command value mismatch in OCC response"
1359 "rsp->cmd = %d cmd->cmd = %d\n", rsp->cmd, cmd->cmd);
1360 return false;
1361 }
1362
1363 if (cmd->request_id != rsp->request_id) {
1364 prlog(PR_DEBUG, "OCC: Request ID mismatch in OCC response"
1365 "rsp->request_id = %d cmd->request_id = %d\n",
1366 rsp->request_id, cmd->request_id);
1367 return false;
1368 }
1369
1370 return true;
1371 }
1372
queue_occ_rsp_msg(int token,int rc)1373 static inline void queue_occ_rsp_msg(int token, int rc)
1374 {
1375 int ret;
1376
1377 ret = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
1378 cpu_to_be64(token),
1379 cpu_to_be64(rc));
1380 if (ret)
1381 prerror("OCC: Failed to queue OCC response status message\n");
1382 }
1383
/*
 * Timer callback fired when the OCC did not answer a command in time.
 *
 * @data is the struct cmd_interface the timer was initialised with. The
 * first timeout reissues the command once; a timeout of the reissued
 * command completes the request with OPAL_TIMEOUT. Nothing happens if no
 * command is in progress any more.
 */
static void occ_cmd_timeout_handler(struct timer *t __unused, void *data,
				    uint64_t now __unused)
{
	struct cmd_interface *chip = data;

	lock(&chip->queue_lock);
	if (chip->cmd_in_progress) {
		if (chip->retry) {
			chip->cmd_in_progress = false;
			queue_occ_rsp_msg(chip->token, OPAL_TIMEOUT);
			prlog(PR_DEBUG, "OCC: Command timeout after retry\n");
		} else {
			prlog(PR_DEBUG, "OCC: Command timeout, retrying\n");
			chip->retry = true;
			write_occ_cmd(chip);
		}
	}
	unlock(&chip->queue_lock);
}
1405
/*
 * Translate the status of an OCC response into an OPAL return code.
 *
 * OCC_RSP_SUCCESS returns OPAL_SUCCESS straight away and leaves the
 * response flag as it is. Every other status (known ones are logged at
 * debug level) clears the response flag and returns OPAL_INTERNAL_ERROR.
 */
static int read_occ_rsp(struct occ_response_buffer *rsp)
{
	switch (rsp->status) {
	case OCC_RSP_SUCCESS:
		return OPAL_SUCCESS;
	case OCC_RSP_INVALID_COMMAND:
		prlog(PR_DEBUG, "OCC: Rsp status: Invalid command\n");
		break;
	case OCC_RSP_INVALID_CMD_DATA_LENGTH:
		prlog(PR_DEBUG, "OCC: Rsp status: Invalid command data length\n");
		break;
	case OCC_RSP_INVALID_DATA:
		prlog(PR_DEBUG, "OCC: Rsp status: Invalid command data\n");
		break;
	case OCC_RSP_INTERNAL_ERROR:
		prlog(PR_DEBUG, "OCC: Rsp status: OCC internal error\n");
		break;
	default:
		/* Unknown status: treated as an error without a log line */
		break;
	}

	/* Clear the OCC response flag */
	rsp->flag = 0;
	return OPAL_INTERNAL_ERROR;
}
1431
handle_occ_rsp(uint32_t chip_id)1432 static void handle_occ_rsp(uint32_t chip_id)
1433 {
1434 struct cmd_interface *chip;
1435 struct opal_command_buffer *cmd;
1436 struct occ_response_buffer *rsp;
1437
1438 chip = get_chip_cmd_interface(chip_id);
1439 if (!chip)
1440 return;
1441
1442 cmd = chip->cmd;
1443 rsp = chip->rsp;
1444
1445 /*Read rsp*/
1446 if (rsp->flag != OCC_FLAG_RSP_READY)
1447 return;
1448 lock(&chip->queue_lock);
1449 if (!chip->cmd_in_progress)
1450 goto exit;
1451
1452 cancel_timer(&chip->timeout);
1453 if (!sanity_check_opal_cmd(cmd, chip) ||
1454 !check_occ_rsp(cmd, rsp)) {
1455 if (!chip->retry) {
1456 prlog(PR_DEBUG, "OCC: Command-response mismatch, retrying\n");
1457 chip->retry = true;
1458 write_occ_cmd(chip);
1459 } else {
1460 chip->cmd_in_progress = false;
1461 queue_occ_rsp_msg(chip->token, OPAL_INTERNAL_ERROR);
1462 prlog(PR_DEBUG, "OCC: Command-response mismatch\n");
1463 }
1464 goto exit;
1465 }
1466
1467 if (rsp->cmd == occ_cmds[OCC_CMD_SELECT_SENSOR_GROUP].cmd_value &&
1468 rsp->status == OCC_RSP_SUCCESS)
1469 chip->enabled_sensor_mask = *(u16 *)chip->cdata->data;
1470
1471 chip->cmd_in_progress = false;
1472 queue_occ_rsp_msg(chip->token, read_occ_rsp(chip->rsp));
1473 exit:
1474 unlock(&chip->queue_lock);
1475 }
1476
occ_get_gpu_presence(struct proc_chip * chip,int gpu_num)1477 bool occ_get_gpu_presence(struct proc_chip *chip, int gpu_num)
1478 {
1479 struct occ_dynamic_data *ddata;
1480 static int max_retries = 20;
1481 static bool found = false;
1482
1483 assert(gpu_num <= 2);
1484
1485 ddata = get_occ_dynamic_data(chip);
1486 while (!found && max_retries) {
1487 if (ddata->major_version == 0 && ddata->minor_version >= 1) {
1488 found = true;
1489 break;
1490 }
1491 time_wait_ms(100);
1492 max_retries--;
1493 ddata = get_occ_dynamic_data(chip);
1494 }
1495
1496 if (!found) {
1497 prlog(PR_INFO, "OCC: No GPU slot presence, assuming GPU present\n");
1498 return true;
1499 }
1500
1501 return (bool)(ddata->gpus_present & 1 << gpu_num);
1502 }
1503
1504 static void occ_add_powercap_sensors(struct dt_node *power_mgt);
1505 static void occ_add_psr_sensors(struct dt_node *power_mgt);
1506
occ_cmd_interface_init(void)1507 static void occ_cmd_interface_init(void)
1508 {
1509 struct occ_dynamic_data *data;
1510 struct occ_pstate_table *pdata;
1511 struct dt_node *power_mgt;
1512 struct proc_chip *chip;
1513 int i = 0, major;
1514
1515 /* Check if the OCC data is valid */
1516 for_each_chip(chip) {
1517 pdata = get_occ_pstate_table(chip);
1518 if (!pdata->valid)
1519 return;
1520 }
1521
1522 chip = next_chip(NULL);
1523 pdata = get_occ_pstate_table(chip);
1524 major = pdata->version >> 4;
1525 if (major != 0x9 || major != 0xA)
1526 return;
1527
1528 for_each_chip(chip)
1529 nr_occs++;
1530
1531 chips = malloc(sizeof(*chips) * nr_occs);
1532 assert(chips);
1533
1534 for_each_chip(chip) {
1535 pdata = get_occ_pstate_table(chip);
1536 data = get_occ_dynamic_data(chip);
1537 chips[i].chip_id = chip->id;
1538 chips[i].occ_state = &data->occ_state;
1539 chips[i].valid = &pdata->valid;
1540 chips[i].cmd = &data->cmd;
1541 chips[i].rsp = &data->rsp;
1542 switch (major) {
1543 case 0x9:
1544 chips[i].occ_role = pdata->v9.occ_role;
1545 break;
1546 case 0xA:
1547 chips[i].occ_role = pdata->v10.occ_role;
1548 break;
1549 }
1550 init_lock(&chips[i].queue_lock);
1551 chips[i].cmd_in_progress = false;
1552 chips[i].request_id = 0;
1553 chips[i].enabled_sensor_mask = OCC_ENABLED_SENSOR_MASK;
1554 init_timer(&chips[i].timeout, occ_cmd_timeout_handler,
1555 &chips[i]);
1556 i++;
1557 }
1558
1559 power_mgt = dt_find_by_path(dt_root, "/ibm,opal/power-mgt");
1560 if (!power_mgt) {
1561 prerror("OCC: dt node /ibm,opal/power-mgt not found\n");
1562 return;
1563 }
1564
1565 /* Add powercap sensors to DT */
1566 occ_add_powercap_sensors(power_mgt);
1567
1568 /* Add power-shifting-ratio CPU-GPU sensors to DT */
1569 occ_add_psr_sensors(power_mgt);
1570 }
1571
1572 /* Powercap interface */
/*
 * Attribute part of an OCC powercap handle; selects which power cap
 * value of the OCC dynamic data occ_get_powercap() returns.
 */
enum sensor_powercap_occ_attr {
	POWERCAP_OCC_SOFT_MIN,
	POWERCAP_OCC_MAX,
	POWERCAP_OCC_CUR,
	POWERCAP_OCC_HARD_MIN,
};
1579
occ_add_powercap_sensors(struct dt_node * power_mgt)1580 static void occ_add_powercap_sensors(struct dt_node *power_mgt)
1581 {
1582 struct dt_node *pcap, *node;
1583 u32 handle;
1584
1585 pcap = dt_new(power_mgt, "powercap");
1586 if (!pcap) {
1587 prerror("OCC: Failed to create powercap node\n");
1588 return;
1589 }
1590
1591 dt_add_property_string(pcap, "compatible", "ibm,opal-powercap");
1592 node = dt_new(pcap, "system-powercap");
1593 if (!node) {
1594 prerror("OCC: Failed to create system powercap node\n");
1595 return;
1596 }
1597
1598 handle = powercap_make_handle(POWERCAP_CLASS_OCC, POWERCAP_OCC_CUR);
1599 dt_add_property_cells(node, "powercap-current", handle);
1600
1601 handle = powercap_make_handle(POWERCAP_CLASS_OCC,
1602 POWERCAP_OCC_SOFT_MIN);
1603 dt_add_property_cells(node, "powercap-min", handle);
1604
1605 handle = powercap_make_handle(POWERCAP_CLASS_OCC, POWERCAP_OCC_MAX);
1606 dt_add_property_cells(node, "powercap-max", handle);
1607
1608 handle = powercap_make_handle(POWERCAP_CLASS_OCC,
1609 POWERCAP_OCC_HARD_MIN);
1610 dt_add_property_cells(node, "powercap-hard-min", handle);
1611
1612 }
1613
/*
 * Read the power cap value selected by @handle into *@pcap.
 *
 * The values are taken from the OCC dynamic data of the first chip.
 * Returns OPAL_HARDWARE (leaving *@pcap untouched) when that chip's pstate
 * table is not valid, OPAL_UNSUPPORTED (with *@pcap set to 0) for an
 * unknown attribute, OPAL_SUCCESS otherwise.
 */
int occ_get_powercap(u32 handle, u32 *pcap)
{
	struct proc_chip *first = next_chip(NULL);
	struct occ_pstate_table *table = get_occ_pstate_table(first);
	struct occ_dynamic_data *dyn = get_occ_dynamic_data(first);
	int rc = OPAL_SUCCESS;

	if (!table->valid)
		return OPAL_HARDWARE;

	switch (powercap_get_attr(handle)) {
	case POWERCAP_OCC_CUR:
		*pcap = dyn->cur_pwr_cap;
		break;
	case POWERCAP_OCC_MAX:
		*pcap = dyn->max_pwr_cap;
		break;
	case POWERCAP_OCC_SOFT_MIN:
		*pcap = dyn->soft_min_pwr_cap;
		break;
	case POWERCAP_OCC_HARD_MIN:
		*pcap = dyn->hard_min_pwr_cap;
		break;
	default:
		*pcap = 0;
		rc = OPAL_UNSUPPORTED;
		break;
	}

	return rc;
}
1647
1648 static u16 pcap_cdata;
1649 static struct opal_occ_cmd_data pcap_data = {
1650 .data = (u8 *)&pcap_cdata,
1651 .cmd = OCC_CMD_SET_POWER_CAP,
1652 };
1653
occ_set_powercap(u32 handle,int token,u32 pcap)1654 int __attribute__((__const__)) occ_set_powercap(u32 handle, int token, u32 pcap)
1655 {
1656 struct occ_dynamic_data *ddata;
1657 struct proc_chip *chip;
1658 int i;
1659
1660 if (powercap_get_attr(handle) != POWERCAP_OCC_CUR)
1661 return OPAL_PERMISSION;
1662
1663 if (!chips)
1664 return OPAL_HARDWARE;
1665
1666 for (i = 0; i < nr_occs; i++)
1667 if (chips[i].occ_role == OCC_ROLE_MASTER)
1668 break;
1669
1670 if (!(*chips[i].valid))
1671 return OPAL_HARDWARE;
1672
1673 chip = get_chip(chips[i].chip_id);
1674 ddata = get_occ_dynamic_data(chip);
1675
1676 if (pcap == ddata->cur_pwr_cap)
1677 return OPAL_SUCCESS;
1678
1679 if (pcap && (pcap > ddata->max_pwr_cap ||
1680 pcap < ddata->soft_min_pwr_cap))
1681 return OPAL_PARAMETER;
1682
1683 pcap_cdata = pcap;
1684 return opal_occ_command(&chips[i], token, &pcap_data);
1685 };
1686
1687 /* Power-Shifting Ratio */
/* Type part of a power-shifting-ratio handle; only one type exists. */
enum psr_type {
	PSR_TYPE_CPU_TO_GPU, /* 0% Cap GPU first, 100% Cap CPU first */
};
1691
/*
 * Read the CPU-to-GPU power shifting ratio of the OCC selected by @handle
 * into *@ratio.
 *
 * The resource id of the handle is the index into chips[]. Returns
 * OPAL_UNSUPPORTED for an unknown type or an index outside chips[],
 * OPAL_HARDWARE when that chip's pstate table is not valid, OPAL_SUCCESS
 * otherwise.
 */
int occ_get_psr(u32 handle, u32 *ratio)
{
	struct occ_dynamic_data *ddata;
	struct proc_chip *chip;
	u8 i = psr_get_rid(handle);

	if (psr_get_type(handle) != PSR_TYPE_CPU_TO_GPU)
		return OPAL_UNSUPPORTED;

	/* Valid indices are 0 .. nr_occs - 1 */
	if (!chips || i >= nr_occs)
		return OPAL_UNSUPPORTED;

	if (!(*chips[i].valid))
		return OPAL_HARDWARE;

	chip = get_chip(chips[i].chip_id);
	ddata = get_occ_dynamic_data(chip);
	*ratio = ddata->pwr_shifting_ratio;
	return OPAL_SUCCESS;
}
1712
1713 static u8 psr_cdata;
1714 static struct opal_occ_cmd_data psr_data = {
1715 .data = &psr_cdata,
1716 .cmd = OCC_CMD_SET_POWER_SHIFTING_RATIO,
1717 };
1718
occ_set_psr(u32 handle,int token,u32 ratio)1719 int occ_set_psr(u32 handle, int token, u32 ratio)
1720 {
1721 struct occ_dynamic_data *ddata;
1722 struct proc_chip *chip;
1723 u8 i = psr_get_rid(handle);
1724
1725 if (psr_get_type(handle) != PSR_TYPE_CPU_TO_GPU)
1726 return OPAL_UNSUPPORTED;
1727
1728 if (ratio > 100)
1729 return OPAL_PARAMETER;
1730
1731 if (i > nr_occs)
1732 return OPAL_UNSUPPORTED;
1733
1734 if (!(*chips[i].valid))
1735 return OPAL_HARDWARE;
1736
1737 chip = get_chip(chips[i].chip_id);
1738 ddata = get_occ_dynamic_data(chip);
1739 if (ratio == ddata->pwr_shifting_ratio)
1740 return OPAL_SUCCESS;
1741
1742 psr_cdata = ratio;
1743 return opal_occ_command(&chips[i], token, &psr_data);
1744 }
1745
occ_add_psr_sensors(struct dt_node * power_mgt)1746 static void occ_add_psr_sensors(struct dt_node *power_mgt)
1747 {
1748 struct dt_node *node;
1749 int i;
1750
1751 node = dt_new(power_mgt, "psr");
1752 if (!node) {
1753 prerror("OCC: Failed to create power-shifting-ratio node\n");
1754 return;
1755 }
1756
1757 dt_add_property_string(node, "compatible",
1758 "ibm,opal-power-shift-ratio");
1759 dt_add_property_cells(node, "#address-cells", 1);
1760 dt_add_property_cells(node, "#size-cells", 0);
1761 for (i = 0; i < nr_occs; i++) {
1762 struct dt_node *cnode;
1763 char name[20];
1764 u32 handle = psr_make_handle(PSR_CLASS_OCC, i,
1765 PSR_TYPE_CPU_TO_GPU);
1766
1767 cnode = dt_new_addr(node, "cpu-to-gpu", handle);
1768 if (!cnode) {
1769 prerror("OCC: Failed to create power-shifting-ratio node\n");
1770 return;
1771 }
1772
1773 snprintf(name, 20, "cpu_to_gpu_%d", chips[i].chip_id);
1774 dt_add_property_string(cnode, "label", name);
1775 dt_add_property_cells(cnode, "handle", handle);
1776 dt_add_property_cells(cnode, "reg", chips[i].chip_id);
1777 }
1778 }
1779
1780 /* OCC clear sensor limits CSM/Profiler/Job-scheduler */
1781
/*
 * Sensor limit groups that can be cleared. occ_sensor_group_clear() sends
 * the value shifted left by 24 bits as the command payload.
 */
enum occ_sensor_limit_group {
	OCC_SENSOR_LIMIT_GROUP_CSM		= 0x10,
	OCC_SENSOR_LIMIT_GROUP_PROFILER		= 0x20,
	OCC_SENSOR_LIMIT_GROUP_JOB_SCHED	= 0x40,
};
1787
1788 static u32 sensor_limit;
1789 static struct opal_occ_cmd_data slimit_data = {
1790 .data = (u8 *)&sensor_limit,
1791 .cmd = OCC_CMD_CLEAR_SENSOR_DATA,
1792 };
1793
occ_sensor_group_clear(u32 group_hndl,int token)1794 int occ_sensor_group_clear(u32 group_hndl, int token)
1795 {
1796 u32 limit = sensor_get_rid(group_hndl);
1797 u8 i = sensor_get_attr(group_hndl);
1798
1799 if (i > nr_occs)
1800 return OPAL_UNSUPPORTED;
1801
1802 switch (limit) {
1803 case OCC_SENSOR_LIMIT_GROUP_CSM:
1804 case OCC_SENSOR_LIMIT_GROUP_PROFILER:
1805 case OCC_SENSOR_LIMIT_GROUP_JOB_SCHED:
1806 break;
1807 default:
1808 return OPAL_UNSUPPORTED;
1809 }
1810
1811 if (!(*chips[i].valid))
1812 return OPAL_HARDWARE;
1813
1814 sensor_limit = limit << 24;
1815 return opal_occ_command(&chips[i], token, &slimit_data);
1816 }
1817
1818 static u16 sensor_enable;
1819 static struct opal_occ_cmd_data sensor_mask_data = {
1820 .data = (u8 *)&sensor_enable,
1821 .cmd = OCC_CMD_SELECT_SENSOR_GROUP,
1822 };
1823
occ_sensor_group_enable(u32 group_hndl,int token,bool enable)1824 int occ_sensor_group_enable(u32 group_hndl, int token, bool enable)
1825 {
1826 u16 type = sensor_get_rid(group_hndl);
1827 u8 i = sensor_get_attr(group_hndl);
1828
1829 if (i > nr_occs)
1830 return OPAL_UNSUPPORTED;
1831
1832 switch (type) {
1833 case OCC_SENSOR_TYPE_GENERIC:
1834 case OCC_SENSOR_TYPE_CURRENT:
1835 case OCC_SENSOR_TYPE_VOLTAGE:
1836 case OCC_SENSOR_TYPE_TEMPERATURE:
1837 case OCC_SENSOR_TYPE_UTILIZATION:
1838 case OCC_SENSOR_TYPE_TIME:
1839 case OCC_SENSOR_TYPE_FREQUENCY:
1840 case OCC_SENSOR_TYPE_POWER:
1841 case OCC_SENSOR_TYPE_PERFORMANCE:
1842 break;
1843 default:
1844 return OPAL_UNSUPPORTED;
1845 }
1846
1847 if (!(*chips[i].valid))
1848 return OPAL_HARDWARE;
1849
1850 if (enable && (type & chips[i].enabled_sensor_mask))
1851 return OPAL_SUCCESS;
1852 else if (!enable && !(type & chips[i].enabled_sensor_mask))
1853 return OPAL_SUCCESS;
1854
1855 sensor_enable = enable ? type | chips[i].enabled_sensor_mask :
1856 ~type & chips[i].enabled_sensor_mask;
1857
1858 return opal_occ_command(&chips[i], token, &sensor_mask_data);
1859 }
1860
/*
 * Add the OCC sensor group nodes of one chip below @sg.
 *
 * @sg:          parent node receiving one "occ-<name>" child per group
 * @phandles:    sensor phandles, @nr_phandles entries
 * @ptype:       sensor type of each entry of @phandles (parallel array)
 * @nr_phandles: number of entries in @phandles and @ptype
 * @chipid:      chip the groups belong to
 *
 * Groups supporting OPAL_SENSOR_GROUP_CLEAR list every phandle in their
 * "sensors" property; groups supporting OPAL_SENSOR_GROUP_ENABLE list only
 * the phandles whose type matches the group. Does nothing when the command
 * interface has not been initialised.
 */
void occ_add_sensor_groups(struct dt_node *sg, __be32 *phandles, u32 *ptype,
			   int nr_phandles, int chipid)
{
	struct group_info {
		int type;
		const char *str;
		u32 ops;
	} groups[] = {
		{ OCC_SENSOR_LIMIT_GROUP_CSM, "csm",
		  OPAL_SENSOR_GROUP_CLEAR
		},
		{ OCC_SENSOR_LIMIT_GROUP_PROFILER, "profiler",
		  OPAL_SENSOR_GROUP_CLEAR
		},
		{ OCC_SENSOR_LIMIT_GROUP_JOB_SCHED, "js",
		  OPAL_SENSOR_GROUP_CLEAR
		},
		{ OCC_SENSOR_TYPE_GENERIC, "generic",
		  OPAL_SENSOR_GROUP_ENABLE
		},
		{ OCC_SENSOR_TYPE_CURRENT, "curr",
		  OPAL_SENSOR_GROUP_ENABLE
		},
		{ OCC_SENSOR_TYPE_VOLTAGE, "in",
		  OPAL_SENSOR_GROUP_ENABLE
		},
		{ OCC_SENSOR_TYPE_TEMPERATURE, "temp",
		  OPAL_SENSOR_GROUP_ENABLE
		},
		{ OCC_SENSOR_TYPE_UTILIZATION, "utilization",
		  OPAL_SENSOR_GROUP_ENABLE
		},
		{ OCC_SENSOR_TYPE_TIME, "time",
		  OPAL_SENSOR_GROUP_ENABLE
		},
		{ OCC_SENSOR_TYPE_FREQUENCY, "frequency",
		  OPAL_SENSOR_GROUP_ENABLE
		},
		{ OCC_SENSOR_TYPE_POWER, "power",
		  OPAL_SENSOR_GROUP_ENABLE
		},
		{ OCC_SENSOR_TYPE_PERFORMANCE, "performance",
		  OPAL_SENSOR_GROUP_ENABLE
		},
	};
	int i, j;

	/*
	 * Dont add sensor groups if cmd-interface is not initialized
	 */
	if (!chips)
		return;

	/*
	 * Find the chips[] index of this chip; it becomes the attribute
	 * part of every group handle.
	 * NOTE(review): if chipid matches no entry, i == nr_occs is encoded
	 * into the handles - confirm callers only pass known chips.
	 */
	for (i = 0; i < nr_occs; i++)
		if (chips[i].chip_id == chipid)
			break;

	for (j = 0; j < ARRAY_SIZE(groups); j++) {
		struct dt_node *node;
		char name[20];
		u32 handle;

		snprintf(name, 20, "occ-%s", groups[j].str);
		handle = sensor_make_handler(SENSOR_OCC, 0,
					     groups[j].type, i);
		node = dt_new_addr(sg, name, handle);
		if (!node) {
			prerror("Failed to create sensor group nodes\n");
			return;
		}

		dt_add_property_cells(node, "sensor-group-id", handle);
		dt_add_property_string(node, "type", groups[j].str);

		/* Only these four types also get the opal-sensor properties */
		if (groups[j].type == OCC_SENSOR_TYPE_CURRENT ||
		    groups[j].type == OCC_SENSOR_TYPE_VOLTAGE ||
		    groups[j].type == OCC_SENSOR_TYPE_TEMPERATURE ||
		    groups[j].type == OCC_SENSOR_TYPE_POWER) {
			dt_add_property_string(node, "sensor-type",
					       groups[j].str);
			dt_add_property_string(node, "compatible",
					       "ibm,opal-sensor");
		}

		dt_add_property_cells(node, "ibm,chip-id", chipid);
		dt_add_property_cells(node, "reg", handle);
		if (groups[j].ops == OPAL_SENSOR_GROUP_ENABLE) {
			__be32 *_phandles;
			int k, pcount = 0;

			/* Collect the phandles whose type matches this group */
			_phandles = malloc(sizeof(u32) * nr_phandles);
			assert(_phandles);
			for (k = 0; k < nr_phandles; k++)
				if (ptype[k] == groups[j].type)
					_phandles[pcount++] = phandles[k];
			/* No "sensors" property at all when nothing matched */
			if (pcount)
				dt_add_property(node, "sensors", _phandles,
						pcount * sizeof(u32));
			free(_phandles);
		} else {
			dt_add_property(node, "sensors", phandles,
					nr_phandles * sizeof(u32));
		}
		dt_add_property_cells(node, "ops", groups[j].ops);
	}
}
1967
/* CPU-OCC PState init */
/* Called after OCC init on P8, P9 and P10 */
/*
 * Publish the OCC pstate data in the device tree and start throttle
 * polling.
 *
 * May be called again after a fast reboot: the pstate properties and the
 * "occ" children of /ibm,opal/power-mgt are then removed and rebuilt, while
 * the one-time setup (poller registration, command interface, frequency
 * domain properties) is skipped. Returns early, after logging, when the
 * power-mgt node or the HOMER is missing or the OCCs do not come up.
 */
void occ_pstates_init(void)
{
	struct proc_chip *chip;
	struct cpu_thread *c;
	struct dt_node *power_mgt;
	int pstate_nom;
	u32 freq_domain_mask;
	u8 domain_runs_at;
	static bool occ_pstates_initialized;

	power_mgt = dt_find_by_path(dt_root, "/ibm,opal/power-mgt");
	if (!power_mgt) {
		/**
		 * @fwts-label OCCDTNodeNotFound
		 * @fwts-advice Device tree node /ibm,opal/power-mgt not
		 * found. OPAL didn't add pstate information to device tree.
		 * Probably a firmware bug.
		 */
		prlog(PR_ERR, "OCC: dt node /ibm,opal/power-mgt not found\n");
		return;
	}

	/* Handle fast reboots */
	if (occ_pstates_initialized) {
		struct dt_node *child;
		int i;
		const char *props[] = {
				"ibm,pstate-core-max",
				"ibm,pstate-frequencies-mhz",
				"ibm,pstate-ids",
				"ibm,pstate-max",
				"ibm,pstate-min",
				"ibm,pstate-nominal",
				"ibm,pstate-turbo",
				"ibm,pstate-ultra-turbo",
				"ibm,pstate-base",
				"#address-cells",
				"#size-cells",
				};

		for (i = 0; i < ARRAY_SIZE(props); i++)
			dt_check_del_prop(power_mgt, props[i]);

		/*
		 * NOTE(review): child is freed while the list is being
		 * walked - confirm dt_for_each_child() tolerates removal of
		 * the current node.
		 */
		dt_for_each_child(power_mgt, child)
			if (!strncmp(child->name, "occ", 3))
				dt_free(child);
	}

	/* P10 uses the same HOMER offset as P9 */
	switch (proc_gen) {
	case proc_gen_p8:
		homer_opal_data_offset = P8_HOMER_OPAL_DATA_OFFSET;
		break;
	case proc_gen_p9:
	case proc_gen_p10:
		homer_opal_data_offset = P9_HOMER_OPAL_DATA_OFFSET;
		break;
	default:
		return;
	}

	chip = next_chip(NULL);
	if (!chip->homer_base) {
		log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
			"OCC: No HOMER detected, assuming no pstates\n");
		return;
	}

	/* Wait for all OCC to boot up */
	if(!wait_for_all_occ_init()) {
		log_simple_error(&e_info(OPAL_RC_OCC_TIMEOUT),
			 "OCC: Initialization on all chips did not complete"
			 "(timed out)\n");
		return;
	}

	/*
	 * Check boundary conditions and add device tree nodes
	 * and return nominal pstate to set for the core
	 */
	if (!add_cpu_pstate_properties(power_mgt, &pstate_nom)) {
		log_simple_error(&e_info(OPAL_RC_OCC_PSTATE_INIT),
			"Skiping core cpufreq init due to OCC error\n");
	} else if (proc_gen == proc_gen_p8) {
		/*
		 * Setup host based pstates and set nominal frequency only in
		 * P8.
		 */
		for_each_chip(chip)
			for_each_available_core_in_chip(c, chip->id)
				cpu_pstates_prepare_core(chip, c, pstate_nom);
	}

	/* Everything below is one-time setup, skipped on fast reboot */
	if (occ_pstates_initialized)
		return;

	/* Add opal_poller to poll OCC throttle status of each chip */
	for_each_chip(chip)
		chip->throttle = 0;
	opal_add_poller(occ_throttle_poll, NULL);
	occ_pstates_initialized = true;

	/* Init OPAL-OCC command-response interface */
	occ_cmd_interface_init();

	/* TODO Firmware plumbing required so as to have two modes to set
	 * PMCR based on max in domain or most recently used. As of today,
	 * it is always max in domain for P9.
	 */
	domain_runs_at = 0;
	freq_domain_mask = 0;
	if (proc_gen == proc_gen_p8) {
		freq_domain_mask = P8_PIR_CORE_MASK;
		domain_runs_at = FREQ_MOST_RECENTLY_SET;
	} else if (proc_gen == proc_gen_p9) {
		freq_domain_mask = P9_PIR_QUAD_MASK;
		domain_runs_at = FREQ_MAX_IN_DOMAIN;
	} else if (proc_gen == proc_gen_p10) {
		freq_domain_mask = P10_PIR_CHIP_MASK;
		domain_runs_at = FREQ_MAX_IN_DOMAIN;
	} else {
		assert(0);
	}

	dt_add_property_cells(power_mgt, "freq-domain-mask", freq_domain_mask);
	dt_add_property_cells(power_mgt, "domain-runs-at", domain_runs_at);
}
2096
find_master_and_slave_occ(uint64_t ** master,uint64_t ** slave,int * nr_masters,int * nr_slaves)2097 int find_master_and_slave_occ(uint64_t **master, uint64_t **slave,
2098 int *nr_masters, int *nr_slaves)
2099 {
2100 struct proc_chip *chip;
2101 int nr_chips = 0, i;
2102 uint64_t chipids[MAX_CHIPS];
2103
2104 for_each_chip(chip) {
2105 chipids[nr_chips++] = chip->id;
2106 }
2107
2108 chip = next_chip(NULL);
2109 /*
2110 * Proc0 is the master OCC for Tuleta/Alpine boxes.
2111 * Hostboot expects the pair of chips for MURANO, so pass the sibling
2112 * chip id along with proc0 to hostboot.
2113 */
2114 *nr_masters = (chip->type == PROC_CHIP_P8_MURANO) ? 2 : 1;
2115 *master = (uint64_t *)malloc(*nr_masters * sizeof(uint64_t));
2116
2117 if (!*master) {
2118 printf("OCC: master array alloc failure\n");
2119 return -ENOMEM;
2120 }
2121
2122 if (nr_chips - *nr_masters > 0) {
2123 *nr_slaves = nr_chips - *nr_masters;
2124 *slave = (uint64_t *)malloc(*nr_slaves * sizeof(uint64_t));
2125 if (!*slave) {
2126 printf("OCC: slave array alloc failure\n");
2127 return -ENOMEM;
2128 }
2129 }
2130
2131 for (i = 0; i < nr_chips; i++) {
2132 if (i < *nr_masters) {
2133 *(*master + i) = chipids[i];
2134 continue;
2135 }
2136 *(*slave + i - *nr_masters) = chipids[i];
2137 }
2138 return 0;
2139 }
2140
2141
occ_msg_queue_occ_reset(void)2142 int occ_msg_queue_occ_reset(void)
2143 {
2144 struct opal_occ_msg occ_msg = { CPU_TO_BE64(OCC_RESET), 0, 0 };
2145 struct proc_chip *chip;
2146 int rc;
2147
2148 lock(&occ_lock);
2149 rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL,
2150 sizeof(struct opal_occ_msg), &occ_msg);
2151 if (rc) {
2152 prlog(PR_INFO, "OCC: Failed to queue OCC_RESET message\n");
2153 goto out;
2154 }
2155 /*
2156 * Set 'valid' byte of occ_pstate_table to 0 since OCC
2157 * may not clear this byte on a reset.
2158 * OCC will set the 'valid' byte to 1 when it becomes
2159 * active again.
2160 */
2161 for_each_chip(chip) {
2162 struct occ_pstate_table *occ_data;
2163
2164 occ_data = get_occ_pstate_table(chip);
2165 occ_data->valid = 0;
2166 chip->throttle = 0;
2167 }
2168 occ_reset = true;
2169 out:
2170 unlock(&occ_lock);
2171 return rc;
2172 }
2173
2174 #define PV_OCC_GP0 0x01000000
2175 #define PV_OCC_GP0_AND 0x01000004
2176 #define PV_OCC_GP0_OR 0x01000005
2177 #define PV_OCC_GP0_PNOR_OWNER PPC_BIT(18) /* 1 = OCC / Host, 0 = BMC */
2178
occ_pnor_set_one_owner(uint32_t chip_id,enum pnor_owner owner)2179 static void occ_pnor_set_one_owner(uint32_t chip_id, enum pnor_owner owner)
2180 {
2181 uint64_t reg, mask;
2182
2183 if (owner == PNOR_OWNER_HOST) {
2184 reg = PV_OCC_GP0_OR;
2185 mask = PV_OCC_GP0_PNOR_OWNER;
2186 } else {
2187 reg = PV_OCC_GP0_AND;
2188 mask = ~PV_OCC_GP0_PNOR_OWNER;
2189 }
2190
2191 xscom_write(chip_id, reg, mask);
2192 }
2193
occ_pnor_set_owner(enum pnor_owner owner)2194 void occ_pnor_set_owner(enum pnor_owner owner)
2195 {
2196 struct proc_chip *chip;
2197
2198 for_each_chip(chip)
2199 occ_pnor_set_one_owner(chip->id, owner);
2200 }
2201
2202
/* P8: OCCMISC register with its AND / OR access addresses */
#define P8_OCB_OCI_OCCMISC		0x6a020
#define P8_OCB_OCI_OCCMISC_AND		0x6a021
#define P8_OCB_OCI_OCCMISC_OR		0x6a022

/* P9: OCCMISC register with its CLEAR / OR access addresses */
#define P9_OCB_OCI_OCCMISC		0x6c080
#define P9_OCB_OCI_OCCMISC_CLEAR	0x6c081
#define P9_OCB_OCI_OCCMISC_OR		0x6c082

/* OCCMISC interrupt bits, listed in bit order */
#define OCB_OCI_OCIMISC_IRQ		PPC_BIT(0)
#define OCB_OCI_OCIMISC_IRQ_TMGT	PPC_BIT(1)
#define OCB_OCI_OCIMISC_IRQ_I2C		PPC_BIT(2)
#define OCB_OCI_OCIMISC_IRQ_SHMEM	PPC_BIT(3)
#define OCB_OCI_OCIMISC_IRQ_SLW_TMR	PPC_BIT(14)
#define OCB_OCI_OCIMISC_IRQ_OPAL_DUMMY	PPC_BIT(15)

/* Source bits that make the P8 handler re-raise the interrupt */
#define P8_OCB_OCI_OCIMISC_MASK \
	(OCB_OCI_OCIMISC_IRQ_TMGT | \
	 OCB_OCI_OCIMISC_IRQ_SLW_TMR | \
	 OCB_OCI_OCIMISC_IRQ_OPAL_DUMMY)

/* Source bits that make the P9 handler re-raise the interrupt */
#define P9_OCB_OCI_OCIMISC_MASK \
	(OCB_OCI_OCIMISC_IRQ_TMGT | \
	 OCB_OCI_OCIMISC_IRQ_I2C | \
	 OCB_OCI_OCIMISC_IRQ_SHMEM | \
	 OCB_OCI_OCIMISC_IRQ_OPAL_DUMMY)
2226
occ_send_dummy_interrupt(void)2227 void occ_send_dummy_interrupt(void)
2228 {
2229 struct psi *psi;
2230 struct proc_chip *chip = get_chip(this_cpu()->chip_id);
2231
2232 /* Emulators don't do this */
2233 if (chip_quirk(QUIRK_NO_OCC_IRQ))
2234 return;
2235
2236 /* Find a functional PSI. This ensures an interrupt even if
2237 * the psihb on the current chip is not configured */
2238 if (chip->psi)
2239 psi = chip->psi;
2240 else
2241 psi = psi_find_functional_chip();
2242
2243 if (!psi) {
2244 prlog_once(PR_WARNING, "PSI: no functional PSI HB found, "
2245 "no self interrupts delivered\n");
2246 return;
2247 }
2248
2249 switch (proc_gen) {
2250 case proc_gen_p8:
2251 xscom_write(psi->chip_id, P8_OCB_OCI_OCCMISC_OR,
2252 OCB_OCI_OCIMISC_IRQ |
2253 OCB_OCI_OCIMISC_IRQ_OPAL_DUMMY);
2254 break;
2255 case proc_gen_p9:
2256 xscom_write(psi->chip_id, P9_OCB_OCI_OCCMISC_OR,
2257 OCB_OCI_OCIMISC_IRQ |
2258 OCB_OCI_OCIMISC_IRQ_OPAL_DUMMY);
2259 break;
2260 case proc_gen_p10:
2261 xscom_write(psi->chip_id, P9_OCB_OCI_OCCMISC_OR,
2262 OCB_OCI_OCIMISC_IRQ |
2263 OCB_OCI_OCIMISC_IRQ_OPAL_DUMMY);
2264 break;
2265 default:
2266 break;
2267 }
2268 }
2269
occ_p8_interrupt(uint32_t chip_id)2270 void occ_p8_interrupt(uint32_t chip_id)
2271 {
2272 uint64_t ireg;
2273 int64_t rc;
2274
2275 /* The OCC interrupt is used to mux up to 15 different sources */
2276 rc = xscom_read(chip_id, P8_OCB_OCI_OCCMISC, &ireg);
2277 if (rc) {
2278 prerror("OCC: Failed to read interrupt status !\n");
2279 /* Should we mask it in the XIVR ? */
2280 return;
2281 }
2282 prlog(PR_TRACE, "OCC: IRQ received: %04llx\n", ireg >> 48);
2283
2284 /* Clear the bits */
2285 xscom_write(chip_id, P8_OCB_OCI_OCCMISC_AND, ~ireg);
2286
2287 /* Dispatch */
2288 if (ireg & OCB_OCI_OCIMISC_IRQ_TMGT)
2289 prd_tmgt_interrupt(chip_id);
2290 if (ireg & OCB_OCI_OCIMISC_IRQ_SLW_TMR)
2291 check_timers(true);
2292
2293 /* We may have masked-out OCB_OCI_OCIMISC_IRQ in the previous
2294 * OCCMISC_AND write. Check if there are any new source bits set,
2295 * and trigger another interrupt if so.
2296 */
2297 rc = xscom_read(chip_id, P8_OCB_OCI_OCCMISC, &ireg);
2298 if (!rc && (ireg & P8_OCB_OCI_OCIMISC_MASK))
2299 xscom_write(chip_id, P8_OCB_OCI_OCCMISC_OR,
2300 OCB_OCI_OCIMISC_IRQ);
2301 }
2302
occ_p9_interrupt(uint32_t chip_id)2303 void occ_p9_interrupt(uint32_t chip_id)
2304 {
2305 u64 ireg;
2306 s64 rc;
2307
2308 /* The OCC interrupt is used to mux up to 15 different sources */
2309 rc = xscom_read(chip_id, P9_OCB_OCI_OCCMISC, &ireg);
2310 if (rc) {
2311 prerror("OCC: Failed to read interrupt status !\n");
2312 return;
2313 }
2314 prlog(PR_TRACE, "OCC: IRQ received: %04llx\n", ireg >> 48);
2315
2316 /* Clear the bits */
2317 xscom_write(chip_id, P9_OCB_OCI_OCCMISC_CLEAR, ireg);
2318
2319 /* Dispatch */
2320 if (ireg & OCB_OCI_OCIMISC_IRQ_TMGT)
2321 prd_tmgt_interrupt(chip_id);
2322
2323 if (ireg & OCB_OCI_OCIMISC_IRQ_SHMEM) {
2324 occ_throttle_poll(NULL);
2325 handle_occ_rsp(chip_id);
2326 }
2327
2328 if (ireg & OCB_OCI_OCIMISC_IRQ_I2C)
2329 p9_i2c_bus_owner_change(chip_id);
2330
2331 /* We may have masked-out OCB_OCI_OCIMISC_IRQ in the previous
2332 * OCCMISC_AND write. Check if there are any new source bits set,
2333 * and trigger another interrupt if so.
2334 */
2335 rc = xscom_read(chip_id, P9_OCB_OCI_OCCMISC, &ireg);
2336 if (!rc && (ireg & P9_OCB_OCI_OCIMISC_MASK))
2337 xscom_write(chip_id, P9_OCB_OCI_OCCMISC_OR,
2338 OCB_OCI_OCIMISC_IRQ);
2339 }
2340