1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/x86_archext.h>
27 #include <sys/machsystm.h>
28 #include <sys/x_call.h>
29 #include <sys/cpu_acpi.h>
30 #include <sys/cpupm_throttle.h>
31 #include <sys/dtrace.h>
32 #include <sys/sdt.h>
33 
/*
 * Forward declarations of the entry points referenced by cpupm_throttle_ops
 * below; the functions themselves are defined later in this file.
 */
static int cpupm_throttle_init(cpu_t *);
static void cpupm_throttle_fini(cpu_t *);
static void cpupm_throttle(cpuset_t,  uint32_t);

/*
 * Operations vector for ACPI T-state (throttling) support: a descriptive
 * label followed by the init, fini and throttle-level-change routines.
 * NOTE(review): the initializer is positional; the field order is dictated
 * by the cpupm_state_ops_t declaration, which is not visible here.
 */
cpupm_state_ops_t cpupm_throttle_ops = {
	"Generic ACPI T-state Support",
	cpupm_throttle_init,
	cpupm_throttle_fini,
	cpupm_throttle
};
44 
/*
 * Error returns
 *
 * cpupm_throttle_init() returns THROTTLE_RET_SUCCESS when T-state support
 * was set up and THROTTLE_RET_INCOMPLETE_DATA when the ACPI data could not
 * be cached or the _PTC address space is unsupported.
 */
#define	THROTTLE_RET_SUCCESS		0x00
#define	THROTTLE_RET_INCOMPLETE_DATA	0x01
#define	THROTTLE_RET_UNSUP_STATE	0x02
#define	THROTTLE_RET_TRANS_INCOMPLETE	0x03

/*
 * Delay handed to drv_usecwait() between two status polls while waiting for
 * a T-state transition to complete; the remaining poll budget is decremented
 * by the same amount on every iteration.
 */
#define	THROTTLE_LATENCY_WAIT		1

/*
 * MSR register for clock modulation
 */
#define	IA32_CLOCK_MODULATION_MSR	0x19A
59 
/*
 * Debugging support
 *
 * CTDEBUG() takes a fully parenthesized printf argument list, e.g.
 * CTDEBUG(("value = %d\n", v)).  When DEBUG is defined the message is
 * printed only if cpupm_throttle_debug is non-zero; otherwise the macro
 * expands to a no-op.
 *
 * The macro is wrapped in do { } while (0) so that it behaves as a single
 * statement: the caller supplies the terminating semicolon and the macro
 * can be used safely as the body of an unbraced if/else.
 */
#ifdef  DEBUG
volatile int cpupm_throttle_debug = 0;
#define	CTDEBUG(arglist)				\
	do {						\
		if (cpupm_throttle_debug)		\
			printf arglist;			\
	} while (0)
#else
#define	CTDEBUG(arglist)	((void)0)
#endif
69 
70 /*
71  * Write the _PTC ctrl register. How it is written, depends upon the _PTC
72  * APCI object value.
73  */
74 static int
75 write_ctrl(cpu_acpi_handle_t handle, uint32_t ctrl)
76 {
77 	cpu_acpi_ptc_t *ptc_ctrl;
78 	uint64_t reg;
79 	int ret = 0;
80 
81 	ptc_ctrl = CPU_ACPI_PTC_CTRL(handle);
82 
83 	switch (ptc_ctrl->cr_addrspace_id) {
84 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
85 		/*
86 		 * Read current thermal state because reserved bits must be
87 		 * preserved, compose new value, and write it.The writable
88 		 * bits are 4:1 (1 to 4).
89 		 * Bits 3:1 => On-Demand Clock Modulation Duty Cycle
90 		 * Bit  4   => On-Demand Clock Modulation Enable
91 		 * Left shift ctrl by 1 to allign with bits 1-4 of MSR
92 		 */
93 		reg = rdmsr(IA32_CLOCK_MODULATION_MSR);
94 		reg &= ~((uint64_t)0x1E);
95 		reg |= ctrl;
96 		wrmsr(IA32_CLOCK_MODULATION_MSR, reg);
97 		break;
98 
99 	case ACPI_ADR_SPACE_SYSTEM_IO:
100 		ret = cpu_acpi_write_port(ptc_ctrl->cr_address, ctrl,
101 		    ptc_ctrl->cr_width);
102 		break;
103 
104 	default:
105 		DTRACE_PROBE1(throttle_ctrl_unsupported_type, uint8_t,
106 		    ptc_ctrl->cr_addrspace_id);
107 
108 		ret = -1;
109 	}
110 
111 	DTRACE_PROBE1(throttle_ctrl_write, uint32_t, ctrl);
112 	DTRACE_PROBE1(throttle_ctrl_write_err, int, ret);
113 
114 	return (ret);
115 }
116 
117 static int
118 read_status(cpu_acpi_handle_t handle, uint32_t *stat)
119 {
120 	cpu_acpi_ptc_t *ptc_stat;
121 	uint64_t reg;
122 	int ret = 0;
123 
124 	ptc_stat = CPU_ACPI_PTC_STATUS(handle);
125 
126 	switch (ptc_stat->cr_addrspace_id) {
127 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
128 		reg = rdmsr(IA32_CLOCK_MODULATION_MSR);
129 		*stat = reg & 0x1E;
130 		ret = 0;
131 		break;
132 
133 	case ACPI_ADR_SPACE_SYSTEM_IO:
134 		ret = cpu_acpi_read_port(ptc_stat->cr_address, stat,
135 		    ptc_stat->cr_width);
136 		break;
137 
138 	default:
139 		DTRACE_PROBE1(throttle_status_unsupported_type, uint8_t,
140 		    ptc_stat->cr_addrspace_id);
141 
142 		return (-1);
143 	}
144 
145 	DTRACE_PROBE1(throttle_status_read, uint32_t, *stat);
146 	DTRACE_PROBE1(throttle_status_read_err, int, ret);
147 
148 	return (ret);
149 }
150 
151 /*
152  * Transition the current processor to the requested throttling state.
153  */
154 static void
155 cpupm_tstate_transition(uint32_t req_state)
156 {
157 	cpupm_mach_state_t *mach_state =
158 	    (cpupm_mach_state_t *)CPU->cpu_m.mcpu_pm_mach_state;
159 	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
160 	cpu_acpi_tstate_t *req_tstate;
161 	uint32_t ctrl;
162 	uint32_t stat;
163 	int i;
164 
165 	req_tstate = (cpu_acpi_tstate_t *)CPU_ACPI_TSTATES(handle);
166 	req_tstate += req_state;
167 	DTRACE_PROBE1(throttle_transition, uint32_t,
168 	    CPU_ACPI_FREQPER(req_tstate));
169 
170 	/*
171 	 * Initiate the processor t-state change.
172 	 */
173 	ctrl = CPU_ACPI_TSTATE_CTRL(req_tstate);
174 	if (write_ctrl(handle, ctrl) != 0) {
175 		return;
176 	}
177 
178 	/*
179 	 * If status is zero, then transition is synchronous and
180 	 * no status value comparison is required.
181 	 */
182 	if (CPU_ACPI_TSTATE_STAT(req_tstate) == 0) {
183 		return;
184 	}
185 
186 	/* Wait until switch is complete, but bound the loop just in case. */
187 	for (i = CPU_ACPI_TSTATE_TRANSLAT(req_tstate) * 2; i >= 0;
188 	    i -= THROTTLE_LATENCY_WAIT) {
189 		if (read_status(handle, &stat) == 0 &&
190 		    CPU_ACPI_TSTATE_STAT(req_tstate) == stat)
191 			break;
192 		drv_usecwait(THROTTLE_LATENCY_WAIT);
193 	}
194 
195 	if (CPU_ACPI_TSTATE_STAT(req_tstate) != stat) {
196 		DTRACE_PROBE(throttle_transition_incomplete);
197 	}
198 }
199 
200 static void
201 cpupm_throttle(cpuset_t set,  uint32_t throtl_lvl)
202 {
203 	/*
204 	 * If thread is already running on target CPU then just
205 	 * make the transition request. Otherwise, we'll need to
206 	 * make a cross-call.
207 	 */
208 	kpreempt_disable();
209 	if (CPU_IN_SET(set, CPU->cpu_id)) {
210 		cpupm_tstate_transition(throtl_lvl);
211 		CPUSET_DEL(set, CPU->cpu_id);
212 	}
213 	if (!CPUSET_ISNULL(set)) {
214 		xc_call((xc_arg_t)throtl_lvl, NULL, NULL,
215 		    CPUSET2BV(set), (xc_func_t)cpupm_tstate_transition);
216 	}
217 	kpreempt_enable();
218 }
219 
220 static int
221 cpupm_throttle_init(cpu_t *cp)
222 {
223 	cpupm_mach_state_t *mach_state =
224 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
225 	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
226 	cpu_acpi_ptc_t *ptc_stat;
227 
228 	if (cpu_acpi_cache_tstate_data(handle) != 0) {
229 		CTDEBUG(("Failed to cache T-state ACPI data\n"));
230 		cpupm_throttle_fini(cp);
231 		return (THROTTLE_RET_INCOMPLETE_DATA);
232 	}
233 
234 	/*
235 	 * Check the address space used for transitions
236 	 */
237 	ptc_stat = CPU_ACPI_PTC_STATUS(handle);
238 	switch (ptc_stat->cr_addrspace_id) {
239 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
240 		CTDEBUG(("T-State transitions will use fixed hardware\n"));
241 		break;
242 	case ACPI_ADR_SPACE_SYSTEM_IO:
243 		CTDEBUG(("T-State transitions will use System IO\n"));
244 		break;
245 	default:
246 		cmn_err(CE_WARN, "!_PTC conifgured for unsupported "
247 		    "address space type = %d.", ptc_stat->cr_addrspace_id);
248 		return (THROTTLE_RET_INCOMPLETE_DATA);
249 	}
250 
251 	cpupm_alloc_domains(cp, CPUPM_T_STATES);
252 
253 	return (THROTTLE_RET_SUCCESS);
254 }
255 
256 static void
257 cpupm_throttle_fini(cpu_t *cp)
258 {
259 	cpupm_mach_state_t *mach_state =
260 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
261 	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
262 
263 	cpupm_free_domains(&cpupm_tstate_domains);
264 	cpu_acpi_free_tstate_data(handle);
265 }
266 
267 /*
268  * This routine reads the ACPI _TPC object. It's accessed as a callback
269  * by the cpu driver whenever a _TPC change notification is received.
270  */
271 static int
272 cpupm_throttle_get_max(processorid_t cpu_id)
273 {
274 	cpu_t			*cp = cpu[cpu_id];
275 	cpupm_mach_state_t 	*mach_state =
276 	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
277 	cpu_acpi_handle_t	handle;
278 	int			throtl_level;
279 	int			max_throttle_lvl;
280 	uint_t			num_throtl;
281 
282 	if (mach_state == NULL) {
283 		return (-1);
284 	}
285 
286 	handle = mach_state->ms_acpi_handle;
287 	ASSERT(handle != NULL);
288 
289 	cpu_acpi_cache_tpc(handle);
290 	throtl_level = CPU_ACPI_TPC(handle);
291 
292 	num_throtl = CPU_ACPI_TSTATES_COUNT(handle);
293 
294 	max_throttle_lvl = num_throtl - 1;
295 	if ((throtl_level < 0) || (throtl_level > max_throttle_lvl)) {
296 		cmn_err(CE_NOTE, "!cpupm_throttle_get_max: CPU %d: "
297 		    "_TPC out of range %d", cp->cpu_id, throtl_level);
298 		throtl_level = 0;
299 	}
300 
301 	return (throtl_level);
302 }
303 
304 /*
305  * Take care of CPU throttling when _TPC notification arrives
306  */
307 void
308 cpupm_throttle_manage_notification(void *ctx)
309 {
310 	cpu_t			*cp = ctx;
311 	processorid_t		cpu_id = cp->cpu_id;
312 	cpupm_mach_state_t	*mach_state =
313 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
314 	boolean_t		is_ready;
315 	int			new_level;
316 
317 	if (mach_state == NULL) {
318 		return;
319 	}
320 
321 	/*
322 	 * We currently refuse to power-manage if the CPU is not ready to
323 	 * take cross calls (cross calls fail silently if CPU is not ready
324 	 * for it).
325 	 *
326 	 * Additionally, for x86 platforms we cannot power-manage
327 	 * any one instance, until all instances have been initialized.
328 	 * That's because we don't know what the CPU domains look like
329 	 * until all instances have been initialized.
330 	 */
331 	is_ready = (cp->cpu_flags & CPU_READY) && cpupm_throttle_ready();
332 	if (!is_ready)
333 		return;
334 
335 	if (!(mach_state->ms_caps & CPUPM_T_STATES))
336 		return;
337 	ASSERT(mach_state->ms_tstate.cma_ops != NULL);
338 
339 	/*
340 	 * Get the new T-State support level
341 	 */
342 	new_level = cpupm_throttle_get_max(cpu_id);
343 
344 	cpupm_state_change(cp, new_level, CPUPM_T_STATES);
345 }
346