1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Xtensa Performance Monitor Module driver
4  * See Tensilica Debug User's Guide for PMU registers documentation.
5  *
6  * Copyright (C) 2015 Cadence Design Systems Inc.
7  */
8 
9 #include <linux/interrupt.h>
10 #include <linux/irqdomain.h>
11 #include <linux/module.h>
12 #include <linux/of.h>
13 #include <linux/perf_event.h>
14 #include <linux/platform_device.h>
15 
16 #include <asm/processor.h>
17 #include <asm/stacktrace.h>
18 
/*
 * PMU register addresses, as passed to get_er()/set_er() by this driver.
 * See the Tensilica Debug User's Guide for the register documentation.
 */

/* Global control/status for all perf counters */
#define XTENSA_PMU_PMG			0x1000
/* Perf counter values; one register per counter i, 4 bytes apart */
#define XTENSA_PMU_PM(i)		(0x1080 + (i) * 4)
/* Perf counter control registers */
#define XTENSA_PMU_PMCTRL(i)		(0x1100 + (i) * 4)
/* Perf counter status registers */
#define XTENSA_PMU_PMSTAT(i)		(0x1180 + (i) * 4)

/* PMG bit that xtensa_pmu_enable()/xtensa_pmu_disable() set and clear */
#define XTENSA_PMU_PMG_PMEN		0x1

/* Mask applied to the difference of two raw counter reads */
#define XTENSA_PMU_COUNTER_MASK		0xffffffffULL
/* Largest number of events a counter is programmed to count in one go */
#define XTENSA_PMU_COUNTER_MAX		0x7fffffff

/* PMCTRL bit fields */
#define XTENSA_PMU_PMCTRL_INTEN		0x00000001
#define XTENSA_PMU_PMCTRL_KRNLCNT	0x00000008
#define XTENSA_PMU_PMCTRL_TRACELEVEL	0x000000f0
#define XTENSA_PMU_PMCTRL_SELECT_SHIFT	8
#define XTENSA_PMU_PMCTRL_SELECT	0x00001f00
#define XTENSA_PMU_PMCTRL_MASK_SHIFT	16
#define XTENSA_PMU_PMCTRL_MASK		0xffff0000

/*
 * Build a PMCTRL value from an event select code and an event mask, with
 * all TRACELEVEL bits and INTEN set.
 *
 * The operands are converted to unsigned int before shifting: a 16-bit
 * mask such as 0xffff shifted left by 16 does not fit in a signed int,
 * and shifting a signed value into the sign bit is undefined in C.
 */
#define XTENSA_PMU_MASK(select, mask) \
	(((unsigned int)(select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
	 ((unsigned int)(mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
	 XTENSA_PMU_PMCTRL_TRACELEVEL | \
	 XTENSA_PMU_PMCTRL_INTEN)

/* PMSTAT bit fields */
#define XTENSA_PMU_PMSTAT_OVFL		0x00000001
#define XTENSA_PMU_PMSTAT_INTASRT	0x00000010
/* Bookkeeping of which event occupies which hardware counter on one CPU */
struct xtensa_pmu_events {
	/* Array of events currently on this core, indexed by counter number */
	struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
	/* Bitmap of used hardware counters (bit i set: counter i is taken) */
	unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
};
/* One instance per CPU; users in this file reach it via this_cpu_ptr() */
static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
57 
/*
 * PMCTRL values for PERF_TYPE_HARDWARE events, indexed by the generic
 * PERF_COUNT_HW_* id. Ids without an entry stay zero, which
 * xtensa_pmu_event_init() rejects with -EINVAL.
 */
static const u32 xtensa_hw_ctl[] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= XTENSA_PMU_MASK(0, 0x1),
	[PERF_COUNT_HW_INSTRUCTIONS]		= XTENSA_PMU_MASK(2, 0xffff),
	[PERF_COUNT_HW_CACHE_REFERENCES]	= XTENSA_PMU_MASK(10, 0x1),
	[PERF_COUNT_HW_CACHE_MISSES]		= XTENSA_PMU_MASK(12, 0x1),
	/* Taken and non-taken branches + taken loop ends */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= XTENSA_PMU_MASK(2, 0x490),
	/* Instruction-related + other global stall cycles */
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= XTENSA_PMU_MASK(4, 0x1ff),
	/* Data-related global stall cycles */
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= XTENSA_PMU_MASK(3, 0x1ff),
};
70 
/* Shorthand for the PERF_COUNT_HW_CACHE_* enumerators used below */
#define C(_x) PERF_COUNT_HW_CACHE_##_x

/*
 * PMCTRL values for PERF_TYPE_HW_CACHE events, indexed by
 * [cache type][operation][result]. Combinations without an entry stay
 * zero, which xtensa_pmu_cache_event() reports as -EINVAL.
 */
static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(10, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(10, 0x2),
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(11, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(11, 0x2),
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(8, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(8, 0x2),
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(9, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(9, 0x8),
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(7, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(7, 0x8),
		},
	},
};
103 
/*
 * Translate a PERF_TYPE_HW_CACHE config into a PMCTRL value.
 *
 * The config packs the cache type in bits 0-7, the operation in bits 8-15
 * and the result in bits 16-23. Returns the matching xtensa_cache_ctl
 * entry, or -EINVAL when a field is out of range or the entry is zero.
 */
static int xtensa_pmu_cache_event(u64 config)
{
	const unsigned int type = config & 0xff;
	const unsigned int op = (config >> 8) & 0xff;
	const unsigned int result = (config >> 16) & 0xff;
	int ctl;

	if (type >= ARRAY_SIZE(xtensa_cache_ctl))
		return -EINVAL;
	if (op >= C(OP_MAX) || result >= C(RESULT_MAX))
		return -EINVAL;

	/* A zero table entry marks a combination with no mapping */
	ctl = xtensa_cache_ctl[type][op][result];

	return ctl ? ctl : -EINVAL;
}
125 
/* Return the current raw value of hardware counter @idx. */
static inline uint32_t xtensa_pmu_read_counter(int idx)
{
	const uint32_t value = get_er(XTENSA_PMU_PM(idx));

	return value;
}
130 
/* Load the raw value @v into hardware counter @idx. */
static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
{
	const int reg = XTENSA_PMU_PM(idx);

	set_er(v, reg);
}
135 
xtensa_perf_event_update(struct perf_event * event,struct hw_perf_event * hwc,int idx)136 static void xtensa_perf_event_update(struct perf_event *event,
137 				     struct hw_perf_event *hwc, int idx)
138 {
139 	uint64_t prev_raw_count, new_raw_count;
140 	int64_t delta;
141 
142 	do {
143 		prev_raw_count = local64_read(&hwc->prev_count);
144 		new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
145 	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
146 				 new_raw_count) != prev_raw_count);
147 
148 	delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
149 
150 	local64_add(delta, &event->count);
151 	local64_sub(delta, &hwc->period_left);
152 }
153 
xtensa_perf_event_set_period(struct perf_event * event,struct hw_perf_event * hwc,int idx)154 static bool xtensa_perf_event_set_period(struct perf_event *event,
155 					 struct hw_perf_event *hwc, int idx)
156 {
157 	bool rc = false;
158 	s64 left;
159 
160 	if (!is_sampling_event(event)) {
161 		left = XTENSA_PMU_COUNTER_MAX;
162 	} else {
163 		s64 period = hwc->sample_period;
164 
165 		left = local64_read(&hwc->period_left);
166 		if (left <= -period) {
167 			left = period;
168 			local64_set(&hwc->period_left, left);
169 			hwc->last_period = period;
170 			rc = true;
171 		} else if (left <= 0) {
172 			left += period;
173 			local64_set(&hwc->period_left, left);
174 			hwc->last_period = period;
175 			rc = true;
176 		}
177 		if (left > XTENSA_PMU_COUNTER_MAX)
178 			left = XTENSA_PMU_COUNTER_MAX;
179 	}
180 
181 	local64_set(&hwc->prev_count, -left);
182 	xtensa_pmu_write_counter(idx, -left);
183 	perf_event_update_userpage(event);
184 
185 	return rc;
186 }
187 
xtensa_pmu_enable(struct pmu * pmu)188 static void xtensa_pmu_enable(struct pmu *pmu)
189 {
190 	set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
191 }
192 
xtensa_pmu_disable(struct pmu * pmu)193 static void xtensa_pmu_disable(struct pmu *pmu)
194 {
195 	set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
196 }
197 
xtensa_pmu_event_init(struct perf_event * event)198 static int xtensa_pmu_event_init(struct perf_event *event)
199 {
200 	int ret;
201 
202 	switch (event->attr.type) {
203 	case PERF_TYPE_HARDWARE:
204 		if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
205 		    xtensa_hw_ctl[event->attr.config] == 0)
206 			return -EINVAL;
207 		event->hw.config = xtensa_hw_ctl[event->attr.config];
208 		return 0;
209 
210 	case PERF_TYPE_HW_CACHE:
211 		ret = xtensa_pmu_cache_event(event->attr.config);
212 		if (ret < 0)
213 			return ret;
214 		event->hw.config = ret;
215 		return 0;
216 
217 	case PERF_TYPE_RAW:
218 		/* Not 'previous counter' select */
219 		if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
220 		    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
221 			return -EINVAL;
222 		event->hw.config = (event->attr.config &
223 				    (XTENSA_PMU_PMCTRL_KRNLCNT |
224 				     XTENSA_PMU_PMCTRL_TRACELEVEL |
225 				     XTENSA_PMU_PMCTRL_SELECT |
226 				     XTENSA_PMU_PMCTRL_MASK)) |
227 			XTENSA_PMU_PMCTRL_INTEN;
228 		return 0;
229 
230 	default:
231 		return -ENOENT;
232 	}
233 }
234 
235 /*
236  * Starts/Stops a counter present on the PMU. The PMI handler
237  * should stop the counter when perf_event_overflow() returns
238  * !0. ->start() will be used to continue.
239  */
xtensa_pmu_start(struct perf_event * event,int flags)240 static void xtensa_pmu_start(struct perf_event *event, int flags)
241 {
242 	struct hw_perf_event *hwc = &event->hw;
243 	int idx = hwc->idx;
244 
245 	if (WARN_ON_ONCE(idx == -1))
246 		return;
247 
248 	if (flags & PERF_EF_RELOAD) {
249 		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
250 		xtensa_perf_event_set_period(event, hwc, idx);
251 	}
252 
253 	hwc->state = 0;
254 
255 	set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
256 }
257 
xtensa_pmu_stop(struct perf_event * event,int flags)258 static void xtensa_pmu_stop(struct perf_event *event, int flags)
259 {
260 	struct hw_perf_event *hwc = &event->hw;
261 	int idx = hwc->idx;
262 
263 	if (!(hwc->state & PERF_HES_STOPPED)) {
264 		set_er(0, XTENSA_PMU_PMCTRL(idx));
265 		set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
266 		       XTENSA_PMU_PMSTAT(idx));
267 		hwc->state |= PERF_HES_STOPPED;
268 	}
269 
270 	if ((flags & PERF_EF_UPDATE) &&
271 	    !(event->hw.state & PERF_HES_UPTODATE)) {
272 		xtensa_perf_event_update(event, &event->hw, idx);
273 		event->hw.state |= PERF_HES_UPTODATE;
274 	}
275 }
276 
277 /*
278  * Adds/Removes a counter to/from the PMU, can be done inside
279  * a transaction, see the ->*_txn() methods.
280  */
xtensa_pmu_add(struct perf_event * event,int flags)281 static int xtensa_pmu_add(struct perf_event *event, int flags)
282 {
283 	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
284 	struct hw_perf_event *hwc = &event->hw;
285 	int idx = hwc->idx;
286 
287 	if (__test_and_set_bit(idx, ev->used_mask)) {
288 		idx = find_first_zero_bit(ev->used_mask,
289 					  XCHAL_NUM_PERF_COUNTERS);
290 		if (idx == XCHAL_NUM_PERF_COUNTERS)
291 			return -EAGAIN;
292 
293 		__set_bit(idx, ev->used_mask);
294 		hwc->idx = idx;
295 	}
296 	ev->event[idx] = event;
297 
298 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
299 
300 	if (flags & PERF_EF_START)
301 		xtensa_pmu_start(event, PERF_EF_RELOAD);
302 
303 	perf_event_update_userpage(event);
304 	return 0;
305 }
306 
xtensa_pmu_del(struct perf_event * event,int flags)307 static void xtensa_pmu_del(struct perf_event *event, int flags)
308 {
309 	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
310 
311 	xtensa_pmu_stop(event, PERF_EF_UPDATE);
312 	__clear_bit(event->hw.idx, ev->used_mask);
313 	perf_event_update_userpage(event);
314 }
315 
xtensa_pmu_read(struct perf_event * event)316 static void xtensa_pmu_read(struct perf_event *event)
317 {
318 	xtensa_perf_event_update(event, &event->hw, event->hw.idx);
319 }
320 
callchain_trace(struct stackframe * frame,void * data)321 static int callchain_trace(struct stackframe *frame, void *data)
322 {
323 	struct perf_callchain_entry_ctx *entry = data;
324 
325 	perf_callchain_store(entry, frame->pc);
326 	return 0;
327 }
328 
perf_callchain_kernel(struct perf_callchain_entry_ctx * entry,struct pt_regs * regs)329 void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
330 			   struct pt_regs *regs)
331 {
332 	xtensa_backtrace_kernel(regs, entry->max_stack,
333 				callchain_trace, NULL, entry);
334 }
335 
perf_callchain_user(struct perf_callchain_entry_ctx * entry,struct pt_regs * regs)336 void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
337 			 struct pt_regs *regs)
338 {
339 	xtensa_backtrace_user(regs, entry->max_stack,
340 			      callchain_trace, entry);
341 }
342 
perf_event_print_debug(void)343 void perf_event_print_debug(void)
344 {
345 	unsigned long flags;
346 	unsigned i;
347 
348 	local_irq_save(flags);
349 	pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
350 		get_er(XTENSA_PMU_PMG));
351 	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
352 		pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
353 			i, get_er(XTENSA_PMU_PM(i)),
354 			i, get_er(XTENSA_PMU_PMCTRL(i)),
355 			i, get_er(XTENSA_PMU_PMSTAT(i)));
356 	local_irq_restore(flags);
357 }
358 
xtensa_pmu_irq_handler(int irq,void * dev_id)359 irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
360 {
361 	irqreturn_t rc = IRQ_NONE;
362 	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
363 	unsigned i;
364 
365 	for_each_set_bit(i, ev->used_mask, XCHAL_NUM_PERF_COUNTERS) {
366 		uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
367 		struct perf_event *event = ev->event[i];
368 		struct hw_perf_event *hwc = &event->hw;
369 		u64 last_period;
370 
371 		if (!(v & XTENSA_PMU_PMSTAT_OVFL))
372 			continue;
373 
374 		set_er(v, XTENSA_PMU_PMSTAT(i));
375 		xtensa_perf_event_update(event, hwc, i);
376 		last_period = hwc->last_period;
377 		if (xtensa_perf_event_set_period(event, hwc, i)) {
378 			struct perf_sample_data data;
379 			struct pt_regs *regs = get_irq_regs();
380 
381 			perf_sample_data_init(&data, 0, last_period);
382 			if (perf_event_overflow(event, &data, regs))
383 				xtensa_pmu_stop(event, 0);
384 		}
385 
386 		rc = IRQ_HANDLED;
387 	}
388 	return rc;
389 }
390 
/* The PMU description registered with the perf core in xtensa_pmu_init() */
static struct pmu xtensa_pmu = {
	/* Set/clear the PMEN bit in PMG */
	.pmu_enable = xtensa_pmu_enable,
	.pmu_disable = xtensa_pmu_disable,
	/* Map the event attributes to a PMCTRL value */
	.event_init = xtensa_pmu_event_init,
	/* Claim/release a hardware counter on the current CPU */
	.add = xtensa_pmu_add,
	.del = xtensa_pmu_del,
	/* Write/zero the PMCTRL register of the event's counter */
	.start = xtensa_pmu_start,
	.stop = xtensa_pmu_stop,
	/* Fold the current counter value into the event count */
	.read = xtensa_pmu_read,
};
401 
xtensa_pmu_setup(unsigned int cpu)402 static int xtensa_pmu_setup(unsigned int cpu)
403 {
404 	unsigned i;
405 
406 	set_er(0, XTENSA_PMU_PMG);
407 	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
408 		set_er(0, XTENSA_PMU_PMCTRL(i));
409 		set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
410 	}
411 	return 0;
412 }
413 
xtensa_pmu_init(void)414 static int __init xtensa_pmu_init(void)
415 {
416 	int ret;
417 	int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
418 
419 	ret = cpuhp_setup_state(CPUHP_AP_PERF_XTENSA_STARTING,
420 				"perf/xtensa:starting", xtensa_pmu_setup,
421 				NULL);
422 	if (ret) {
423 		pr_err("xtensa_pmu: failed to register CPU-hotplug.\n");
424 		return ret;
425 	}
426 #if XTENSA_FAKE_NMI
427 	enable_irq(irq);
428 #else
429 	ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
430 			  "pmu", NULL);
431 	if (ret < 0)
432 		return ret;
433 #endif
434 
435 	ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
436 	if (ret)
437 		free_irq(irq, NULL);
438 
439 	return ret;
440 }
441 early_initcall(xtensa_pmu_init);
442