1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2008-2017 Andes Technology Corporation
4  *
5  * Reference ARMv7: Jean Pihet <jpihet@mvista.com>
6  * 2010 (c) MontaVista Software, LLC.
7  */
8 
9 #include <linux/perf_event.h>
10 #include <linux/bitmap.h>
11 #include <linux/export.h>
12 #include <linux/kernel.h>
13 #include <linux/of.h>
14 #include <linux/platform_device.h>
15 #include <linux/slab.h>
16 #include <linux/spinlock.h>
17 #include <linux/pm_runtime.h>
18 #include <linux/ftrace.h>
19 #include <linux/uaccess.h>
20 #include <linux/sched/clock.h>
21 #include <linux/percpu-defs.h>
22 
23 #include <asm/pmu.h>
24 #include <asm/irq_regs.h>
25 #include <asm/nds32.h>
26 #include <asm/stacktrace.h>
27 #include <asm/perf_event.h>
28 #include <nds32_intrinsic.h>
29 
30 /* Set at runtime when we know what CPU type we are. */
31 static struct nds32_pmu *cpu_pmu;
32 
33 static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
34 static void nds32_pmu_start(struct nds32_pmu *cpu_pmu);
35 static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu);
36 static struct platform_device_id cpu_pmu_plat_device_ids[] = {
37 	{.name = "nds32-pfm"},
38 	{},
39 };
40 
41 static int nds32_pmu_map_cache_event(const unsigned int (*cache_map)
42 				  [PERF_COUNT_HW_CACHE_MAX]
43 				  [PERF_COUNT_HW_CACHE_OP_MAX]
44 				  [PERF_COUNT_HW_CACHE_RESULT_MAX], u64 config)
45 {
46 	unsigned int cache_type, cache_op, cache_result, ret;
47 
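	/*
	 * perf encodes a hardware cache event as
	 * config = type | (op << 8) | (result << 16).
	 */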
48 	cache_type = (config >> 0) & 0xff;
49 	if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
50 		return -EINVAL;
51 
52 	cache_op = (config >> 8) & 0xff;
53 	if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
54 		return -EINVAL;
55 
56 	cache_result = (config >> 16) & 0xff;
57 	if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
58 		return -EINVAL;
59 
60 	ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
61 
62 	if (ret == CACHE_OP_UNSUPPORTED)
63 		return -ENOENT;
64 
65 	return ret;
66 }
67 
68 static int
69 nds32_pmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
70 		       u64 config)
71 {
72 	int mapping;
73 
74 	if (config >= PERF_COUNT_HW_MAX)
75 		return -ENOENT;
76 
77 	mapping = (*event_map)[config];
78 	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
79 }
80 
81 static int nds32_pmu_map_raw_event(u32 raw_event_mask, u64 config)
82 {
83 	int ev_type = (int)(config & raw_event_mask);
84 	int idx = config >> 8;
85 
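	/*
	 * For raw events the low bits (masked by raw_event_mask) select the
	 * event, while config >> 8 selects the counter (0, 1 or 2).  The
	 * event is then moved into the driver's linear SPAv3 numbering and
	 * range-checked against the events selectable by that counter.
	 */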
86 	switch (idx) {
87 	case 0:
88 		ev_type = PFM_OFFSET_MAGIC_0 + ev_type;
89 		if (ev_type >= SPAV3_0_SEL_LAST || ev_type <= SPAV3_0_SEL_BASE)
90 			return -ENOENT;
91 		break;
92 	case 1:
93 		ev_type = PFM_OFFSET_MAGIC_1 + ev_type;
94 		if (ev_type >= SPAV3_1_SEL_LAST || ev_type <= SPAV3_1_SEL_BASE)
95 			return -ENOENT;
96 		break;
97 	case 2:
98 		ev_type = PFM_OFFSET_MAGIC_2 + ev_type;
99 		if (ev_type >= SPAV3_2_SEL_LAST || ev_type <= SPAV3_2_SEL_BASE)
100 			return -ENOENT;
101 		break;
102 	default:
103 		return -ENOENT;
104 	}
105 
106 	return ev_type;
107 }
108 
109 int
110 nds32_pmu_map_event(struct perf_event *event,
111 		    const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
112 		    const unsigned int (*cache_map)
113 		    [PERF_COUNT_HW_CACHE_MAX]
114 		    [PERF_COUNT_HW_CACHE_OP_MAX]
115 		    [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask)
116 {
117 	u64 config = event->attr.config;
118 
119 	switch (event->attr.type) {
120 	case PERF_TYPE_HARDWARE:
121 		return nds32_pmu_map_hw_event(event_map, config);
122 	case PERF_TYPE_HW_CACHE:
123 		return nds32_pmu_map_cache_event(cache_map, config);
124 	case PERF_TYPE_RAW:
125 		return nds32_pmu_map_raw_event(raw_event_mask, config);
126 	}
127 
128 	return -ENOENT;
129 }
130 
131 static int nds32_spav3_map_event(struct perf_event *event)
132 {
133 	return nds32_pmu_map_event(event, &nds32_pfm_perf_map,
134 				&nds32_pfm_perf_cache_map, SOFTWARE_EVENT_MASK);
135 }
136 
137 static inline u32 nds32_pfm_getreset_flags(void)
138 {
139 	/* Read overflow status */
140 	u32 val = __nds32__mfsr(NDS32_SR_PFM_CTL);
141 	u32 old_val = val;
142 
143 	/* Overflow bits are write-1-to-clear; write them back to clear the status */
144 	u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
145 
146 	__nds32__mtsr(val | ov_flag, NDS32_SR_PFM_CTL);
147 
148 	return old_val;
149 }
150 
151 static inline int nds32_pfm_has_overflowed(u32 pfm)
152 {
153 	u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
154 
155 	return pfm & ov_flag;
156 }
157 
158 static inline int nds32_pfm_counter_has_overflowed(u32 pfm, int idx)
159 {
160 	u32 mask = 0;
161 
162 	switch (idx) {
163 	case 0:
164 		mask = PFM_CTL_OVF[0];
165 		break;
166 	case 1:
167 		mask = PFM_CTL_OVF[1];
168 		break;
169 	case 2:
170 		mask = PFM_CTL_OVF[2];
171 		break;
172 	default:
173 		pr_err("%s index wrong\n", __func__);
174 		break;
175 	}
176 	return pfm & mask;
177 }
178 
179 /*
180  * Set the next IRQ period, based on the hwc->period_left value.
181  * To be called with the event disabled in hw:
182  */
183 int nds32_pmu_event_set_period(struct perf_event *event)
184 {
185 	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
186 	struct hw_perf_event *hwc = &event->hw;
187 	s64 left = local64_read(&hwc->period_left);
188 	s64 period = hwc->sample_period;
189 	int ret = 0;
190 
191 	/* The period may have been changed by PERF_EVENT_IOC_PERIOD */
192 	if (unlikely(period != hwc->last_period))
193 		left = period - (hwc->last_period - left);
194 
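	/*
	 * The counter ran past the end of the period (or the period just
	 * shrank): start a fresh period rather than trying to catch up.
	 */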
195 	if (unlikely(left <= -period)) {
196 		left = period;
197 		local64_set(&hwc->period_left, left);
198 		hwc->last_period = period;
199 		ret = 1;
200 	}
201 
202 	if (unlikely(left <= 0)) {
203 		left += period;
204 		local64_set(&hwc->period_left, left);
205 		hwc->last_period = period;
206 		ret = 1;
207 	}
208 
209 	if (left > (s64)nds32_pmu->max_period)
210 		left = nds32_pmu->max_period;
211 
212 	/*
213 	 * The hw event starts counting from this event offset,
214 	 * mark it to be able to extract future "deltas":
215 	 */
216 	local64_set(&hwc->prev_count, (u64)(-left));
217 
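	/*
	 * Program the counter with -left so that it overflows, and raises
	 * the PMU interrupt, after "left" more events.
	 */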
218 	nds32_pmu->write_counter(event, (u64)(-left) & nds32_pmu->max_period);
219 
220 	perf_event_update_userpage(event);
221 
222 	return ret;
223 }
224 
225 static irqreturn_t nds32_pmu_handle_irq(int irq_num, void *dev)
226 {
227 	u32 pfm;
228 	struct perf_sample_data data;
229 	struct nds32_pmu *cpu_pmu = (struct nds32_pmu *)dev;
230 	struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
231 	struct pt_regs *regs;
232 	int idx;
233 	/*
234 	 * Get and reset the IRQ flags
235 	 */
236 	pfm = nds32_pfm_getreset_flags();
237 
238 	/*
239 	 * Did an overflow occur?
240 	 */
241 	if (!nds32_pfm_has_overflowed(pfm))
242 		return IRQ_NONE;
243 
244 	/*
245 	 * Handle the counter(s) overflow(s)
246 	 */
247 	regs = get_irq_regs();
248 
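	/*
	 * Stop the PMU while the overflowed counters are updated and
	 * reprogrammed; it is restarted once they have been handled.
	 */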
249 	nds32_pmu_stop(cpu_pmu);
250 	for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
251 		struct perf_event *event = cpuc->events[idx];
252 		struct hw_perf_event *hwc;
253 
254 		/* Ignore if we don't have an event. */
255 		if (!event)
256 			continue;
257 
258 		/*
259 		 * We have a single interrupt for all counters. Check that
260 		 * each counter has overflowed before we process it.
261 		 */
262 		if (!nds32_pfm_counter_has_overflowed(pfm, idx))
263 			continue;
264 
265 		hwc = &event->hw;
266 		nds32_pmu_event_update(event);
267 		perf_sample_data_init(&data, 0, hwc->last_period);
268 		if (!nds32_pmu_event_set_period(event))
269 			continue;
270 
271 		if (perf_event_overflow(event, &data, regs))
272 			cpu_pmu->disable(event);
273 	}
274 	nds32_pmu_start(cpu_pmu);
275 	/*
276 	 * Handle the pending perf events.
277 	 *
278 	 * Note: this call *must* be run with interrupts disabled. For
279 	 * platforms that can have the PMU interrupts raised as an NMI, this
280 	 * will not work.
281 	 */
282 	irq_work_run();
283 
284 	return IRQ_HANDLED;
285 }
286 
287 static inline int nds32_pfm_counter_valid(struct nds32_pmu *cpu_pmu, int idx)
288 {
289 	return ((idx >= 0) && (idx < cpu_pmu->num_events));
290 }
291 
292 static inline int nds32_pfm_disable_counter(int idx)
293 {
294 	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
295 	u32 mask = 0;
296 
297 	mask = PFM_CTL_EN[idx];
298 	val &= ~mask;
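	/*
	 * The OVF bits are write-1-to-clear (see nds32_pfm_getreset_flags()),
	 * so keep them zero here to avoid clearing pending overflow status
	 * as a side effect of this write.
	 */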
299 	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
300 	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
301 	return idx;
302 }
303 
304 /*
305  * Add an event filter to a given event.
306  */
307 static int nds32_pmu_set_event_filter(struct hw_perf_event *event,
308 				      struct perf_event_attr *attr)
309 {
310 	unsigned long config_base = 0;
311 	int idx = event->idx;
312 	unsigned long no_kernel_tracing = 0;
313 	unsigned long no_user_tracing = 0;
314 	/* If index is -1, do not do anything */
315 	if (idx == -1)
316 		return 0;
317 
318 	no_kernel_tracing = PFM_CTL_KS[idx];
319 	no_user_tracing = PFM_CTL_KU[idx];
320 	/*
321 	 * Default: enable both kernel and user mode tracing.
322 	 */
323 	if (attr->exclude_user)
324 		config_base |= no_user_tracing;
325 
326 	if (attr->exclude_kernel)
327 		config_base |= no_kernel_tracing;
328 
329 	/*
330 	 * Install the filter into config_base as this is used to
331 	 * construct the event type.
332 	 */
333 	event->config_base |= config_base;
334 	return 0;
335 }
336 
337 static inline void nds32_pfm_write_evtsel(int idx, u32 evnum)
338 {
339 	u32 offset = 0;
340 	u32 ori_val = __nds32__mfsr(NDS32_SR_PFM_CTL);
341 	u32 ev_mask = 0;
342 	u32 no_kernel_mask = 0;
343 	u32 no_user_mask = 0;
344 	u32 val;
345 
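	/*
	 * PFM_CTL_OFFSEL[idx] is the bit offset of this counter's event
	 * selector field within the PFM_CTL register.
	 */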
346 	offset = PFM_CTL_OFFSEL[idx];
347 	/* Clear previous mode selection, and write new one */
348 	no_kernel_mask = PFM_CTL_KS[idx];
349 	no_user_mask = PFM_CTL_KU[idx];
350 	ori_val &= ~no_kernel_mask;
351 	ori_val &= ~no_user_mask;
352 	if (evnum & no_kernel_mask)
353 		ori_val |= no_kernel_mask;
354 
355 	if (evnum & no_user_mask)
356 		ori_val |= no_user_mask;
357 
358 	/* Clear previous event selection */
359 	ev_mask = PFM_CTL_SEL[idx];
360 	ori_val &= ~ev_mask;
361 	evnum &= SOFTWARE_EVENT_MASK;
362 
363 	/* undo the linear mapping */
364 	evnum = get_converted_evet_hw_num(evnum);
365 	val = ori_val | (evnum << offset);
366 	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
367 	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
368 }
369 
370 static inline int nds32_pfm_enable_counter(int idx)
371 {
372 	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
373 	u32 mask = 0;
374 
375 	mask = PFM_CTL_EN[idx];
376 	val |= mask;
377 	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
378 	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
379 	return idx;
380 }
381 
382 static inline int nds32_pfm_enable_intens(int idx)
383 {
384 	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
385 	u32 mask = 0;
386 
387 	mask = PFM_CTL_IE[idx];
388 	val |= mask;
389 	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
390 	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
391 	return idx;
392 }
393 
394 static inline int nds32_pfm_disable_intens(int idx)
395 {
396 	unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
397 	u32 mask = 0;
398 
399 	mask = PFM_CTL_IE[idx];
400 	val &= ~mask;
401 	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
402 	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
403 	return idx;
404 }
405 
406 static int event_requires_mode_exclusion(struct perf_event_attr *attr)
407 {
408 	/* NDS32 does not support the other exclusion modes */
409 	return attr->exclude_user || attr->exclude_kernel;
410 }
411 
412 static void nds32_pmu_enable_event(struct perf_event *event)
413 {
414 	unsigned long flags;
415 	unsigned int evnum = 0;
416 	struct hw_perf_event *hwc = &event->hw;
417 	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
418 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
419 	int idx = hwc->idx;
420 
421 	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
422 		pr_err("CPU enabling wrong pfm counter IRQ enable\n");
423 		return;
424 	}
425 
426 	/*
427 	 * Enable counter and interrupt, and set the counter to count
428 	 * the event that we're interested in.
429 	 */
430 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
431 
432 	/*
433 	 * Disable counter
434 	 */
435 	nds32_pfm_disable_counter(idx);
436 
437 	/*
438 	 * Check whether we need to exclude the counter from certain modes.
439 	 */
440 	if ((!cpu_pmu->set_event_filter ||
441 	     cpu_pmu->set_event_filter(hwc, &event->attr)) &&
442 	     event_requires_mode_exclusion(&event->attr)) {
443 		pr_notice
444 		("NDS32 performance counters do not support mode exclusion\n");
445 		hwc->config_base = 0;
446 	}
447 	/* Write event */
448 	evnum = hwc->config_base;
449 	nds32_pfm_write_evtsel(idx, evnum);
450 
451 	/*
452 	 * Enable interrupt for this counter
453 	 */
454 	nds32_pfm_enable_intens(idx);
455 
456 	/*
457 	 * Enable counter
458 	 */
459 	nds32_pfm_enable_counter(idx);
460 
461 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
462 }
463 
464 static void nds32_pmu_disable_event(struct perf_event *event)
465 {
466 	unsigned long flags;
467 	struct hw_perf_event *hwc = &event->hw;
468 	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
469 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
470 	int idx = hwc->idx;
471 
472 	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
473 		pr_err("CPU disabling wrong pfm counter IRQ enable %d\n", idx);
474 		return;
475 	}
476 
477 	/*
478 	 * Disable counter and interrupt
479 	 */
480 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
481 
482 	/*
483 	 * Disable counter
484 	 */
485 	nds32_pfm_disable_counter(idx);
486 
487 	/*
488 	 * Disable interrupt for this counter
489 	 */
490 	nds32_pfm_disable_intens(idx);
491 
492 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
493 }
494 
495 static inline u32 nds32_pmu_read_counter(struct perf_event *event)
496 {
497 	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
498 	struct hw_perf_event *hwc = &event->hw;
499 	int idx = hwc->idx;
500 	u32 count = 0;
501 
502 	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
503 		pr_err("CPU reading wrong counter %d\n", idx);
504 	} else {
505 		switch (idx) {
506 		case PFMC0:
507 			count = __nds32__mfsr(NDS32_SR_PFMC0);
508 			break;
509 		case PFMC1:
510 			count = __nds32__mfsr(NDS32_SR_PFMC1);
511 			break;
512 		case PFMC2:
513 			count = __nds32__mfsr(NDS32_SR_PFMC2);
514 			break;
515 		default:
516 			pr_err
517 			    ("%s: CPU has no performance counters %d\n",
518 			     __func__, idx);
519 		}
520 	}
521 	return count;
522 }
523 
524 static inline void nds32_pmu_write_counter(struct perf_event *event, u32 value)
525 {
526 	struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
527 	struct hw_perf_event *hwc = &event->hw;
528 	int idx = hwc->idx;
529 
530 	if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
531 		pr_err("CPU writing wrong counter %d\n", idx);
532 	} else {
533 		switch (idx) {
534 		case PFMC0:
535 			__nds32__mtsr_isb(value, NDS32_SR_PFMC0);
536 			break;
537 		case PFMC1:
538 			__nds32__mtsr_isb(value, NDS32_SR_PFMC1);
539 			break;
540 		case PFMC2:
541 			__nds32__mtsr_isb(value, NDS32_SR_PFMC2);
542 			break;
543 		default:
544 			pr_err
545 			    ("%s: CPU has no performance counters %d\n",
546 			     __func__, idx);
547 		}
548 	}
549 }
550 
551 static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc,
552 				   struct perf_event *event)
553 {
554 	int idx;
555 	struct hw_perf_event *hwc = &event->hw;
556 	/*
557 	 * The current implementation maps cycles, instruction count and
558 	 * cache-miss events to specific counters, although more than one
559 	 * of the three counters is able to count these events.
560 	 *
561 	 * SOFTWARE_EVENT_MASK is the mask used to extract the event number
562 	 * from config_base.
563 	 * The mapping policy can be changed, but the event number must not
564 	 * exceed 8 bits, since that is a hardware limit.
565 	 * The last event number in the linear mapping is SPAV3_2_SEL_LAST.
566 	 */
567 	unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK;
568 
569 	idx = get_converted_event_idx(evtype);
570 	/*
571 	 * Try to get a counter for the corresponding event
572 	 */
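	/*
	 * Prefer the counter this event maps to; cycle and completed
	 * instruction events may fall back to the other counters that can
	 * also count them.
	 */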
573 	if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) {
574 		if (!test_and_set_bit(idx, cpuc->used_mask))
575 			return idx;
576 		if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask))
577 			return NDS32_IDX_COUNTER0;
578 		if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
579 			return NDS32_IDX_COUNTER1;
580 	} else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) {
581 		if (!test_and_set_bit(idx, cpuc->used_mask))
582 			return idx;
583 		else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
584 			return NDS32_IDX_COUNTER1;
585 		else if (!test_and_set_bit
586 			 (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask))
587 			return NDS32_IDX_CYCLE_COUNTER;
588 	} else {
589 		if (!test_and_set_bit(idx, cpuc->used_mask))
590 			return idx;
591 	}
592 	return -EAGAIN;
593 }
594 
595 static void nds32_pmu_start(struct nds32_pmu *cpu_pmu)
596 {
597 	unsigned long flags;
598 	unsigned int val;
599 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
600 
601 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
602 
603 	/* Enable all counters; the NDS32 PFM has 3 counters */
604 	val = __nds32__mfsr(NDS32_SR_PFM_CTL);
605 	val |= (PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
606 	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
607 	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
608 
609 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
610 }
611 
612 static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu)
613 {
614 	unsigned long flags;
615 	unsigned int val;
616 	struct pmu_hw_events *events = cpu_pmu->get_hw_events();
617 
618 	raw_spin_lock_irqsave(&events->pmu_lock, flags);
619 
620 	/* Disable all counters; the NDS32 PFM has 3 counters */
621 	val = __nds32__mfsr(NDS32_SR_PFM_CTL);
622 	val &= ~(PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
623 	val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
624 	__nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
625 
626 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
627 }
628 
629 static void nds32_pmu_reset(void *info)
630 {
631 	u32 val = 0;
632 
633 	val |= (PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
634 	__nds32__mtsr(val, NDS32_SR_PFM_CTL);
635 	__nds32__mtsr(0, NDS32_SR_PFM_CTL);
636 	__nds32__mtsr(0, NDS32_SR_PFMC0);
637 	__nds32__mtsr(0, NDS32_SR_PFMC1);
638 	__nds32__mtsr(0, NDS32_SR_PFMC2);
639 }
640 
641 static void nds32_pmu_init(struct nds32_pmu *cpu_pmu)
642 {
643 	cpu_pmu->handle_irq = nds32_pmu_handle_irq;
644 	cpu_pmu->enable = nds32_pmu_enable_event;
645 	cpu_pmu->disable = nds32_pmu_disable_event;
646 	cpu_pmu->read_counter = nds32_pmu_read_counter;
647 	cpu_pmu->write_counter = nds32_pmu_write_counter;
648 	cpu_pmu->get_event_idx = nds32_pmu_get_event_idx;
649 	cpu_pmu->start = nds32_pmu_start;
650 	cpu_pmu->stop = nds32_pmu_stop;
651 	cpu_pmu->reset = nds32_pmu_reset;
652 	cpu_pmu->max_period = 0xFFFFFFFF;	/* Maximum counts */
653 };
654 
655 static u32 nds32_read_num_pfm_events(void)
656 {
657 	/* The NDS32 SPAv3 PMU supports 3 counters */
658 	return 3;
659 }
660 
661 static int device_pmu_init(struct nds32_pmu *cpu_pmu)
662 {
663 	nds32_pmu_init(cpu_pmu);
664 	/*
665 	 * This should be a device-specific name; "PMU" is a reasonable
666 	 * generic fallback.
667 	 */
668 	cpu_pmu->name = "nds32v3-pmu";
669 	cpu_pmu->map_event = nds32_spav3_map_event;
670 	cpu_pmu->num_events = nds32_read_num_pfm_events();
671 	cpu_pmu->set_event_filter = nds32_pmu_set_event_filter;
672 	return 0;
673 }
674 
675 /*
676  * CPU PMU identification and probing.
677  */
678 static int probe_current_pmu(struct nds32_pmu *pmu)
679 {
680 	int ret;
681 
682 	get_cpu();
683 	ret = -ENODEV;
684 	/*
685 	 * If there are various CPU types, each with its own PMU,
686 	 * initialize with the corresponding one; currently only the
687 	 * SPAv3 PMU is supported.
688 	 */
689 	ret = device_pmu_init(pmu);
690 	put_cpu();
691 	return ret;
692 }
693 
694 static void nds32_pmu_enable(struct pmu *pmu)
695 {
696 	struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
697 	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
698 	int enabled = bitmap_weight(hw_events->used_mask,
699 				    nds32_pmu->num_events);
700 
701 	if (enabled)
702 		nds32_pmu->start(nds32_pmu);
703 }
704 
705 static void nds32_pmu_disable(struct pmu *pmu)
706 {
707 	struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
708 
709 	nds32_pmu->stop(nds32_pmu);
710 }
711 
712 static void nds32_pmu_release_hardware(struct nds32_pmu *nds32_pmu)
713 {
714 	nds32_pmu->free_irq(nds32_pmu);
715 	pm_runtime_put_sync(&nds32_pmu->plat_device->dev);
716 }
717 
718 static irqreturn_t nds32_pmu_dispatch_irq(int irq, void *dev)
719 {
720 	struct nds32_pmu *nds32_pmu = (struct nds32_pmu *)dev;
721 	int ret;
722 	u64 start_clock, finish_clock;
723 
724 	start_clock = local_clock();
725 	ret = nds32_pmu->handle_irq(irq, dev);
726 	finish_clock = local_clock();
727 
728 	perf_sample_event_took(finish_clock - start_clock);
729 	return ret;
730 }
731 
732 static int nds32_pmu_reserve_hardware(struct nds32_pmu *nds32_pmu)
733 {
734 	int err;
735 	struct platform_device *pmu_device = nds32_pmu->plat_device;
736 
737 	if (!pmu_device)
738 		return -ENODEV;
739 
740 	pm_runtime_get_sync(&pmu_device->dev);
741 	err = nds32_pmu->request_irq(nds32_pmu, nds32_pmu_dispatch_irq);
742 	if (err) {
743 		nds32_pmu_release_hardware(nds32_pmu);
744 		return err;
745 	}
746 
747 	return 0;
748 }
749 
750 static int
751 validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
752 	       struct perf_event *event)
753 {
754 	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
755 
756 	if (is_software_event(event))
757 		return 1;
758 
759 	if (event->pmu != pmu)
760 		return 0;
761 
762 	if (event->state < PERF_EVENT_STATE_OFF)
763 		return 1;
764 
765 	if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
766 		return 1;
767 
768 	return nds32_pmu->get_event_idx(hw_events, event) >= 0;
769 }
770 
771 static int validate_group(struct perf_event *event)
772 {
773 	struct perf_event *sibling, *leader = event->group_leader;
774 	struct pmu_hw_events fake_pmu;
775 
776 	/*
777 	 * Initialize the fake PMU. We only need to populate the
778 	 * used_mask for the purposes of validation.
779 	 */
780 	memset(fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
781 
782 	if (!validate_event(event->pmu, &fake_pmu, leader))
783 		return -EINVAL;
784 
785 	for_each_sibling_event(sibling, leader) {
786 		if (!validate_event(event->pmu, &fake_pmu, sibling))
787 			return -EINVAL;
788 	}
789 
790 	if (!validate_event(event->pmu, &fake_pmu, event))
791 		return -EINVAL;
792 
793 	return 0;
794 }
795 
796 static int __hw_perf_event_init(struct perf_event *event)
797 {
798 	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
799 	struct hw_perf_event *hwc = &event->hw;
800 	int mapping;
801 
802 	mapping = nds32_pmu->map_event(event);
803 
804 	if (mapping < 0) {
805 		pr_debug("event %x:%llx not supported\n", event->attr.type,
806 			 event->attr.config);
807 		return mapping;
808 	}
809 
810 	/*
811 	 * We don't assign an index until we actually place the event onto
812 	 * hardware. Use -1 to signify that we haven't decided where to put it
813 	 * yet. For SMP systems, each core has its own PMU so we can't do any
814 	 * clever allocation or constraints checking at this point.
815 	 */
816 	hwc->idx = -1;
817 	hwc->config_base = 0;
818 	hwc->config = 0;
819 	hwc->event_base = 0;
820 
821 	/*
822 	 * Check whether we need to exclude the counter from certain modes.
823 	 */
824 	if ((!nds32_pmu->set_event_filter ||
825 	     nds32_pmu->set_event_filter(hwc, &event->attr)) &&
826 	    event_requires_mode_exclusion(&event->attr)) {
827 		pr_debug
828 			("NDS performance counters do not support mode exclusion\n");
829 		return -EOPNOTSUPP;
830 	}
831 
832 	/*
833 	 * Store the event encoding into the config_base field.
834 	 */
835 	hwc->config_base |= (unsigned long)mapping;
836 
837 	if (!hwc->sample_period) {
838 		/*
839 		 * For non-sampling runs, limit the sample_period to half
840 		 * of the counter width. That way, the new counter value
841 		 * is far less likely to overtake the previous one unless
842 		 * you have some serious IRQ latency issues.
843 		 */
844 		hwc->sample_period = nds32_pmu->max_period >> 1;
845 		hwc->last_period = hwc->sample_period;
846 		local64_set(&hwc->period_left, hwc->sample_period);
847 	}
848 
849 	if (event->group_leader != event) {
850 		if (validate_group(event) != 0)
851 			return -EINVAL;
852 	}
853 
854 	return 0;
855 }
856 
857 static int nds32_pmu_event_init(struct perf_event *event)
858 {
859 	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
860 	int err = 0;
861 	atomic_t *active_events = &nds32_pmu->active_events;
862 
863 	/* does not support taken branch sampling */
864 	if (has_branch_stack(event))
865 		return -EOPNOTSUPP;
866 
867 	if (nds32_pmu->map_event(event) == -ENOENT)
868 		return -ENOENT;
869 
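	/*
	 * The first active event reserves the hardware and registers the
	 * overflow IRQ handler; later events only take a reference.
	 */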
870 	if (!atomic_inc_not_zero(active_events)) {
871 		if (atomic_read(active_events) == 0) {
872 			/* Register irq handler */
873 			err = nds32_pmu_reserve_hardware(nds32_pmu);
874 		}
875 
876 		if (!err)
877 			atomic_inc(active_events);
878 	}
879 
880 	if (err)
881 		return err;
882 
883 	err = __hw_perf_event_init(event);
884 
885 	return err;
886 }
887 
888 static void nds32_start(struct perf_event *event, int flags)
889 {
890 	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
891 	struct hw_perf_event *hwc = &event->hw;
892 	/*
893 	 * NDS pmu always has to reprogram the period, so ignore
894 	 * PERF_EF_RELOAD, see the comment below.
895 	 */
896 	if (flags & PERF_EF_RELOAD)
897 		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
898 
899 	hwc->state = 0;
900 	/* Set the period for the event. */
901 	nds32_pmu_event_set_period(event);
902 
903 	nds32_pmu->enable(event);
904 }
905 
906 static int nds32_pmu_add(struct perf_event *event, int flags)
907 {
908 	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
909 	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
910 	struct hw_perf_event *hwc = &event->hw;
911 	int idx;
912 	int err = 0;
913 
914 	perf_pmu_disable(event->pmu);
915 
916 	/* If we don't have a space for the counter then finish early. */
917 	idx = nds32_pmu->get_event_idx(hw_events, event);
918 	if (idx < 0) {
919 		err = idx;
920 		goto out;
921 	}
922 
923 	/*
924 	 * If there is an event in the counter we are going to use then make
925 	 * sure it is disabled.
926 	 */
927 	event->hw.idx = idx;
928 	nds32_pmu->disable(event);
929 	hw_events->events[idx] = event;
930 
931 	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
932 	if (flags & PERF_EF_START)
933 		nds32_start(event, PERF_EF_RELOAD);
934 
935 	/* Propagate our changes to the userspace mapping. */
936 	perf_event_update_userpage(event);
937 
938 out:
939 	perf_pmu_enable(event->pmu);
940 	return err;
941 }
942 
943 u64 nds32_pmu_event_update(struct perf_event *event)
944 {
945 	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
946 	struct hw_perf_event *hwc = &event->hw;
947 	u64 delta, prev_raw_count, new_raw_count;
948 
949 again:
950 	prev_raw_count = local64_read(&hwc->prev_count);
951 	new_raw_count = nds32_pmu->read_counter(event);
952 
953 	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
954 			    new_raw_count) != prev_raw_count) {
955 		goto again;
956 	}
957 	/*
958 	 * Whether the counter overflowed or not, the unsigned
959 	 * subtraction always yields the correct delta
960 	 */
961 	delta = (new_raw_count - prev_raw_count) & nds32_pmu->max_period;
962 
963 	local64_add(delta, &event->count);
964 	local64_sub(delta, &hwc->period_left);
965 
966 	return new_raw_count;
967 }
968 
969 static void nds32_stop(struct perf_event *event, int flags)
970 {
971 	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
972 	struct hw_perf_event *hwc = &event->hw;
973 	/*
974 	 * NDS pmu always has to update the counter, so ignore
975 	 * PERF_EF_UPDATE, see comments in nds32_start().
976 	 */
977 	if (!(hwc->state & PERF_HES_STOPPED)) {
978 		nds32_pmu->disable(event);
979 		nds32_pmu_event_update(event);
980 		hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
981 	}
982 }
983 
984 static void nds32_pmu_del(struct perf_event *event, int flags)
985 {
986 	struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
987 	struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
988 	struct hw_perf_event *hwc = &event->hw;
989 	int idx = hwc->idx;
990 
991 	nds32_stop(event, PERF_EF_UPDATE);
992 	hw_events->events[idx] = NULL;
993 	clear_bit(idx, hw_events->used_mask);
994 
995 	perf_event_update_userpage(event);
996 }
997 
998 static void nds32_pmu_read(struct perf_event *event)
999 {
1000 	nds32_pmu_event_update(event);
1001 }
1002 
1003 /* Please refer to SPAv3 for more hardware specific details */
1004 PMU_FORMAT_ATTR(event, "config:0-63");
1005 
1006 static struct attribute *nds32_arch_formats_attr[] = {
1007 	&format_attr_event.attr,
1008 	NULL,
1009 };
1010 
1011 static struct attribute_group nds32_pmu_format_group = {
1012 	.name = "format",
1013 	.attrs = nds32_arch_formats_attr,
1014 };
1015 
1016 static ssize_t nds32_pmu_cpumask_show(struct device *dev,
1017 				      struct device_attribute *attr,
1018 				      char *buf)
1019 {
1020 	return 0;
1021 }
1022 
1023 static DEVICE_ATTR(cpus, 0444, nds32_pmu_cpumask_show, NULL);
1024 
1025 static struct attribute *nds32_pmu_common_attrs[] = {
1026 	&dev_attr_cpus.attr,
1027 	NULL,
1028 };
1029 
1030 static struct attribute_group nds32_pmu_common_group = {
1031 	.attrs = nds32_pmu_common_attrs,
1032 };
1033 
1034 static const struct attribute_group *nds32_pmu_attr_groups[] = {
1035 	&nds32_pmu_format_group,
1036 	&nds32_pmu_common_group,
1037 	NULL,
1038 };
1039 
1040 static void nds32_init(struct nds32_pmu *nds32_pmu)
1041 {
1042 	atomic_set(&nds32_pmu->active_events, 0);
1043 
1044 	nds32_pmu->pmu = (struct pmu) {
1045 		.pmu_enable = nds32_pmu_enable,
1046 		.pmu_disable = nds32_pmu_disable,
1047 		.attr_groups = nds32_pmu_attr_groups,
1048 		.event_init = nds32_pmu_event_init,
1049 		.add = nds32_pmu_add,
1050 		.del = nds32_pmu_del,
1051 		.start = nds32_start,
1052 		.stop = nds32_stop,
1053 		.read = nds32_pmu_read,
1054 	};
1055 }
1056 
1057 int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type)
1058 {
1059 	nds32_init(nds32_pmu);
1060 	pm_runtime_enable(&nds32_pmu->plat_device->dev);
1061 	pr_info("enabled with %s PMU driver, %d counters available\n",
1062 		nds32_pmu->name, nds32_pmu->num_events);
1063 	return perf_pmu_register(&nds32_pmu->pmu, nds32_pmu->name, type);
1064 }
1065 
1066 static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
1067 {
1068 	return this_cpu_ptr(&cpu_hw_events);
1069 }
1070 
1071 static int cpu_pmu_request_irq(struct nds32_pmu *cpu_pmu, irq_handler_t handler)
1072 {
1073 	int err, irq, irqs;
1074 	struct platform_device *pmu_device = cpu_pmu->plat_device;
1075 
1076 	if (!pmu_device)
1077 		return -ENODEV;
1078 
1079 	irqs = min(pmu_device->num_resources, num_possible_cpus());
1080 	if (irqs < 1) {
1081 		pr_err("no irqs for PMUs defined\n");
1082 		return -ENODEV;
1083 	}
1084 
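	/* All counters share a single overflow interrupt line */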
1085 	irq = platform_get_irq(pmu_device, 0);
1086 	err = request_irq(irq, handler, IRQF_NOBALANCING, "nds32-pfm",
1087 			  cpu_pmu);
1088 	if (err) {
1089 		pr_err("unable to request IRQ%d for NDS PMU counters\n",
1090 		       irq);
1091 		return err;
1092 	}
1093 	return 0;
1094 }
1095 
1096 static void cpu_pmu_free_irq(struct nds32_pmu *cpu_pmu)
1097 {
1098 	int irq;
1099 	struct platform_device *pmu_device = cpu_pmu->plat_device;
1100 
1101 	irq = platform_get_irq(pmu_device, 0);
1102 	if (irq >= 0)
1103 		free_irq(irq, cpu_pmu);
1104 }
1105 
1106 static void cpu_pmu_init(struct nds32_pmu *cpu_pmu)
1107 {
1108 	int cpu;
1109 
1110 	for_each_possible_cpu(cpu)
1111 		raw_spin_lock_init(&per_cpu(cpu_hw_events, cpu).pmu_lock);
1112 
1113 	cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events;
1114 	cpu_pmu->request_irq = cpu_pmu_request_irq;
1115 	cpu_pmu->free_irq = cpu_pmu_free_irq;
1116 
1117 	/* Ensure the PMU has sane values out of reset. */
1118 	if (cpu_pmu->reset)
1119 		on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
1120 }
1121 
1122 static const struct of_device_id cpu_pmu_of_device_ids[] = {
1123 	{.compatible = "andestech,nds32v3-pmu",
1124 	 .data = device_pmu_init},
1125 	{},
1126 };
1127 
1128 static int cpu_pmu_device_probe(struct platform_device *pdev)
1129 {
1130 	const struct of_device_id *of_id;
1131 	int (*init_fn)(struct nds32_pmu *nds32_pmu);
1132 	struct device_node *node = pdev->dev.of_node;
1133 	struct nds32_pmu *pmu;
1134 	int ret = -ENODEV;
1135 
1136 	if (cpu_pmu) {
1137 		pr_notice("[perf] attempt to register multiple PMU devices!\n");
1138 		return -ENOSPC;
1139 	}
1140 
1141 	pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
1142 	if (!pmu)
1143 		return -ENOMEM;
1144 
1145 	of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node);
1146 	if (node && of_id) {
1147 		init_fn = of_id->data;
1148 		ret = init_fn(pmu);
1149 	} else {
1150 		ret = probe_current_pmu(pmu);
1151 	}
1152 
1153 	if (ret) {
1154 		pr_notice("[perf] failed to probe PMU!\n");
1155 		goto out_free;
1156 	}
1157 
1158 	cpu_pmu = pmu;
1159 	cpu_pmu->plat_device = pdev;
1160 	cpu_pmu_init(cpu_pmu);
1161 	ret = nds32_pmu_register(cpu_pmu, PERF_TYPE_RAW);
1162 
1163 	if (!ret)
1164 		return 0;
1165 
1166 out_free:
1167 	pr_notice("[perf] failed to register PMU devices!\n");
1168 	kfree(pmu);
1169 	return ret;
1170 }
1171 
1172 static struct platform_driver cpu_pmu_driver = {
1173 	.driver = {
1174 		   .name = "nds32-pfm",
1175 		   .of_match_table = cpu_pmu_of_device_ids,
1176 		   },
1177 	.probe = cpu_pmu_device_probe,
1178 	.id_table = cpu_pmu_plat_device_ids,
1179 };
1180 
1181 static int __init register_pmu_driver(void)
1182 {
1183 	int err = 0;
1184 
1185 	err = platform_driver_register(&cpu_pmu_driver);
1186 	if (err)
1187 		pr_notice("[perf] PMU initialization failed\n");
1188 	else
1189 		pr_notice("[perf] PMU initialization done\n");
1190 
1191 	return err;
1192 }
1193 
1194 device_initcall(register_pmu_driver);
1195 
1196 /*
1197  * References: arch/nds32/kernel/traps.c:__dump()
1198  * You will need to know the NDS ABI first.
1199  */
1200 static int unwind_frame_kernel(struct stackframe *frame)
1201 {
1202 	int graph = 0;
1203 #ifdef CONFIG_FRAME_POINTER
1204 	/* 0x3 means misalignment */
1205 	if (!kstack_end((void *)frame->fp) &&
1206 	    !((unsigned long)frame->fp & 0x3) &&
1207 	    ((unsigned long)frame->fp >= TASK_SIZE)) {
1208 		/*
1209 		 *	The array index is based on the ABI, the below graph
1210 		 *	illustrate the reasons.
1211 		 *	Function call procedure: "smw" and "lmw" will always
1212 		 *	update SP and FP for you automatically.
1213 		 *
1214 		 *	Stack                                 Relative Address
1215 		 *	|  |                                          0
1216 		 *	----
1217 		 *	|LP| <-- SP(before smw)  <-- FP(after smw)   -1
1218 		 *	----
1219 		 *	|FP|                                         -2
1220 		 *	----
1221 		 *	|  | <-- SP(after smw)                       -3
1222 		 */
1223 		frame->lp = ((unsigned long *)frame->fp)[-1];
1224 		frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
1225 		/* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
1226 		if (__kernel_text_address(frame->lp))
1227 			frame->lp = ftrace_graph_ret_addr
1228 						(NULL, &graph, frame->lp, NULL);
1229 
1230 		return 0;
1231 	} else {
1232 		return -EPERM;
1233 	}
1234 #else
1235 	/*
1236 	 * You can refer to arch/nds32/kernel/traps.c:__dump()
1237 	 * Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
1238 	 * And, the "sp" is not always correct.
1239 	 *
1240 	 *   Stack                                 Relative Address
1241 	 *   |  |                                          0
1242 	 *   ----
1243 	 *   |LP| <-- SP(before smw)                      -1
1244 	 *   ----
1245 	 *   |  | <-- SP(after smw)                       -2
1246 	 *   ----
1247 	 */
1248 	if (!kstack_end((void *)frame->sp)) {
1249 		frame->lp = ((unsigned long *)frame->sp)[1];
1250 		/* TODO: how should we handle the case where the
1251 		 * first "sp" value is not correct?
1252 		 */
1253 		if (__kernel_text_address(frame->lp))
1254 			frame->lp = ftrace_graph_ret_addr
1255 						(NULL, &graph, frame->lp, NULL);
1256 
1257 		frame->sp = ((unsigned long *)frame->sp) + 1;
1258 
1259 		return 0;
1260 	} else {
1261 		return -EPERM;
1262 	}
1263 #endif
1264 }
1265 
1266 static void notrace
1267 walk_stackframe(struct stackframe *frame,
1268 		int (*fn_record)(struct stackframe *, void *),
1269 		void *data)
1270 {
1271 	while (1) {
1272 		int ret;
1273 
1274 		if (fn_record(frame, data))
1275 			break;
1276 
1277 		ret = unwind_frame_kernel(frame);
1278 		if (ret < 0)
1279 			break;
1280 	}
1281 }
1282 
1283 /*
1284  * Gets called by walk_stackframe() for every stackframe. This will be called
1285  * whilst unwinding the stackframe and is like a subroutine return so we use
1286  * the PC.
1287  */
1288 static int callchain_trace(struct stackframe *fr, void *data)
1289 {
1290 	struct perf_callchain_entry_ctx *entry = data;
1291 
1292 	perf_callchain_store(entry, fr->lp);
1293 	return 0;
1294 }
1295 
1296 /*
1297  * Get the return address for a single stackframe and return a pointer to the
1298  * next frame tail.
1299  */
1300 static unsigned long
1301 user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
1302 {
1303 	struct frame_tail buftail;
1304 	unsigned long lp = 0;
1305 	unsigned long *user_frame_tail =
1306 		(unsigned long *)(fp - (unsigned long)sizeof(buftail));
1307 
1308 	/* Check accessibility of one struct frame_tail beyond */
1309 	if (!access_ok(user_frame_tail, sizeof(buftail)))
1310 		return 0;
1311 	if (__copy_from_user_inatomic
1312 		(&buftail, user_frame_tail, sizeof(buftail)))
1313 		return 0;
1314 
1315 	/*
1316 	 * Refer to unwind_frame_kernel() for more illustration
1317 	 */
1318 	lp = buftail.stack_lp;  /* ((unsigned long *)fp)[-1] */
1319 	fp = buftail.stack_fp;  /* ((unsigned long *)fp)[FP_OFFSET] */
1320 	perf_callchain_store(entry, lp);
1321 	return fp;
1322 }
1323 
1324 static unsigned long
1325 user_backtrace_opt_size(struct perf_callchain_entry_ctx *entry,
1326 			unsigned long fp)
1327 {
1328 	struct frame_tail_opt_size buftail;
1329 	unsigned long lp = 0;
1330 
1331 	unsigned long *user_frame_tail =
1332 		(unsigned long *)(fp - (unsigned long)sizeof(buftail));
1333 
1334 	/* Check accessibility of one struct frame_tail beyond */
1335 	if (!access_ok(user_frame_tail, sizeof(buftail)))
1336 		return 0;
1337 	if (__copy_from_user_inatomic
1338 		(&buftail, user_frame_tail, sizeof(buftail)))
1339 		return 0;
1340 
1341 	/*
1342 	 * Refer to unwind_frame_kernel() for more illustration
1343 	 */
1344 	lp = buftail.stack_lp;  /* ((unsigned long *)fp)[-1] */
1345 	fp = buftail.stack_fp;  /* ((unsigned long *)fp)[FP_OFFSET] */
1346 
1347 	perf_callchain_store(entry, lp);
1348 	return fp;
1349 }
1350 
1351 /*
1352  * This will be called when the target is in user mode
1353  * This function will only be called when we use
1354  * "PERF_SAMPLE_CALLCHAIN" in
1355  * kernel/events/core.c:perf_prepare_sample()
1356  *
1357  * How to trigger perf_callchain_[user/kernel] :
1358  * $ perf record -e cpu-clock --call-graph fp ./program
1359  * $ perf report --call-graph
1360  */
1361 unsigned long leaf_fp;
1362 void
1363 perf_callchain_user(struct perf_callchain_entry_ctx *entry,
1364 		    struct pt_regs *regs)
1365 {
1366 	unsigned long fp = 0;
1367 	unsigned long gp = 0;
1368 	unsigned long lp = 0;
1369 	unsigned long sp = 0;
1370 	unsigned long *user_frame_tail;
1371 
1372 	leaf_fp = 0;
1373 
1374 	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1375 		/* We don't support guest os callchain now */
1376 		return;
1377 	}
1378 
1379 	perf_callchain_store(entry, regs->ipc);
1380 	fp = regs->fp;
1381 	gp = regs->gp;
1382 	lp = regs->lp;
1383 	sp = regs->sp;
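	/*
	 * A plausible user frame pointer must be non-zero, 8-byte aligned
	 * and above the current stack pointer.
	 */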
1384 	if (entry->nr < PERF_MAX_STACK_DEPTH &&
1385 	    (unsigned long)fp && !((unsigned long)fp & 0x7) && fp > sp) {
1386 		user_frame_tail =
1387 			(unsigned long *)(fp - (unsigned long)sizeof(fp));
1388 
1389 		if (!access_ok(user_frame_tail, sizeof(fp)))
1390 			return;
1391 
1392 		if (__copy_from_user_inatomic
1393 			(&leaf_fp, user_frame_tail, sizeof(fp)))
1394 			return;
1395 
1396 		if (leaf_fp == lp) {
1397 			/*
1398 			 * This is not a leaf function; it was
1399 			 * compiled either with or without
1400 			 * optimize-for-size, so inspect the
1401 			 * saved frame to tell which is the case.
1402 			 */
1403 			struct frame_tail buftail;
1404 
1405 			user_frame_tail =
1406 				(unsigned long *)(fp -
1407 					(unsigned long)sizeof(buftail));
1408 
1409 			if (!access_ok(user_frame_tail, sizeof(buftail)))
1410 				return;
1411 
1412 			if (__copy_from_user_inatomic
1413 				(&buftail, user_frame_tail, sizeof(buftail)))
1414 				return;
1415 
1416 			if (buftail.stack_fp == gp) {
1417 				/* non-leaf function compiled
1418 				 * with optimize-for-size
1419 				 */
1420 				struct frame_tail_opt_size buftail_opt_size;
1421 
1422 				user_frame_tail =
1423 					(unsigned long *)(fp - (unsigned long)
1424 						sizeof(buftail_opt_size));
1425 
1426 				if (!access_ok(user_frame_tail,
1427 					       sizeof(buftail_opt_size)))
1428 					return;
1429 
1430 				if (__copy_from_user_inatomic
1431 				   (&buftail_opt_size, user_frame_tail,
1432 				   sizeof(buftail_opt_size)))
1433 					return;
1434 
1435 				perf_callchain_store(entry, lp);
1436 				fp = buftail_opt_size.stack_fp;
1437 
1438 				while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
1439 				       (unsigned long)fp &&
1440 						!((unsigned long)fp & 0x7) &&
1441 						fp > sp) {
1442 					sp = fp;
1443 					fp = user_backtrace_opt_size(entry, fp);
1444 				}
1445 
1446 			} else {
1447 				/* non-leaf function compiled
1448 				 * without optimize-for-size
1449 				 */
1450 				fp = buftail.stack_fp;
1451 				perf_callchain_store(entry, lp);
1452 				while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
1453 				       (unsigned long)fp &&
1454 						!((unsigned long)fp & 0x7) &&
1455 						fp > sp) {
1456 					sp = fp;
1457 					fp = user_backtrace(entry, fp);
1458 				}
1459 			}
1460 		} else {
1461 			/* this is a leaf function */
1462 			fp = leaf_fp;
1463 			perf_callchain_store(entry, lp);
1464 
1465 			/* unwind the call chain of the previous functions */
1466 			while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
1467 			       (unsigned long)fp &&
1468 				   !((unsigned long)fp & 0x7) && fp > sp) {
1469 				sp = fp;
1470 				fp = user_backtrace(entry, fp);
1471 			}
1472 		}
1473 		return;
1474 	}
1475 }
1476 
1477 /* This will be called when the target is in kernel mode */
1478 void
1479 perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
1480 		      struct pt_regs *regs)
1481 {
1482 	struct stackframe fr;
1483 
1484 	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1485 		/* We don't support guest os callchain now */
1486 		return;
1487 	}
1488 	fr.fp = regs->fp;
1489 	fr.lp = regs->lp;
1490 	fr.sp = regs->sp;
1491 	walk_stackframe(&fr, callchain_trace, entry);
1492 }
1493 
1494 unsigned long perf_instruction_pointer(struct pt_regs *regs)
1495 {
1496 	/* NDS32 does not support virtualization */
1497 	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
1498 		return perf_guest_cbs->get_guest_ip();
1499 
1500 	return instruction_pointer(regs);
1501 }
1502 
1503 unsigned long perf_misc_flags(struct pt_regs *regs)
1504 {
1505 	int misc = 0;
1506 
1507 	/* NDS32 does not support virtualization */
1508 	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1509 		if (perf_guest_cbs->is_user_mode())
1510 			misc |= PERF_RECORD_MISC_GUEST_USER;
1511 		else
1512 			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
1513 	} else {
1514 		if (user_mode(regs))
1515 			misc |= PERF_RECORD_MISC_USER;
1516 		else
1517 			misc |= PERF_RECORD_MISC_KERNEL;
1518 	}
1519 
1520 	return misc;
1521 }
1522