1 /* SPDX-License-Identifier: GPL-2.0 */
2 /* Copyright (C) 2008-2018 Andes Technology Corporation */
3 
4 #ifndef __ASM_PMU_H
5 #define __ASM_PMU_H
6 
7 #include <linux/interrupt.h>
8 #include <linux/perf_event.h>
9 #include <asm/unistd.h>
10 #include <asm/bitfield.h>
11 
/* Has special meaning for perf core implementation */
#define HW_OP_UNSUPPORTED		0x0
/* Shorthand for the generic perf cache-map index names. */
#define C(_x)				PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED		0x0

/* Enough for both software and hardware defined events */
#define SOFTWARE_EVENT_MASK		0xFF

/*
 * Per-counter event-number offsets.  Each counter's selector values are
 * biased by its own magic offset so that no valid event encodes to zero
 * (zero means "unsupported" to the generic perf mapping tables).
 */
#define PFM_OFFSET_MAGIC_0		2	/* DO NOT START FROM 0 */
#define PFM_OFFSET_MAGIC_1		(PFM_OFFSET_MAGIC_0 + 36)
#define PFM_OFFSET_MAGIC_2		(PFM_OFFSET_MAGIC_1 + 36)
23 
24 enum { PFMC0, PFMC1, PFMC2, MAX_COUNTERS };
25 
26 u32 PFM_CTL_OVF[3] = { PFM_CTL_mskOVF0, PFM_CTL_mskOVF1,
27 		       PFM_CTL_mskOVF2 };
28 u32 PFM_CTL_EN[3] = { PFM_CTL_mskEN0, PFM_CTL_mskEN1,
29 		      PFM_CTL_mskEN2 };
30 u32 PFM_CTL_OFFSEL[3] = { PFM_CTL_offSEL0, PFM_CTL_offSEL1,
31 			  PFM_CTL_offSEL2 };
32 u32 PFM_CTL_IE[3] = { PFM_CTL_mskIE0, PFM_CTL_mskIE1, PFM_CTL_mskIE2 };
33 u32 PFM_CTL_KS[3] = { PFM_CTL_mskKS0, PFM_CTL_mskKS1, PFM_CTL_mskKS2 };
34 u32 PFM_CTL_KU[3] = { PFM_CTL_mskKU0, PFM_CTL_mskKU1, PFM_CTL_mskKU2 };
35 u32 PFM_CTL_SEL[3] = { PFM_CTL_mskSEL0, PFM_CTL_mskSEL1, PFM_CTL_mskSEL2 };
/*
 * Perf Events' indices
 *
 * Logical indices used by the driver's event scheduling; presumably
 * NDS32_IDX_CYCLE_COUNTER maps to PFMC0 and the others to PFMC1/PFMC2 —
 * TODO(review): confirm against the counter-assignment code.
 */
#define NDS32_IDX_CYCLE_COUNTER			0
#define NDS32_IDX_COUNTER0			1
#define NDS32_IDX_COUNTER1			2
42 
/* The events for a given PMU register set (one instance per CPU — TODO confirm). */
struct pmu_hw_events {
	/*
	 * The events that are active on the PMU for the given index.
	 */
	struct perf_event *events[MAX_COUNTERS];

	/*
	 * A 1 bit for an index indicates that the counter is being used for
	 * an event. A 0 means that the counter can be used.
	 */
	unsigned long used_mask[BITS_TO_LONGS(MAX_COUNTERS)];

	/*
	 * Hardware lock to serialize accesses to PMU registers. Needed for the
	 * read/modify/write sequences.
	 */
	raw_spinlock_t pmu_lock;
};
62 
63 struct nds32_pmu {
64 	struct pmu pmu;
65 	cpumask_t active_irqs;
66 	char *name;
67 	 irqreturn_t (*handle_irq)(int irq_num, void *dev);
68 	void (*enable)(struct perf_event *event);
69 	void (*disable)(struct perf_event *event);
70 	int (*get_event_idx)(struct pmu_hw_events *hw_events,
71 			     struct perf_event *event);
72 	int (*set_event_filter)(struct hw_perf_event *evt,
73 				struct perf_event_attr *attr);
74 	u32 (*read_counter)(struct perf_event *event);
75 	void (*write_counter)(struct perf_event *event, u32 val);
76 	void (*start)(struct nds32_pmu *nds32_pmu);
77 	void (*stop)(struct nds32_pmu *nds32_pmu);
78 	void (*reset)(void *data);
79 	int (*request_irq)(struct nds32_pmu *nds32_pmu, irq_handler_t handler);
80 	void (*free_irq)(struct nds32_pmu *nds32_pmu);
81 	int (*map_event)(struct perf_event *event);
82 	int num_events;
83 	atomic_t active_events;
84 	u64 max_period;
85 	struct platform_device *plat_device;
86 	struct pmu_hw_events *(*get_hw_events)(void);
87 };
88 
/* Recover the containing nds32_pmu from the generic struct pmu pointer. */
#define to_nds32_pmu(p)			(container_of(p, struct nds32_pmu, pmu))

/* Driver entry points implemented in the arch perf_event code. */
int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type);

/* Fold the hardware counter delta into the perf event count; returns u64 — semantics per the implementation, TODO confirm. */
u64 nds32_pmu_event_update(struct perf_event *event);

int nds32_pmu_event_set_period(struct perf_event *event);
96 
/*
 * Common NDS32 SPAv3 event types
 *
 * Note: An implementation may not be able to count all of these events
 * but the encodings are considered to be `reserved' in the case that
 * they are not available.
 *
 * SEL_TOTAL_CYCLES has a nonzero offset added because zero is mapped to
 * NOT_SUPPORTED in the generic perf tables.  The event-writing
 * implementation must strip the offset again (see
 * get_converted_evet_hw_num() below).
 */
/* Counter 0 can only select total cycles or completed instructions. */
enum spav3_counter_0_perf_types {
	SPAV3_0_SEL_BASE = -1 + PFM_OFFSET_MAGIC_0,	/* counting symbol */
	SPAV3_0_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_0,
	SPAV3_0_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_0,
	SPAV3_0_SEL_LAST	/* counting symbol */
};
114 
/*
 * Events selectable on counter 1.  The raw hardware selector is the
 * enumerator value minus PFM_OFFSET_MAGIC_1; BASE/LAST only delimit the
 * valid range for the helpers below.
 */
enum spav3_counter_1_perf_types {
	SPAV3_1_SEL_BASE = -1 + PFM_OFFSET_MAGIC_1,	/* counting symbol */
	SPAV3_1_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CONDITIONAL_BRANCH = 2 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_TAKEN_CONDITIONAL_BRANCH = 3 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_PREFETCH_INSTRUCTION = 4 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_RET_INST = 5 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_JR_INST = 6 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_JAL_JRAL_INST = 7 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_NOP_INST = 8 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_SCW_INST = 9 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_ISB_DSB_INST = 10 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CCTL_INST = 11 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_TAKEN_INTERRUPTS = 12 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LOADS_COMPLETED = 13 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_UITLB_ACCESS = 14 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_UDTLB_ACCESS = 15 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_MTLB_ACCESS = 16 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CODE_CACHE_ACCESS = 17 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_DEPENDENCY_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_MISS_STALL_CYCLES = 19 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_ACCESS = 20 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS = 22 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS = 23 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_ILM_ACCESS = 24 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LSU_BIU_CYCLES = 25 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_HPTWK_BIU_CYCLES = 26 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_DMA_BIU_CYCLES = 27 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_CODE_CACHE_FILL_BIU_CYCLES = 28 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LEGAL_UNALIGN_DCACHE_ACCESS = 29 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_PUSH25 = 30 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_SYSCALLS_INST = 31 + PFM_OFFSET_MAGIC_1,
	SPAV3_1_SEL_LAST	/* counting symbol */
};
151 
/*
 * Events selectable on counter 2 (selector = value - PFM_OFFSET_MAGIC_2).
 *
 * NOTE(review): several enumerators here carry a misleading SPAV3_1_
 * prefix even though they belong to counter 2; they are referenced by
 * name elsewhere, so renaming them would break users.  Selectors 11 and
 * 20 are absent — presumably reserved on this counter, TODO confirm
 * against the SPAv3 manual.
 */
enum spav3_counter_2_perf_types {
	SPAV3_2_SEL_BASE = -1 + PFM_OFFSET_MAGIC_2,	/* counting symbol */
	SPAV3_2_SEL_TOTAL_CYCLES = 0 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_COMPLETED_INSTRUCTION = 1 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_CONDITIONAL_BRANCH_MISPREDICT = 2 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_TAKEN_CONDITIONAL_BRANCH_MISPREDICT =
	    3 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_PREFETCH_INSTRUCTION_CACHE_HIT = 4 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_RET_MISPREDICT = 5 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_IMMEDIATE_J_INST = 6 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_MULTIPLY_INST = 7 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_16_BIT_INST = 8 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_FAILED_SCW_INST = 9 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_LD_AFTER_ST_CONFLICT_REPLAYS = 10 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_TAKEN_EXCEPTIONS = 12 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_STORES_COMPLETED = 13 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_UITLB_MISS = 14 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_UDTLB_MISS = 15 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_MTLB_MISS = 16 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_CODE_CACHE_MISS = 17 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_EMPTY_INST_QUEUE_STALL_CYCLES = 18 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DATA_WRITE_BACK = 19 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_DATA_CACHE_MISS = 21 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_LOAD_DATA_CACHE_MISS = 22 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_STORE_DATA_CACHE_MISS = 23 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DLM_ACCESS = 24 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_LSU_BIU_REQUEST = 25 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_HPTWK_BIU_REQUEST = 26 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_DMA_BIU_REQUEST = 27 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_CODE_CACHE_FILL_BIU_REQUEST = 28 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_EXTERNAL_EVENTS = 29 + PFM_OFFSET_MAGIC_2,
	SPAV3_1_SEL_POP25 = 30 + PFM_OFFSET_MAGIC_2,
	SPAV3_2_SEL_LAST	/* counting symbol */
};
186 
187 /* Get converted event counter index */
get_converted_event_idx(unsigned long event)188 static inline int get_converted_event_idx(unsigned long event)
189 {
190 	int idx;
191 
192 	if ((event) > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST) {
193 		idx = 0;
194 	} else if ((event) > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST) {
195 		idx = 1;
196 	} else if ((event) > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST) {
197 		idx = 2;
198 	} else {
199 		pr_err("GET_CONVERTED_EVENT_IDX PFM counter range error\n");
200 		return -EPERM;
201 	}
202 
203 	return idx;
204 }
205 
206 /* Get converted hardware event number */
get_converted_evet_hw_num(u32 event)207 static inline u32 get_converted_evet_hw_num(u32 event)
208 {
209 	if (event > SPAV3_0_SEL_BASE && event < SPAV3_0_SEL_LAST)
210 		event -= PFM_OFFSET_MAGIC_0;
211 	else if (event > SPAV3_1_SEL_BASE && event < SPAV3_1_SEL_LAST)
212 		event -= PFM_OFFSET_MAGIC_1;
213 	else if (event > SPAV3_2_SEL_BASE && event < SPAV3_2_SEL_LAST)
214 		event -= PFM_OFFSET_MAGIC_2;
215 	else if (event != 0)
216 		pr_err("GET_CONVERTED_EVENT_HW_NUM PFM counter range error\n");
217 
218 	return event;
219 }
220 
221 /*
222  * NDS32 HW events mapping
223  *
224  * The hardware events that we support. We do support cache operations but
225  * we have harvard caches and no way to combine instruction and data
226  * accesses/misses in hardware.
227  */
228 static const unsigned int nds32_pfm_perf_map[PERF_COUNT_HW_MAX] = {
229 	[PERF_COUNT_HW_CPU_CYCLES] = SPAV3_0_SEL_TOTAL_CYCLES,
230 	[PERF_COUNT_HW_INSTRUCTIONS] = SPAV3_1_SEL_COMPLETED_INSTRUCTION,
231 	[PERF_COUNT_HW_CACHE_REFERENCES] = SPAV3_1_SEL_DATA_CACHE_ACCESS,
232 	[PERF_COUNT_HW_CACHE_MISSES] = SPAV3_2_SEL_DATA_CACHE_MISS,
233 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED,
234 	[PERF_COUNT_HW_BRANCH_MISSES] = HW_OP_UNSUPPORTED,
235 	[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
236 	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED,
237 	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
238 	[PERF_COUNT_HW_REF_CPU_CYCLES] = HW_OP_UNSUPPORTED
239 };
240 
241 static const unsigned int nds32_pfm_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
242 	[PERF_COUNT_HW_CACHE_OP_MAX]
243 	[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
244 	[C(L1D)] = {
245 		    [C(OP_READ)] = {
246 				    [C(RESULT_ACCESS)] =
247 				    SPAV3_1_SEL_LOAD_DATA_CACHE_ACCESS,
248 				    [C(RESULT_MISS)] =
249 				    SPAV3_2_SEL_LOAD_DATA_CACHE_MISS,
250 				    },
251 		    [C(OP_WRITE)] = {
252 				     [C(RESULT_ACCESS)] =
253 				     SPAV3_1_SEL_STORE_DATA_CACHE_ACCESS,
254 				     [C(RESULT_MISS)] =
255 				     SPAV3_2_SEL_STORE_DATA_CACHE_MISS,
256 				     },
257 		    [C(OP_PREFETCH)] = {
258 					[C(RESULT_ACCESS)] =
259 						CACHE_OP_UNSUPPORTED,
260 					[C(RESULT_MISS)] =
261 						CACHE_OP_UNSUPPORTED,
262 					},
263 		    },
264 	[C(L1I)] = {
265 		    [C(OP_READ)] = {
266 				    [C(RESULT_ACCESS)] =
267 				    SPAV3_1_SEL_CODE_CACHE_ACCESS,
268 				    [C(RESULT_MISS)] =
269 				    SPAV3_2_SEL_CODE_CACHE_MISS,
270 				    },
271 		    [C(OP_WRITE)] = {
272 				     [C(RESULT_ACCESS)] =
273 				     SPAV3_1_SEL_CODE_CACHE_ACCESS,
274 				     [C(RESULT_MISS)] =
275 				     SPAV3_2_SEL_CODE_CACHE_MISS,
276 				     },
277 		    [C(OP_PREFETCH)] = {
278 					[C(RESULT_ACCESS)] =
279 					CACHE_OP_UNSUPPORTED,
280 					[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
281 					},
282 		    },
283 	/* TODO: L2CC */
284 	[C(LL)] = {
285 		   [C(OP_READ)] = {
286 				   [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
287 				   [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
288 				   },
289 		   [C(OP_WRITE)] = {
290 				    [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
291 				    [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
292 				    },
293 		   [C(OP_PREFETCH)] = {
294 				       [C(RESULT_ACCESS)] =
295 				       CACHE_OP_UNSUPPORTED,
296 				       [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
297 				       },
298 		   },
299 	/* NDS32 PMU does not support TLB read/write hit/miss,
300 	 * However, it can count access/miss, which mixed with read and write.
301 	 * Therefore, only READ counter will use it.
302 	 * We do as possible as we can.
303 	 */
304 	[C(DTLB)] = {
305 		     [C(OP_READ)] = {
306 				     [C(RESULT_ACCESS)] =
307 					SPAV3_1_SEL_UDTLB_ACCESS,
308 				     [C(RESULT_MISS)] =
309 					SPAV3_2_SEL_UDTLB_MISS,
310 				     },
311 		     [C(OP_WRITE)] = {
312 				      [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
313 				      [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
314 				      },
315 		     [C(OP_PREFETCH)] = {
316 					 [C(RESULT_ACCESS)] =
317 					 CACHE_OP_UNSUPPORTED,
318 					 [C(RESULT_MISS)] =
319 					 CACHE_OP_UNSUPPORTED,
320 					 },
321 		     },
322 	[C(ITLB)] = {
323 		     [C(OP_READ)] = {
324 				     [C(RESULT_ACCESS)] =
325 					SPAV3_1_SEL_UITLB_ACCESS,
326 				     [C(RESULT_MISS)] =
327 					SPAV3_2_SEL_UITLB_MISS,
328 				     },
329 		     [C(OP_WRITE)] = {
330 				      [C(RESULT_ACCESS)] =
331 					CACHE_OP_UNSUPPORTED,
332 				      [C(RESULT_MISS)] =
333 					CACHE_OP_UNSUPPORTED,
334 				      },
335 		     [C(OP_PREFETCH)] = {
336 					 [C(RESULT_ACCESS)] =
337 						CACHE_OP_UNSUPPORTED,
338 					 [C(RESULT_MISS)] =
339 						CACHE_OP_UNSUPPORTED,
340 					 },
341 		     },
342 	[C(BPU)] = {		/* What is BPU? */
343 		    [C(OP_READ)] = {
344 				    [C(RESULT_ACCESS)] =
345 					CACHE_OP_UNSUPPORTED,
346 				    [C(RESULT_MISS)] =
347 					CACHE_OP_UNSUPPORTED,
348 				    },
349 		    [C(OP_WRITE)] = {
350 				     [C(RESULT_ACCESS)] =
351 					CACHE_OP_UNSUPPORTED,
352 				     [C(RESULT_MISS)] =
353 					CACHE_OP_UNSUPPORTED,
354 				     },
355 		    [C(OP_PREFETCH)] = {
356 					[C(RESULT_ACCESS)] =
357 						CACHE_OP_UNSUPPORTED,
358 					[C(RESULT_MISS)] =
359 						CACHE_OP_UNSUPPORTED,
360 					},
361 		    },
362 	[C(NODE)] = {		/* What is NODE? */
363 		     [C(OP_READ)] = {
364 				     [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
365 				     [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
366 				     },
367 		     [C(OP_WRITE)] = {
368 				      [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
369 				      [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
370 				      },
371 		     [C(OP_PREFETCH)] = {
372 					 [C(RESULT_ACCESS)] =
373 						CACHE_OP_UNSUPPORTED,
374 					 [C(RESULT_MISS)] =
375 						CACHE_OP_UNSUPPORTED,
376 					 },
377 		     },
378 };
379 
/*
 * Translate a generic perf event into this PMU's hardware encoding using
 * the supplied HW-event and cache-event maps; @raw_event_mask bounds raw
 * event codes.  Implemented in the arch perf_event code.
 */
int nds32_pmu_map_event(struct perf_event *event,
			const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
			const unsigned int (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
			[PERF_COUNT_HW_CACHE_OP_MAX]
			[PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask);
385 
386 #endif /* __ASM_PMU_H */
387