1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * dlfilter-show-cycles.c: Print the number of cycles at the start of each line
4  * Copyright (c) 2021, Intel Corporation.
5  */
6 #include <perf/perf_dlfilter.h>
7 #include <string.h>
8 #include <stdio.h>
9 
/* CPU numbers in the range [0, MAX_CPU) get their own per-CPU counters */
#define MAX_CPU 4096

/* Index of each separately accumulated cycle counter (see event_entry()) */
enum {
	INSTR_CYC,	/* event name starts with "instructions" */
	BRNCH_CYC,	/* event name starts with "branches" */
	OTHER_CYC,	/* any other event name, or no event name at all */
	MAX_ENTRY	/* number of counters; used only as an array size */
};

/* Running cycle totals, indexed by CPU number and counter index */
static __u64 cycles[MAX_CPU][MAX_ENTRY];
/* Copy of cycles[][] taken when it was last printed; used to compute the delta */
static __u64 cycles_rpt[MAX_CPU][MAX_ENTRY];
21 
/*
 * Geometry of the per-thread table: TABLESZ slots, with the starting slot
 * for a tid taken from its low BITS bits (tid & MASK).
 */
#define BITS		16
#define TABLESZ		(1 << BITS)
/* find_entry() refuses to use more than TABLEMAX slots, so unused slots always remain */
#define TABLEMAX	(TABLESZ / 2)
#define MASK		(TABLESZ - 1)

/*
 * Per-thread counters, used for samples whose CPU number is outside
 * [0, MAX_CPU). Slots are located and claimed by find_entry().
 */
static struct entry {
	__u32 used;			/* non-zero once the slot has been claimed */
	__s32 tid;			/* thread id that claimed the slot */
	__u64 cycles[MAX_ENTRY];	/* running cycle totals per counter index */
	__u64 cycles_rpt[MAX_ENTRY];	/* totals as of the last print */
} table[TABLESZ];

/* Number of table[] slots claimed so far */
static int tid_cnt;
35 
36 static int event_entry(const char *event)
37 {
38 	if (!event)
39 		return OTHER_CYC;
40 	if (!strncmp(event, "instructions", 12))
41 		return INSTR_CYC;
42 	if (!strncmp(event, "branches", 8))
43 		return BRNCH_CYC;
44 	return OTHER_CYC;
45 }
46 
47 static struct entry *find_entry(__s32 tid)
48 {
49 	__u32 pos = tid & MASK;
50 	struct entry *e;
51 
52 	e = &table[pos];
53 	while (e->used) {
54 		if (e->tid == tid)
55 			return e;
56 		if (++pos == TABLESZ)
57 			pos = 0;
58 		e = &table[pos];
59 	}
60 
61 	if (tid_cnt >= TABLEMAX) {
62 		fprintf(stderr, "Too many threads\n");
63 		return NULL;
64 	}
65 
66 	tid_cnt += 1;
67 	e->used = 1;
68 	e->tid = tid;
69 	return e;
70 }
71 
72 static void add_entry(__s32 tid, int pos, __u64 cnt)
73 {
74 	struct entry *e = find_entry(tid);
75 
76 	if (e)
77 		e->cycles[pos] += cnt;
78 }
79 
80 int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
81 {
82 	__s32 cpu = sample->cpu;
83 	__s32 tid = sample->tid;
84 	int pos;
85 
86 	if (!sample->cyc_cnt)
87 		return 0;
88 
89 	pos = event_entry(sample->event);
90 
91 	if (cpu >= 0 && cpu < MAX_CPU)
92 		cycles[cpu][pos] += sample->cyc_cnt;
93 	else if (tid != -1)
94 		add_entry(tid, pos, sample->cyc_cnt);
95 	return 0;
96 }
97 
98 static void print_vals(__u64 cycles, __u64 delta)
99 {
100 	if (delta)
101 		printf("%10llu %10llu ", (unsigned long long)cycles, (unsigned long long)delta);
102 	else
103 		printf("%10llu %10s ", (unsigned long long)cycles, "");
104 }
105 
106 int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
107 {
108 	__s32 cpu = sample->cpu;
109 	__s32 tid = sample->tid;
110 	int pos;
111 
112 	pos = event_entry(sample->event);
113 
114 	if (cpu >= 0 && cpu < MAX_CPU) {
115 		print_vals(cycles[cpu][pos], cycles[cpu][pos] - cycles_rpt[cpu][pos]);
116 		cycles_rpt[cpu][pos] = cycles[cpu][pos];
117 		return 0;
118 	}
119 
120 	if (tid != -1) {
121 		struct entry *e = find_entry(tid);
122 
123 		if (e) {
124 			print_vals(e->cycles[pos], e->cycles[pos] - e->cycles_rpt[pos]);
125 			e->cycles_rpt[pos] = e->cycles[pos];
126 			return 0;
127 		}
128 	}
129 
130 	printf("%22s", "");
131 	return 0;
132 }
133 
/*
 * dlfilter hook: describe this filter.
 *
 * Stores a pointer to the long description in *@long_description and
 * returns the one-line description. Both strings have static storage
 * duration and must not be modified or freed by the caller.
 */
const char *filter_description(const char **long_description)
{
	static const char short_desc[] =
		"Print the number of cycles at the start of each line";
	static const char long_desc[] =
		"Cycle counts are accumulated per CPU (or "
		"per thread if CPU is not recorded) from IPC information, and "
		"printed together with the change since the last print, at the "
		"start of each line. Separate counts are kept for branches, "
		"instructions or other events.";

	*long_description = long_desc;
	return short_desc;
}
145