1libperf-sampling(7)
2===================
3
4NAME
5----
6libperf-sampling - sampling interface
7
8
9DESCRIPTION
10-----------
11The sampling interface provides an API to measure and get counts for specific perf events.
12
13The following text tries to explain sampling using the `sampling.c` example.
14
15It is by no means a complete guide to sampling, but it shows the basic libperf API for sampling.
16
17The `sampling.c` example comes with the libperf package and can be compiled and run like:
18
19[source,bash]
20--
21$ gcc -o sampling sampling.c -lperf
22$ sudo ./sampling
23cpu   0, pid      0, tid      0, ip     ffffffffad06c4e6, period                    1
24cpu   0, pid   4465, tid   4469, ip     ffffffffad118748, period             18322959
25cpu   0, pid      0, tid      0, ip     ffffffffad115722, period             33544846
26cpu   0, pid   4465, tid   4470, ip         7f84fe0cdad6, period             23687474
27cpu   0, pid      0, tid      0, ip     ffffffffad9e0349, period             34255790
28cpu   0, pid   4465, tid   4469, ip     ffffffffad136581, period             38664069
29cpu   0, pid      0, tid      0, ip     ffffffffad9e55e2, period             21922384
30cpu   0, pid   4465, tid   4470, ip         7f84fe0ebebf, period             17655175
31...
32--
33
34It requires root access, because it uses the hardware cycles event.
35
36The `sampling.c` example profiles/samples all CPUs with hardware cycles. In a nutshell, it:
37
38- creates events
39- adds them to the event list
40- opens and enables events through the event list
41- sleeps for 3 seconds
42- disables events
43- reads and displays recorded samples
44- destroys the event list
45
46The first thing you need to do before using libperf is to call init function:
47
48[source,c]
49--
50 12 static int libperf_print(enum libperf_print_level level,
51 13                          const char *fmt, va_list ap)
52 14 {
53 15         return vfprintf(stderr, fmt, ap);
54 16 }
55
56 23 int main(int argc, char **argv)
57 24 {
58 ...
59 40         libperf_init(libperf_print);
60--
61
62It will set up the library and set the function for debug output from the library.
63
64The `libperf_print` callback will receive any message with its debug level,
65defined as:
66
67[source,c]
68--
69enum libperf_print_level {
70        LIBPERF_ERR,
71        LIBPERF_WARN,
72        LIBPERF_INFO,
73        LIBPERF_DEBUG,
74        LIBPERF_DEBUG2,
75        LIBPERF_DEBUG3,
76};
77--
78
79Once the setup is complete we start by defining the cycles event using the `struct perf_event_attr`:
80
81[source,c]
82--
83 29         struct perf_event_attr attr = {
84 30                 .type        = PERF_TYPE_HARDWARE,
85 31                 .config      = PERF_COUNT_HW_CPU_CYCLES,
86 32                 .disabled    = 1,
87 33                 .freq        = 1,
88 34                 .sample_freq = 10,
89 35                 .sample_type = PERF_SAMPLE_IP|PERF_SAMPLE_TID|PERF_SAMPLE_CPU|PERF_SAMPLE_PERIOD,
90 36         };
91--
92
93Next step is to prepare cpus map.
94
95In this case we will monitor all the available CPUs:
96
97[source,c]
98--
99 42         cpus = perf_cpu_map__new(NULL);
100 43         if (!cpus) {
101 44                 fprintf(stderr, "failed to create cpus\n");
102 45                 return -1;
103 46         }
104--
105
106Now we create libperf's event list, which will serve as holder for the cycles event:
107
108[source,c]
109--
110 48         evlist = perf_evlist__new();
111 49         if (!evlist) {
112 50                 fprintf(stderr, "failed to create evlist\n");
113 51                 goto out_cpus;
114 52         }
115--
116
117We create libperf's event for the cycles attribute we defined earlier and add it to the list:
118
119[source,c]
120--
121 54         evsel = perf_evsel__new(&attr);
122 55         if (!evsel) {
123 56                 fprintf(stderr, "failed to create cycles\n");
124 57                 goto out_cpus;
125 58         }
126 59
127 60         perf_evlist__add(evlist, evsel);
128--
129
130Configure event list with the cpus map and open event:
131
132[source,c]
133--
134 62         perf_evlist__set_maps(evlist, cpus, NULL);
135 63
136 64         err = perf_evlist__open(evlist);
137 65         if (err) {
138 66                 fprintf(stderr, "failed to open evlist\n");
139 67                 goto out_evlist;
140 68         }
141--
142
143Once the events list is open, we can create memory maps AKA perf ring buffers:
144
145[source,c]
146--
147 70         err = perf_evlist__mmap(evlist, 4);
148 71         if (err) {
149 72                 fprintf(stderr, "failed to mmap evlist\n");
150 73                 goto out_evlist;
151 74         }
152--
153
154The event is created as disabled (note the `disabled = 1` assignment above),
155so we need to enable the events list explicitly.
156
157From this moment the cycles event is sampling.
158
159We will sleep for 3 seconds while the ring buffers get data from all CPUs, then we disable the events list.
160
161[source,c]
162--
163 76         perf_evlist__enable(evlist);
164 77         sleep(3);
165 78         perf_evlist__disable(evlist);
166--
167
168The following code walks through the ring buffers and reads stored events/samples:
169
170[source,c]
171--
172 80         perf_evlist__for_each_mmap(evlist, map, false) {
173 81                 if (perf_mmap__read_init(map) < 0)
174 82                         continue;
175 83
176 84                 while ((event = perf_mmap__read_event(map)) != NULL) {
177
178                            /* process event */
179
180108                         perf_mmap__consume(map);
181109                 }
182110                 perf_mmap__read_done(map);
183111         }
184
185--
186
187Each sample needs to get parsed:
188
189[source,c]
190--
191 85                         int cpu, pid, tid;
192 86                         __u64 ip, period, *array;
193 87                         union u64_swap u;
194 88
195 89                         array = event->sample.array;
196 90
197 91                         ip = *array;
198 92                         array++;
199 93
200 94                         u.val64 = *array;
201 95                         pid = u.val32[0];
202 96                         tid = u.val32[1];
203 97                         array++;
204 98
205 99                         u.val64 = *array;
206100                         cpu = u.val32[0];
207101                         array++;
208102
209103                         period = *array;
210104
211105                         fprintf(stdout, "cpu %3d, pid %6d, tid %6d, ip %20llx, period %20llu\n",
212106                                 cpu, pid, tid, ip, period);
213--
214
215And finally cleanup.
216
217We close the whole events list and remove it together with the cpus map:
218
219[source,c]
220--
221113 out_evlist:
222114         perf_evlist__delete(evlist);
223115 out_cpus:
224116         perf_cpu_map__put(cpus);
225117         return err;
226118 }
227--
228
229REPORTING BUGS
230--------------
231Report bugs to <linux-perf-users@vger.kernel.org>.
232
233LICENSE
234-------
235libperf is Free Software licensed under the GNU LGPL 2.1
236
237RESOURCES
238---------
239https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
240
241SEE ALSO
242--------
243libperf(3), libperf-counting(7)
244