1 /**
2 * collectd - src/intel_pmu.c
3 *
4 * Copyright(c) 2017-2020 Intel Corporation. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Serhiy Pshyk <serhiyx.pshyk@intel.com>
26 * Kamil Wiatrowski <kamilx.wiatrowski@intel.com>
27 **/
28
29 #include "collectd.h"
30 #include "utils/common/common.h"
31
32 #include "utils/config_cores/config_cores.h"
33
34 #include <jevents.h>
35 #include <jsession.h>
36
37 #define PMU_PLUGIN "intel_pmu"
38
/* Helper macros composing the perf_event_attr.config value for
 * PERF_TYPE_HW_CACHE events: cache id occupies bits 0-7, operation id
 * bits 8-15 and result id bits 16-23 (see perf_event_open(2)). They are
 * OR-ed with a PERF_COUNT_HW_CACHE_* cache id in g_hw_cache_events. */
#define HW_CACHE_READ_ACCESS                                                   \
  (((PERF_COUNT_HW_CACHE_OP_READ) << 8) |                                      \
   ((PERF_COUNT_HW_CACHE_RESULT_ACCESS) << 16))

#define HW_CACHE_WRITE_ACCESS                                                  \
  (((PERF_COUNT_HW_CACHE_OP_WRITE) << 8) |                                     \
   ((PERF_COUNT_HW_CACHE_RESULT_ACCESS) << 16))

#define HW_CACHE_PREFETCH_ACCESS                                               \
  (((PERF_COUNT_HW_CACHE_OP_PREFETCH) << 8) |                                  \
   ((PERF_COUNT_HW_CACHE_RESULT_ACCESS) << 16))

#define HW_CACHE_READ_MISS                                                     \
  (((PERF_COUNT_HW_CACHE_OP_READ) << 8) |                                      \
   ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))

#define HW_CACHE_WRITE_MISS                                                    \
  (((PERF_COUNT_HW_CACHE_OP_WRITE) << 8) |                                     \
   ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))

#define HW_CACHE_PREFETCH_MISS                                                 \
  (((PERF_COUNT_HW_CACHE_OP_PREFETCH) << 8) |                                  \
   ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))
62
/* Mapping of a symbolic event name to its perf_event_attr.config value. */
struct event_info {
  char *name;      /* symbolic event name, e.g. "cpu-cycles" */
  uint64_t config; /* value for perf_event_attr.config */
};
typedef struct event_info event_info_t;
68
/* Plugin configuration and runtime state (see pmu_config for the mapping
 * from collectd configuration options to the fields below). */
struct intel_pmu_ctx_s {
  bool hw_cache_events;         /* ReportHardwareCacheEvents option */
  bool kernel_pmu_events;       /* ReportKernelPMUEvents option */
  bool sw_events;               /* ReportSoftwareEvents option */
  char event_list_fn[PATH_MAX]; /* EventList option: event list file path */
  char **hw_events;             /* HardwareEvents option: event name strings */
  size_t hw_events_count;       /* number of entries in hw_events */
  core_groups_list_t cores;     /* Cores option: core group configuration */
  struct eventlist *event_list; /* jevents list of events to monitor */
  bool dispatch_cloned_pmus;    /* DispatchMultiPmu option */
};
typedef struct intel_pmu_ctx_s intel_pmu_ctx_t;
81
/* Generalized kernel PMU events; registered with PERF_TYPE_HARDWARE
 * when the ReportKernelPMUEvents option is enabled (see pmu_init). */
event_info_t g_kernel_pmu_events[] = {
    {.name = "cpu-cycles", .config = PERF_COUNT_HW_CPU_CYCLES},
    {.name = "instructions", .config = PERF_COUNT_HW_INSTRUCTIONS},
    {.name = "cache-references", .config = PERF_COUNT_HW_CACHE_REFERENCES},
    {.name = "cache-misses", .config = PERF_COUNT_HW_CACHE_MISSES},
    {.name = "branches", .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
    {.name = "branch-misses", .config = PERF_COUNT_HW_BRANCH_MISSES},
    {.name = "bus-cycles", .config = PERF_COUNT_HW_BUS_CYCLES},
};
91
/* Hardware cache events; registered with PERF_TYPE_HW_CACHE when the
 * ReportHardwareCacheEvents option is enabled (see pmu_init). Each config
 * combines a cache id with an op/result encoding from the HW_CACHE_*
 * macros above. */
event_info_t g_hw_cache_events[] = {

    {.name = "L1-dcache-loads",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_READ_ACCESS)},
    {.name = "L1-dcache-load-misses",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_READ_MISS)},
    {.name = "L1-dcache-stores",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_WRITE_ACCESS)},
    {.name = "L1-dcache-store-misses",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_WRITE_MISS)},
    {.name = "L1-dcache-prefetches",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_PREFETCH_ACCESS)},
    {.name = "L1-dcache-prefetch-misses",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_PREFETCH_MISS)},

    {.name = "L1-icache-loads",
     .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_READ_ACCESS)},
    {.name = "L1-icache-load-misses",
     .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_READ_MISS)},
    {.name = "L1-icache-prefetches",
     .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_PREFETCH_ACCESS)},
    {.name = "L1-icache-prefetch-misses",
     .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_PREFETCH_MISS)},

    {.name = "LLC-loads",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_READ_ACCESS)},
    {.name = "LLC-load-misses",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_READ_MISS)},
    {.name = "LLC-stores",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_WRITE_ACCESS)},
    {.name = "LLC-store-misses",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_WRITE_MISS)},
    {.name = "LLC-prefetches",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_PREFETCH_ACCESS)},
    {.name = "LLC-prefetch-misses",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_PREFETCH_MISS)},

    {.name = "dTLB-loads",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_READ_ACCESS)},
    {.name = "dTLB-load-misses",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_READ_MISS)},
    {.name = "dTLB-stores",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_WRITE_ACCESS)},
    {.name = "dTLB-store-misses",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_WRITE_MISS)},
    {.name = "dTLB-prefetches",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_PREFETCH_ACCESS)},
    {.name = "dTLB-prefetch-misses",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_PREFETCH_MISS)},

    {.name = "iTLB-loads",
     .config = (PERF_COUNT_HW_CACHE_ITLB | HW_CACHE_READ_ACCESS)},
    {.name = "iTLB-load-misses",
     .config = (PERF_COUNT_HW_CACHE_ITLB | HW_CACHE_READ_MISS)},

    {.name = "branch-loads",
     .config = (PERF_COUNT_HW_CACHE_BPU | HW_CACHE_READ_ACCESS)},
    {.name = "branch-load-misses",
     .config = (PERF_COUNT_HW_CACHE_BPU | HW_CACHE_READ_MISS)},
};
152
/* Kernel software events; registered with PERF_TYPE_SOFTWARE when the
 * ReportSoftwareEvents option is enabled (see pmu_init). */
event_info_t g_sw_events[] = {
    {.name = "cpu-clock", .config = PERF_COUNT_SW_CPU_CLOCK},

    {.name = "task-clock", .config = PERF_COUNT_SW_TASK_CLOCK},

    {.name = "context-switches", .config = PERF_COUNT_SW_CONTEXT_SWITCHES},

    {.name = "cpu-migrations", .config = PERF_COUNT_SW_CPU_MIGRATIONS},

    {.name = "page-faults", .config = PERF_COUNT_SW_PAGE_FAULTS},

    {.name = "minor-faults", .config = PERF_COUNT_SW_PAGE_FAULTS_MIN},

    {.name = "major-faults", .config = PERF_COUNT_SW_PAGE_FAULTS_MAJ},

    {.name = "alignment-faults", .config = PERF_COUNT_SW_ALIGNMENT_FAULTS},

    {.name = "emulation-faults", .config = PERF_COUNT_SW_EMULATION_FAULTS},
};
172
173 static intel_pmu_ctx_t g_ctx;
174
175 #if COLLECT_DEBUG
pmu_dump_events()176 static void pmu_dump_events() {
177
178 DEBUG(PMU_PLUGIN ": Events:");
179
180 struct event *e;
181
182 for (e = g_ctx.event_list->eventlist; e; e = e->next) {
183 DEBUG(PMU_PLUGIN ": event : %s", e->event);
184 DEBUG(PMU_PLUGIN ": group_lead: %d", e->group_leader);
185 DEBUG(PMU_PLUGIN ": in_group : %d", e->ingroup);
186 DEBUG(PMU_PLUGIN ": end_group : %d", e->end_group);
187 DEBUG(PMU_PLUGIN ": type : %#x", e->attr.type);
188 DEBUG(PMU_PLUGIN ": config : %#x", (unsigned)e->attr.config);
189 DEBUG(PMU_PLUGIN ": size : %d", e->attr.size);
190 if (e->attr.sample_period > 0)
191 DEBUG(PMU_PLUGIN ": period : %lld", e->attr.sample_period);
192 if (e->extra.decoded)
193 DEBUG(PMU_PLUGIN ": perf : %s", e->extra.decoded);
194 DEBUG(PMU_PLUGIN ": uncore : %d", e->uncore);
195 }
196 }
197
pmu_dump_config(void)198 static void pmu_dump_config(void) {
199
200 DEBUG(PMU_PLUGIN ": Config:");
201 DEBUG(PMU_PLUGIN ": dispatch_cloned_pmus: %d", g_ctx.dispatch_cloned_pmus);
202 DEBUG(PMU_PLUGIN ": hw_cache_events : %d", g_ctx.hw_cache_events);
203 DEBUG(PMU_PLUGIN ": kernel_pmu_events : %d", g_ctx.kernel_pmu_events);
204 DEBUG(PMU_PLUGIN ": software_events : %d", g_ctx.sw_events);
205
206 for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
207 DEBUG(PMU_PLUGIN ": hardware_events[%" PRIsz "] : %s", i,
208 g_ctx.hw_events[i]);
209 }
210 }
211
pmu_dump_cgroups(void)212 static void pmu_dump_cgroups(void) {
213
214 DEBUG(PMU_PLUGIN ": num cpus : %d", g_ctx.event_list->num_cpus);
215 DEBUG(PMU_PLUGIN ": num sockets: %d", g_ctx.event_list->num_sockets);
216 for (size_t i = 0; i < g_ctx.event_list->num_sockets; i++) {
217 DEBUG(PMU_PLUGIN ": socket [%" PRIsz "] core: %d", i,
218 g_ctx.event_list->socket_cpus[i]);
219 }
220
221 DEBUG(PMU_PLUGIN ": Core groups:");
222
223 for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
224 core_group_t *cgroup = g_ctx.cores.cgroups + i;
225 const size_t cores_size = cgroup->num_cores * 4 + 1;
226 char *cores = calloc(cores_size, sizeof(*cores));
227 if (cores == NULL) {
228 DEBUG(PMU_PLUGIN ": Failed to allocate string to list cores.");
229 return;
230 }
231 for (size_t j = 0; j < cgroup->num_cores; j++)
232 if (snprintf(cores + strlen(cores), cores_size - strlen(cores), " %d",
233 cgroup->cores[j]) < 0) {
234 DEBUG(PMU_PLUGIN ": Failed to write list of cores to string.");
235 sfree(cores);
236 return;
237 }
238
239 DEBUG(PMU_PLUGIN ": group[%" PRIsz "]", i);
240 DEBUG(PMU_PLUGIN ": description: %s", cgroup->desc);
241 DEBUG(PMU_PLUGIN ": cores count: %" PRIsz, cgroup->num_cores);
242 DEBUG(PMU_PLUGIN ": cores :%s", cores);
243 sfree(cores);
244 }
245 }
246
247 #endif /* COLLECT_DEBUG */
248
/* Validate the configured core groups: every core index must be below
 * max_cores and no core may appear in more than one group.
 * Returns 0 when valid, -1 otherwise. */
static int pmu_validate_cgroups(core_group_t *cgroups, size_t len,
                                int max_cores) {
  for (size_t grp = 0; grp < len; grp++) {
    core_group_t *cg = &cgroups[grp];

    for (size_t idx = 0; idx < cg->num_cores; idx++) {
      int core = (int)cg->cores[idx];

      /* max_cores counts both online and offline CPUs. */
      if (core < max_cores)
        continue;

      ERROR(PMU_PLUGIN ": Core %d is not valid, max core index: %d.", core,
            max_cores - 1);
      return -1;
    }

    /* Compare against every later group to catch shared cores. */
    for (size_t other = grp + 1; other < len; other++) {
      if (config_cores_cmp_cgroups(cg, &cgroups[other]) == 0)
        continue;

      ERROR(PMU_PLUGIN ": Same cores cannot be set in different groups.");
      return -1;
    }
  }
  return 0;
}
273
/* Parse the "HardwareEvents" option: duplicate every string argument into
 * g_ctx.hw_events. Non-string arguments are skipped with a warning.
 * Returns 0 on success or a negative errno value on error. */
static int pmu_config_hw_events(oconfig_item_t *ci) {

  if (strcasecmp("HardwareEvents", ci->key) != 0) {
    return -EINVAL;
  }

  if (g_ctx.hw_events) {
    ERROR(PMU_PLUGIN ": Duplicate config for HardwareEvents.");
    return -EINVAL;
  }

  /* An empty option is a no-op; this also avoids calloc(0, ...) possibly
   * returning NULL and being misreported as an allocation failure. */
  if (ci->values_num <= 0)
    return 0;

  g_ctx.hw_events = calloc(ci->values_num, sizeof(*g_ctx.hw_events));
  if (g_ctx.hw_events == NULL) {
    ERROR(PMU_PLUGIN ": Failed to allocate hw events.");
    return -ENOMEM;
  }

  for (int i = 0; i < ci->values_num; i++) {
    if (ci->values[i].type != OCONFIG_TYPE_STRING) {
      WARNING(PMU_PLUGIN ": The %s option requires string arguments.", ci->key);
      continue;
    }

    g_ctx.hw_events[g_ctx.hw_events_count] = strdup(ci->values[i].value.string);
    if (g_ctx.hw_events[g_ctx.hw_events_count] == NULL) {
      ERROR(PMU_PLUGIN ": Failed to allocate hw events entry.");
      /* Entries copied so far are released in pmu_shutdown/init_error. */
      return -ENOMEM;
    }

    g_ctx.hw_events_count++;
  }

  return 0;
}
308
/* collectd complex-config callback: walk the plugin's configuration block
 * and store each recognized option in g_ctx. Stops and returns non-zero on
 * the first invalid option. */
static int pmu_config(oconfig_item_t *ci) {

  DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);

  for (int i = 0; i < ci->children_num; i++) {
    oconfig_item_t *child = ci->children + i;
    int status = 0;

    if (strcasecmp("ReportHardwareCacheEvents", child->key) == 0)
      status = cf_util_get_boolean(child, &g_ctx.hw_cache_events);
    else if (strcasecmp("ReportKernelPMUEvents", child->key) == 0)
      status = cf_util_get_boolean(child, &g_ctx.kernel_pmu_events);
    else if (strcasecmp("EventList", child->key) == 0)
      status = cf_util_get_string_buffer(child, g_ctx.event_list_fn,
                                         sizeof(g_ctx.event_list_fn));
    else if (strcasecmp("HardwareEvents", child->key) == 0)
      status = pmu_config_hw_events(child);
    else if (strcasecmp("ReportSoftwareEvents", child->key) == 0)
      status = cf_util_get_boolean(child, &g_ctx.sw_events);
    else if (strcasecmp("Cores", child->key) == 0)
      status = config_cores_parse(child, &g_ctx.cores);
    else if (strcasecmp("DispatchMultiPmu", child->key) == 0)
      status = cf_util_get_boolean(child, &g_ctx.dispatch_cloned_pmus);
    else {
      ERROR(PMU_PLUGIN ": Unknown configuration parameter \"%s\".", child->key);
      status = -1;
    }

    if (status != 0) {
      DEBUG(PMU_PLUGIN ": %s:%d ret=%d", __FUNCTION__, __LINE__, status);
      return status;
    }
  }

#if COLLECT_DEBUG
  pmu_dump_config();
#endif

  return 0;
}
349
/* Dispatch a single counter value to collectd. The core group description
 * becomes the plugin instance; when event_type is non-NULL the PMU type is
 * appended to the type instance, and optional scaling meta data is
 * attached as-is. */
static void pmu_submit_counter(const char *cgroup, const char *event,
                               const uint32_t *event_type, counter_t value,
                               meta_data_t *meta) {
  value_t val = {.counter = value};
  value_list_t vl = VALUE_LIST_INIT;

  vl.values = &val;
  vl.values_len = 1;
  if (meta != NULL)
    vl.meta = meta;

  sstrncpy(vl.plugin, PMU_PLUGIN, sizeof(vl.plugin));
  sstrncpy(vl.plugin_instance, cgroup, sizeof(vl.plugin_instance));
  sstrncpy(vl.type, "counter", sizeof(vl.type));
  if (event_type == NULL)
    sstrncpy(vl.type_instance, event, sizeof(vl.type_instance));
  else
    ssnprintf(vl.type_instance, sizeof(vl.type_instance), "%s:type=%d", event,
              *event_type);

  plugin_dispatch_values(&vl);
}
371
pmu_meta_data_create(const struct efd * efd)372 meta_data_t *pmu_meta_data_create(const struct efd *efd) {
373 meta_data_t *meta = NULL;
374
375 /* create meta data only if value was scaled */
376 if (efd->val[1] == efd->val[2] || !efd->val[2]) {
377 return NULL;
378 }
379
380 meta = meta_data_create();
381 if (meta == NULL) {
382 ERROR(PMU_PLUGIN ": meta_data_create failed.");
383 return NULL;
384 }
385
386 DEBUG(PMU_PLUGIN ": scaled value = [raw]%lu * [enabled]%lu / [running]%lu",
387 efd->val[0], efd->val[1], efd->val[2]);
388 meta_data_add_unsigned_int(meta, "intel_pmu:raw_count", efd->val[0]);
389 meta_data_add_unsigned_int(meta, "intel_pmu:time_enabled", efd->val[1]);
390 meta_data_add_unsigned_int(meta, "intel_pmu:time_running", efd->val[2]);
391
392 return meta;
393 }
394
pmu_dispatch_data(void)395 static void pmu_dispatch_data(void) {
396
397 struct event *e;
398
399 for (e = g_ctx.event_list->eventlist; e; e = e->next) {
400 const uint32_t *event_type = NULL;
401 if (e->orig && !g_ctx.dispatch_cloned_pmus)
402 continue;
403 if ((e->extra.multi_pmu || e->orig) && g_ctx.dispatch_cloned_pmus)
404 event_type = &e->attr.type;
405
406 for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
407 core_group_t *cgroup = g_ctx.cores.cgroups + i;
408 uint64_t cgroup_value = 0;
409 int event_enabled_cgroup = 0;
410 meta_data_t *meta = NULL;
411
412 for (size_t j = 0; j < cgroup->num_cores; j++) {
413 int core = (int)cgroup->cores[j];
414 if (e->efd[core].fd < 0)
415 continue;
416
417 event_enabled_cgroup++;
418
419 /* If there are more events than counters, the kernel uses time
420 * multiplexing. With multiplexing, at the end of the run,
421 * the counter is scaled basing on total time enabled vs time running.
422 * final_count = raw_count * time_enabled/time_running
423 */
424 if (e->extra.multi_pmu && !g_ctx.dispatch_cloned_pmus)
425 cgroup_value += event_scaled_value_sum(e, core);
426 else {
427 cgroup_value += event_scaled_value(e, core);
428
429 /* get meta data with information about scaling */
430 if (cgroup->num_cores == 1)
431 meta = pmu_meta_data_create(&e->efd[core]);
432 }
433 }
434
435 if (event_enabled_cgroup > 0) {
436 #if COLLECT_DEBUG
437 if (event_type)
438 DEBUG(PMU_PLUGIN ": %s:type=%d/%s = %lu", e->event, *event_type,
439 cgroup->desc, cgroup_value);
440 else
441 DEBUG(PMU_PLUGIN ": %s/%s = %lu", e->event, cgroup->desc,
442 cgroup_value);
443 #endif
444 /* dispatch per core group value */
445 pmu_submit_counter(cgroup->desc, e->event, event_type, cgroup_value,
446 meta);
447 meta_data_destroy(meta);
448 }
449 }
450 }
451 }
452
/* collectd read callback: read every event's counter on every configured
 * core, then dispatch the aggregated values. Returns 0 on success or the
 * first read_event() error. */
static int pmu_read(__attribute__((unused)) user_data_t *ud) {

  DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);

  /* read all events only for configured cores */
  for (struct event *e = g_ctx.event_list->eventlist; e != NULL; e = e->next) {
    for (size_t grp = 0; grp < g_ctx.cores.num_cgroups; grp++) {
      core_group_t *cgroup = g_ctx.cores.cgroups + grp;
      for (size_t idx = 0; idx < cgroup->num_cores; idx++) {
        int core = (int)cgroup->cores[idx];
        if (e->efd[core].fd < 0)
          continue;

        int status = read_event(e, core);
        if (status != 0) {
          ERROR(PMU_PLUGIN ": Failed to read value of %s/%d event.", e->event,
                core);
          return status;
        }
      }
    }
  }

  pmu_dispatch_data();

  return 0;
}
482
/* Append one event per entry of events[] (built-in events of the given
 * perf type) to the event list. Returns 0 on success or -ENOMEM. */
static int pmu_add_events(struct eventlist *el, uint32_t type,
                          event_info_t *events, size_t count) {

  for (size_t i = 0; i < count; i++) {
    /* Allocate memory for event struct that contains array of efd structs
       for all cores */
    struct event *e =
        calloc(1, sizeof(struct event) + sizeof(struct efd) * el->num_cpus);
    if (e == NULL) {
      ERROR(PMU_PLUGIN ": Failed to allocate event structure");
      return -ENOMEM;
    }
    /* Mark all per-core fds unused, consistent with the other pmu_add_*
     * helpers; read/dispatch skip entries with fd < 0, while calloc's 0
     * would look like a valid descriptor. */
    for (size_t j = 0; j < el->num_cpus; j++)
      e->efd[j].fd = -1;

    e->attr.type = type;
    e->attr.config = events[i].config;
    e->attr.size = PERF_ATTR_SIZE_VER0;
    /* Duplicate the name before linking so a failure can free cleanly. */
    e->event = strdup(events[i].name);
    if (e->event == NULL) {
      ERROR(PMU_PLUGIN ": Failed to allocate event name");
      sfree(e);
      return -ENOMEM;
    }
    if (!el->eventlist)
      el->eventlist = e;
    if (el->eventlist_last)
      el->eventlist_last->next = e;
    el->eventlist_last = e;
  }

  return 0;
}
509
pmu_add_cloned_pmus(struct eventlist * el,struct event * e)510 static int pmu_add_cloned_pmus(struct eventlist *el, struct event *e) {
511 struct perf_event_attr attr = e->attr;
512 int ret;
513
514 while ((ret = jevent_next_pmu(&e->extra, &attr)) == 1) {
515 /* Allocate memory for event struct that contains array of efd structs
516 for all cores */
517 struct event *ne =
518 calloc(1, sizeof(struct event) + sizeof(struct efd) * el->num_cpus);
519 if (ne == NULL) {
520 return -ENOMEM;
521 }
522 for (size_t i = 0; i < el->num_cpus; i++)
523 ne->efd[i].fd = -1;
524
525 ne->attr = attr;
526 ne->orig = e;
527 ne->uncore = e->uncore;
528 e->num_clones++;
529 jevent_copy_extra(&ne->extra, &e->extra);
530
531 ne->next = NULL;
532 if (!el->eventlist)
533 el->eventlist = ne;
534 if (el->eventlist_last)
535 el->eventlist_last->next = ne;
536 el->eventlist_last = ne;
537 ne->event = strdup(e->event);
538 }
539
540 if (ret < 0) {
541 ERROR(PMU_PLUGIN ": Cannot find PMU for event %s", e->event);
542 return ret;
543 }
544
545 return 0;
546 }
547
pmu_add_hw_events(struct eventlist * el,char ** e,size_t count)548 static int pmu_add_hw_events(struct eventlist *el, char **e, size_t count) {
549
550 for (size_t i = 0; i < count; i++) {
551
552 size_t group_events_count = 0;
553
554 char *events = strdup(e[i]);
555 if (!events)
556 return -1;
557
558 bool group = strrchr(events, ',') != NULL ? true : false;
559
560 char *s, *tmp = NULL;
561 for (s = strtok_r(events, ",", &tmp); s; s = strtok_r(NULL, ",", &tmp)) {
562
563 /* Allocate memory for event struct that contains array of efd structs
564 for all cores */
565 struct event *e =
566 calloc(1, sizeof(struct event) + sizeof(struct efd) * el->num_cpus);
567 if (e == NULL) {
568 free(events);
569 return -ENOMEM;
570 }
571 for (size_t j = 0; j < el->num_cpus; j++)
572 e->efd[j].fd = -1;
573
574 if (resolve_event_extra(s, &e->attr, &e->extra) != 0) {
575 WARNING(PMU_PLUGIN ": Cannot resolve %s", s);
576 sfree(e);
577 continue;
578 }
579
580 e->uncore = jevent_pmu_uncore(e->extra.decoded);
581
582 /* Multiple events parsed in one entry */
583 if (group) {
584 if (e->extra.multi_pmu) {
585 ERROR(PMU_PLUGIN ": Cannot handle multi pmu event %s in a group\n",
586 s);
587 jevent_free_extra(&e->extra);
588 sfree(e);
589 sfree(events);
590 return -1;
591 }
592 if (group_events_count == 0)
593 /* Mark first added event as group leader */
594 e->group_leader = 1;
595
596 e->ingroup = 1;
597 }
598
599 e->next = NULL;
600 if (!el->eventlist)
601 el->eventlist = e;
602 if (el->eventlist_last)
603 el->eventlist_last->next = e;
604 el->eventlist_last = e;
605 e->event = strdup(s);
606
607 if (e->extra.multi_pmu && pmu_add_cloned_pmus(el, e) != 0)
608 return -1;
609
610 group_events_count++;
611 }
612
613 /* Multiple events parsed in one entry */
614 if (group && group_events_count > 0) {
615 /* Mark last added event as group end */
616 el->eventlist_last->end_group = 1;
617 }
618
619 free(events);
620 }
621
622 return 0;
623 }
624
/* Release the whole event list; a NULL list is a no-op. */
static void pmu_free_events(struct eventlist *el) {
  if (el != NULL)
    free_eventlist(el);
}
632
/* Open perf counters for every event on every configured core via jevents'
 * setup_event(). measure_all/measure_pid are forwarded to setup_event()
 * (pmu_init calls this with true/-1 to measure all processes).
 * Returns 0 if at least one counter was opened, -1 if none was. */
static int pmu_setup_events(struct eventlist *el, bool measure_all,
                            int measure_pid) {
  struct event *e, *leader = NULL;
  int ret = -1;

  for (e = el->eventlist; e; e = e->next) {

    for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
      core_group_t *cgroup = g_ctx.cores.cgroups + i;
      for (size_t j = 0; j < cgroup->num_cores; j++) {
        int core = (int)cgroup->cores[j];

        /* Uncore events are set up only on cores listed in
           el->socket_cpus (one representative CPU per socket); all other
           configured cores are skipped. */
        if (e->uncore) {
          bool match = false;
          for (size_t k = 0; k < el->num_sockets; k++)
            if (el->socket_cpus[k] == core) {
              match = true;
              break;
            }
          if (!match)
            continue;
        }

        if (setup_event(e, core, leader, measure_all, measure_pid) < 0) {
          WARNING(PMU_PLUGIN ": perf event '%s' is not available (cpu=%d).",
                  e->event, core);
        } else {
          /* success if at least one event was set */
          ret = 0;
        }
      }
    }

    /* Group-leader tracking: the leader pointer is updated AFTER setting
       up the current event, so the leader itself is opened with
       leader==NULL and the following group members reference it; the
       end-of-group marker resets it for the next group. */
    if (e->group_leader)
      leader = e;
    if (e->end_group)
      leader = NULL;
  }

  return ret;
}
674
pmu_init(void)675 static int pmu_init(void) {
676 int ret;
677
678 DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);
679
680 g_ctx.event_list = alloc_eventlist();
681 if (g_ctx.event_list == NULL) {
682 ERROR(PMU_PLUGIN ": Failed to allocate event list.");
683 return -ENOMEM;
684 }
685
686 if (g_ctx.cores.num_cgroups == 0) {
687 ret = config_cores_default(g_ctx.event_list->num_cpus, &g_ctx.cores);
688 if (ret != 0) {
689 ERROR(PMU_PLUGIN ": Failed to set default core groups.");
690 goto init_error;
691 }
692 } else {
693 ret = pmu_validate_cgroups(g_ctx.cores.cgroups, g_ctx.cores.num_cgroups,
694 g_ctx.event_list->num_cpus);
695 if (ret != 0) {
696 ERROR(PMU_PLUGIN ": Invalid core groups configuration.");
697 goto init_error;
698 }
699 }
700 #if COLLECT_DEBUG
701 pmu_dump_cgroups();
702 #endif
703
704 if (g_ctx.hw_cache_events) {
705 ret =
706 pmu_add_events(g_ctx.event_list, PERF_TYPE_HW_CACHE, g_hw_cache_events,
707 STATIC_ARRAY_SIZE(g_hw_cache_events));
708 if (ret != 0) {
709 ERROR(PMU_PLUGIN ": Failed to add hw cache events.");
710 goto init_error;
711 }
712 }
713
714 if (g_ctx.kernel_pmu_events) {
715 ret = pmu_add_events(g_ctx.event_list, PERF_TYPE_HARDWARE,
716 g_kernel_pmu_events,
717 STATIC_ARRAY_SIZE(g_kernel_pmu_events));
718 if (ret != 0) {
719 ERROR(PMU_PLUGIN ": Failed to add kernel PMU events.");
720 goto init_error;
721 }
722 }
723
724 /* parse events names if config option is present and is not empty */
725 if (g_ctx.hw_events_count) {
726
727 ret = read_events(g_ctx.event_list_fn);
728 if (ret != 0) {
729 ERROR(PMU_PLUGIN ": Failed to read event list file '%s'.",
730 g_ctx.event_list_fn);
731 return ret;
732 }
733
734 ret = pmu_add_hw_events(g_ctx.event_list, g_ctx.hw_events,
735 g_ctx.hw_events_count);
736 if (ret != 0) {
737 ERROR(PMU_PLUGIN ": Failed to add hardware events.");
738 goto init_error;
739 }
740 }
741
742 if (g_ctx.sw_events) {
743 ret = pmu_add_events(g_ctx.event_list, PERF_TYPE_SOFTWARE, g_sw_events,
744 STATIC_ARRAY_SIZE(g_sw_events));
745 if (ret != 0) {
746 ERROR(PMU_PLUGIN ": Failed to add software events.");
747 goto init_error;
748 }
749 }
750
751 #if COLLECT_DEBUG
752 pmu_dump_events();
753 #endif
754
755 if (g_ctx.event_list->eventlist != NULL) {
756 /* measure all processes */
757 ret = pmu_setup_events(g_ctx.event_list, true, -1);
758 if (ret != 0) {
759 ERROR(PMU_PLUGIN ": Failed to setup perf events for the event list.");
760 goto init_error;
761 }
762 } else {
763 WARNING(PMU_PLUGIN
764 ": Events list is empty. No events were setup for monitoring.");
765 }
766
767 return 0;
768
769 init_error:
770
771 pmu_free_events(g_ctx.event_list);
772 g_ctx.event_list = NULL;
773 for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
774 sfree(g_ctx.hw_events[i]);
775 }
776 sfree(g_ctx.hw_events);
777 g_ctx.hw_events_count = 0;
778
779 config_cores_cleanup(&g_ctx.cores);
780
781 return ret;
782 }
783
pmu_shutdown(void)784 static int pmu_shutdown(void) {
785
786 DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);
787
788 pmu_free_events(g_ctx.event_list);
789 g_ctx.event_list = NULL;
790 for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
791 sfree(g_ctx.hw_events[i]);
792 }
793 sfree(g_ctx.hw_events);
794 g_ctx.hw_events_count = 0;
795
796 config_cores_cleanup(&g_ctx.cores);
797
798 return 0;
799 }
800
/* Entry point invoked by the collectd plugin loader: register the init,
 * config, read and shutdown callbacks for this plugin. */
void module_register(void) {
  plugin_register_init(PMU_PLUGIN, pmu_init);
  plugin_register_complex_config(PMU_PLUGIN, pmu_config);
  /* NULL group and interval 0: use the globally configured read interval. */
  plugin_register_complex_read(NULL, PMU_PLUGIN, pmu_read, 0, NULL);
  plugin_register_shutdown(PMU_PLUGIN, pmu_shutdown);
}
807