/**
 * collectd - src/intel_pmu.c
 *
 * Copyright(c) 2017-2020 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *   Serhiy Pshyk <serhiyx.pshyk@intel.com>
 *   Kamil Wiatrowski <kamilx.wiatrowski@intel.com>
 **/

#include "collectd.h"
#include "utils/common/common.h"

#include "utils/config_cores/config_cores.h"

#include <jevents.h>
#include <jsession.h>

#define PMU_PLUGIN "intel_pmu"

#define HW_CACHE_READ_ACCESS                                                   \
  (((PERF_COUNT_HW_CACHE_OP_READ) << 8) |                                      \
   ((PERF_COUNT_HW_CACHE_RESULT_ACCESS) << 16))

#define HW_CACHE_WRITE_ACCESS                                                  \
  (((PERF_COUNT_HW_CACHE_OP_WRITE) << 8) |                                     \
   ((PERF_COUNT_HW_CACHE_RESULT_ACCESS) << 16))

#define HW_CACHE_PREFETCH_ACCESS                                               \
  (((PERF_COUNT_HW_CACHE_OP_PREFETCH) << 8) |                                  \
   ((PERF_COUNT_HW_CACHE_RESULT_ACCESS) << 16))

#define HW_CACHE_READ_MISS                                                     \
  (((PERF_COUNT_HW_CACHE_OP_READ) << 8) |                                      \
   ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))

#define HW_CACHE_WRITE_MISS                                                    \
  (((PERF_COUNT_HW_CACHE_OP_WRITE) << 8) |                                     \
   ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))

#define HW_CACHE_PREFETCH_MISS                                                 \
  (((PERF_COUNT_HW_CACHE_OP_PREFETCH) << 8) |                                  \
   ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))
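
/* The perf_event_open(2) ABI encodes a hardware cache event as
 *   config = (cache id) | (op id << 8) | (result id << 16),
 * which is what the helpers above pre-compute for the op and result fields.
 * For example, an L1 data cache read miss is
 *   PERF_COUNT_HW_CACHE_L1D | HW_CACHE_READ_MISS
 * i.e. PERF_COUNT_HW_CACHE_L1D |
 *      (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *      (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
 * exactly as used in g_hw_cache_events below. */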

struct event_info {
  char *name;
  uint64_t config;
};
typedef struct event_info event_info_t;

struct intel_pmu_ctx_s {
  bool hw_cache_events;         /* dispatch predefined hw cache events */
  bool kernel_pmu_events;       /* dispatch predefined kernel PMU events */
  bool sw_events;               /* dispatch predefined software events */
  char event_list_fn[PATH_MAX]; /* path to the JSON event list file */
  char **hw_events;             /* event names from HardwareEvents */
  size_t hw_events_count;
  core_groups_list_t cores;     /* core groups to monitor */
  struct eventlist *event_list;
  bool dispatch_cloned_pmus;    /* report cloned PMU instances separately */
};
typedef struct intel_pmu_ctx_s intel_pmu_ctx_t;

event_info_t g_kernel_pmu_events[] = {
    {.name = "cpu-cycles", .config = PERF_COUNT_HW_CPU_CYCLES},
    {.name = "instructions", .config = PERF_COUNT_HW_INSTRUCTIONS},
    {.name = "cache-references", .config = PERF_COUNT_HW_CACHE_REFERENCES},
    {.name = "cache-misses", .config = PERF_COUNT_HW_CACHE_MISSES},
    {.name = "branches", .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS},
    {.name = "branch-misses", .config = PERF_COUNT_HW_BRANCH_MISSES},
    {.name = "bus-cycles", .config = PERF_COUNT_HW_BUS_CYCLES},
};

event_info_t g_hw_cache_events[] = {

    {.name = "L1-dcache-loads",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_READ_ACCESS)},
    {.name = "L1-dcache-load-misses",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_READ_MISS)},
    {.name = "L1-dcache-stores",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_WRITE_ACCESS)},
    {.name = "L1-dcache-store-misses",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_WRITE_MISS)},
    {.name = "L1-dcache-prefetches",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_PREFETCH_ACCESS)},
    {.name = "L1-dcache-prefetch-misses",
     .config = (PERF_COUNT_HW_CACHE_L1D | HW_CACHE_PREFETCH_MISS)},

    {.name = "L1-icache-loads",
     .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_READ_ACCESS)},
    {.name = "L1-icache-load-misses",
     .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_READ_MISS)},
    {.name = "L1-icache-prefetches",
     .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_PREFETCH_ACCESS)},
    {.name = "L1-icache-prefetch-misses",
     .config = (PERF_COUNT_HW_CACHE_L1I | HW_CACHE_PREFETCH_MISS)},

    {.name = "LLC-loads",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_READ_ACCESS)},
    {.name = "LLC-load-misses",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_READ_MISS)},
    {.name = "LLC-stores",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_WRITE_ACCESS)},
    {.name = "LLC-store-misses",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_WRITE_MISS)},
    {.name = "LLC-prefetches",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_PREFETCH_ACCESS)},
    {.name = "LLC-prefetch-misses",
     .config = (PERF_COUNT_HW_CACHE_LL | HW_CACHE_PREFETCH_MISS)},

    {.name = "dTLB-loads",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_READ_ACCESS)},
    {.name = "dTLB-load-misses",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_READ_MISS)},
    {.name = "dTLB-stores",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_WRITE_ACCESS)},
    {.name = "dTLB-store-misses",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_WRITE_MISS)},
    {.name = "dTLB-prefetches",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_PREFETCH_ACCESS)},
    {.name = "dTLB-prefetch-misses",
     .config = (PERF_COUNT_HW_CACHE_DTLB | HW_CACHE_PREFETCH_MISS)},

    {.name = "iTLB-loads",
     .config = (PERF_COUNT_HW_CACHE_ITLB | HW_CACHE_READ_ACCESS)},
    {.name = "iTLB-load-misses",
     .config = (PERF_COUNT_HW_CACHE_ITLB | HW_CACHE_READ_MISS)},

    {.name = "branch-loads",
     .config = (PERF_COUNT_HW_CACHE_BPU | HW_CACHE_READ_ACCESS)},
    {.name = "branch-load-misses",
     .config = (PERF_COUNT_HW_CACHE_BPU | HW_CACHE_READ_MISS)},
};

event_info_t g_sw_events[] = {
    {.name = "cpu-clock", .config = PERF_COUNT_SW_CPU_CLOCK},
    {.name = "task-clock", .config = PERF_COUNT_SW_TASK_CLOCK},
    {.name = "context-switches", .config = PERF_COUNT_SW_CONTEXT_SWITCHES},
    {.name = "cpu-migrations", .config = PERF_COUNT_SW_CPU_MIGRATIONS},
    {.name = "page-faults", .config = PERF_COUNT_SW_PAGE_FAULTS},
    {.name = "minor-faults", .config = PERF_COUNT_SW_PAGE_FAULTS_MIN},
    {.name = "major-faults", .config = PERF_COUNT_SW_PAGE_FAULTS_MAJ},
    {.name = "alignment-faults", .config = PERF_COUNT_SW_ALIGNMENT_FAULTS},
    {.name = "emulation-faults", .config = PERF_COUNT_SW_EMULATION_FAULTS},
};

static intel_pmu_ctx_t g_ctx;

#if COLLECT_DEBUG
static void pmu_dump_events(void) {

  DEBUG(PMU_PLUGIN ": Events:");

  struct event *e;

  for (e = g_ctx.event_list->eventlist; e; e = e->next) {
    DEBUG(PMU_PLUGIN ":   event       : %s", e->event);
    DEBUG(PMU_PLUGIN ":     group_lead: %d", e->group_leader);
    DEBUG(PMU_PLUGIN ":     in_group  : %d", e->ingroup);
    DEBUG(PMU_PLUGIN ":     end_group : %d", e->end_group);
    DEBUG(PMU_PLUGIN ":     type      : %#x", e->attr.type);
    DEBUG(PMU_PLUGIN ":     config    : %#x", (unsigned)e->attr.config);
    DEBUG(PMU_PLUGIN ":     size      : %d", e->attr.size);
    if (e->attr.sample_period > 0)
      DEBUG(PMU_PLUGIN ":     period    : %llu", e->attr.sample_period);
    if (e->extra.decoded)
      DEBUG(PMU_PLUGIN ":     perf      : %s", e->extra.decoded);
    DEBUG(PMU_PLUGIN ":     uncore    : %d", e->uncore);
  }
}

static void pmu_dump_config(void) {

  DEBUG(PMU_PLUGIN ": Config:");
  DEBUG(PMU_PLUGIN ":   dispatch_cloned_pmus: %d", g_ctx.dispatch_cloned_pmus);
  DEBUG(PMU_PLUGIN ":   hw_cache_events     : %d", g_ctx.hw_cache_events);
  DEBUG(PMU_PLUGIN ":   kernel_pmu_events   : %d", g_ctx.kernel_pmu_events);
  DEBUG(PMU_PLUGIN ":   software_events     : %d", g_ctx.sw_events);

  for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
    DEBUG(PMU_PLUGIN ":   hardware_events[%" PRIsz "]  : %s", i,
          g_ctx.hw_events[i]);
  }
}

static void pmu_dump_cgroups(void) {

  DEBUG(PMU_PLUGIN ": num cpus   : %d", g_ctx.event_list->num_cpus);
  DEBUG(PMU_PLUGIN ": num sockets: %d", g_ctx.event_list->num_sockets);
  for (size_t i = 0; i < g_ctx.event_list->num_sockets; i++) {
    DEBUG(PMU_PLUGIN ":   socket [%" PRIsz "] core: %d", i,
          g_ctx.event_list->socket_cpus[i]);
  }

  DEBUG(PMU_PLUGIN ": Core groups:");

  for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
    core_group_t *cgroup = g_ctx.cores.cgroups + i;
    const size_t cores_size = cgroup->num_cores * 4 + 1;
    char *cores = calloc(cores_size, sizeof(*cores));
    if (cores == NULL) {
      DEBUG(PMU_PLUGIN ": Failed to allocate string to list cores.");
      return;
    }
    for (size_t j = 0; j < cgroup->num_cores; j++)
      if (snprintf(cores + strlen(cores), cores_size - strlen(cores), " %d",
                   cgroup->cores[j]) < 0) {
        DEBUG(PMU_PLUGIN ": Failed to write list of cores to string.");
        sfree(cores);
        return;
      }

    DEBUG(PMU_PLUGIN ":   group[%" PRIsz "]", i);
    DEBUG(PMU_PLUGIN ":     description: %s", cgroup->desc);
    DEBUG(PMU_PLUGIN ":     cores count: %" PRIsz, cgroup->num_cores);
    DEBUG(PMU_PLUGIN ":     cores      :%s", cores);
    sfree(cores);
  }
}

#endif /* COLLECT_DEBUG */

static int pmu_validate_cgroups(core_group_t *cgroups, size_t len,
                                int max_cores) {
  /* i - group index, j - core index */
  for (size_t i = 0; i < len; i++) {
    for (size_t j = 0; j < cgroups[i].num_cores; j++) {
      int core = (int)cgroups[i].cores[j];

      /* A core index cannot exceed the number of cores in the system;
         note that max_cores includes both online and offline CPUs. */
      if (core >= max_cores) {
        ERROR(PMU_PLUGIN ": Core %d is not valid, max core index: %d.", core,
              max_cores - 1);
        return -1;
      }
    }
    /* Check that the same cores are not set in any of the remaining groups */
    for (size_t k = i + 1; k < len; k++)
      if (config_cores_cmp_cgroups(&cgroups[i], &cgroups[k]) != 0) {
        ERROR(PMU_PLUGIN ": Same cores cannot be set in different groups.");
        return -1;
      }
  }
  return 0;
}

static int pmu_config_hw_events(oconfig_item_t *ci) {

  if (strcasecmp("HardwareEvents", ci->key) != 0) {
    return -EINVAL;
  }

  if (g_ctx.hw_events) {
    ERROR(PMU_PLUGIN ": Duplicate config for HardwareEvents.");
    return -EINVAL;
  }

  g_ctx.hw_events = calloc(ci->values_num, sizeof(*g_ctx.hw_events));
  if (g_ctx.hw_events == NULL) {
    ERROR(PMU_PLUGIN ": Failed to allocate hw events.");
    return -ENOMEM;
  }

  for (int i = 0; i < ci->values_num; i++) {
    if (ci->values[i].type != OCONFIG_TYPE_STRING) {
      WARNING(PMU_PLUGIN ": The %s option requires string arguments.", ci->key);
      continue;
    }

    g_ctx.hw_events[g_ctx.hw_events_count] = strdup(ci->values[i].value.string);
    if (g_ctx.hw_events[g_ctx.hw_events_count] == NULL) {
      ERROR(PMU_PLUGIN ": Failed to allocate hw events entry.");
      return -ENOMEM;
    }

    g_ctx.hw_events_count++;
  }

  return 0;
}

static int pmu_config(oconfig_item_t *ci) {

  DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);

  for (int i = 0; i < ci->children_num; i++) {
    int ret = 0;
    oconfig_item_t *child = ci->children + i;

    if (strcasecmp("ReportHardwareCacheEvents", child->key) == 0) {
      ret = cf_util_get_boolean(child, &g_ctx.hw_cache_events);
    } else if (strcasecmp("ReportKernelPMUEvents", child->key) == 0) {
      ret = cf_util_get_boolean(child, &g_ctx.kernel_pmu_events);
    } else if (strcasecmp("EventList", child->key) == 0) {
      ret = cf_util_get_string_buffer(child, g_ctx.event_list_fn,
                                      sizeof(g_ctx.event_list_fn));
    } else if (strcasecmp("HardwareEvents", child->key) == 0) {
      ret = pmu_config_hw_events(child);
    } else if (strcasecmp("ReportSoftwareEvents", child->key) == 0) {
      ret = cf_util_get_boolean(child, &g_ctx.sw_events);
    } else if (strcasecmp("Cores", child->key) == 0) {
      ret = config_cores_parse(child, &g_ctx.cores);
    } else if (strcasecmp("DispatchMultiPmu", child->key) == 0) {
      ret = cf_util_get_boolean(child, &g_ctx.dispatch_cloned_pmus);
    } else {
      ERROR(PMU_PLUGIN ": Unknown configuration parameter \"%s\".", child->key);
      ret = -1;
    }

    if (ret != 0) {
      DEBUG(PMU_PLUGIN ": %s:%d ret=%d", __FUNCTION__, __LINE__, ret);
      return ret;
    }
  }

#if COLLECT_DEBUG
  pmu_dump_config();
#endif

  return 0;
}
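
/* For reference, a minimal illustrative configuration handled by pmu_config()
 * could look like the block below. The event list path and the HardwareEvents
 * names are placeholders only; valid names depend on the JSON event list for
 * the target CPU.
 *
 *   <Plugin intel_pmu>
 *     ReportHardwareCacheEvents true
 *     ReportKernelPMUEvents true
 *     ReportSoftwareEvents true
 *     EventList "/var/cache/pmu/GenuineIntel-6-55-4-core.json"
 *     HardwareEvents "L2_RQSTS.CODE_RD_HIT,L2_RQSTS.CODE_RD_MISS"
 *     Cores "0-3" "4,5"
 *   </Plugin>
 */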

static void pmu_submit_counter(const char *cgroup, const char *event,
                               const uint32_t *event_type, counter_t value,
                               meta_data_t *meta) {
  value_list_t vl = VALUE_LIST_INIT;

  vl.values = &(value_t){.counter = value};
  vl.values_len = 1;

  sstrncpy(vl.plugin, PMU_PLUGIN, sizeof(vl.plugin));
  sstrncpy(vl.plugin_instance, cgroup, sizeof(vl.plugin_instance));
  if (meta)
    vl.meta = meta;
  sstrncpy(vl.type, "counter", sizeof(vl.type));
  if (event_type)
    ssnprintf(vl.type_instance, sizeof(vl.type_instance), "%s:type=%d", event,
              *event_type);
  else
    sstrncpy(vl.type_instance, event, sizeof(vl.type_instance));

  plugin_dispatch_values(&vl);
}
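
/* With the fields set above, collectd identifies the dispatched value as
 * <host>/intel_pmu-<core group>/counter-<event>, with ":type=<pmu type>"
 * appended to the type instance when cloned PMUs are reported separately. */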

meta_data_t *pmu_meta_data_create(const struct efd *efd) {
  meta_data_t *meta = NULL;

  /* create meta data only if the value was scaled */
  if (efd->val[1] == efd->val[2] || !efd->val[2]) {
    return NULL;
  }

  meta = meta_data_create();
  if (meta == NULL) {
    ERROR(PMU_PLUGIN ": meta_data_create failed.");
    return NULL;
  }

  DEBUG(PMU_PLUGIN ": scaled value = [raw]%" PRIu64 " * [enabled]%" PRIu64
                   " / [running]%" PRIu64,
        efd->val[0], efd->val[1], efd->val[2]);
  meta_data_add_unsigned_int(meta, "intel_pmu:raw_count", efd->val[0]);
  meta_data_add_unsigned_int(meta, "intel_pmu:time_enabled", efd->val[1]);
  meta_data_add_unsigned_int(meta, "intel_pmu:time_running", efd->val[2]);

  return meta;
}

static void pmu_dispatch_data(void) {

  struct event *e;

  for (e = g_ctx.event_list->eventlist; e; e = e->next) {
    const uint32_t *event_type = NULL;
    if (e->orig && !g_ctx.dispatch_cloned_pmus)
      continue;
    if ((e->extra.multi_pmu || e->orig) && g_ctx.dispatch_cloned_pmus)
      event_type = &e->attr.type;

    for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
      core_group_t *cgroup = g_ctx.cores.cgroups + i;
      uint64_t cgroup_value = 0;
      int event_enabled_cgroup = 0;
      meta_data_t *meta = NULL;

      for (size_t j = 0; j < cgroup->num_cores; j++) {
        int core = (int)cgroup->cores[j];
        if (e->efd[core].fd < 0)
          continue;

        event_enabled_cgroup++;

        /* If there are more events than counters, the kernel uses time
         * multiplexing. With multiplexing, at the end of the run, the
         * counter is scaled based on total time enabled vs. time running:
         *   final_count = raw_count * time_enabled / time_running
         * e.g. a raw count of 1000 with the event enabled for 10 ms but
         * running for only 5 ms is reported as 2000.
         */
        if (e->extra.multi_pmu && !g_ctx.dispatch_cloned_pmus)
          cgroup_value += event_scaled_value_sum(e, core);
        else {
          cgroup_value += event_scaled_value(e, core);

          /* get meta data with information about scaling */
          if (cgroup->num_cores == 1)
            meta = pmu_meta_data_create(&e->efd[core]);
        }
      }

      if (event_enabled_cgroup > 0) {
#if COLLECT_DEBUG
        if (event_type)
          DEBUG(PMU_PLUGIN ": %s:type=%d/%s = %" PRIu64, e->event, *event_type,
                cgroup->desc, cgroup_value);
        else
          DEBUG(PMU_PLUGIN ": %s/%s = %" PRIu64, e->event, cgroup->desc,
                cgroup_value);
#endif
        /* dispatch per core group value */
        pmu_submit_counter(cgroup->desc, e->event, event_type, cgroup_value,
                           meta);
        meta_data_destroy(meta);
      }
    }
  }
}

static int pmu_read(__attribute__((unused)) user_data_t *ud) {
  int ret;
  struct event *e;

  DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);

  /* read all events, but only for the configured cores */
  for (e = g_ctx.event_list->eventlist; e; e = e->next) {
    for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
      core_group_t *cgroup = g_ctx.cores.cgroups + i;
      for (size_t j = 0; j < cgroup->num_cores; j++) {
        int core = (int)cgroup->cores[j];
        if (e->efd[core].fd < 0)
          continue;

        ret = read_event(e, core);
        if (ret != 0) {
          ERROR(PMU_PLUGIN ": Failed to read value of event %s on core %d.",
                e->event, core);
          return ret;
        }
      }
    }
  }

  pmu_dispatch_data();

  return 0;
}

static int pmu_add_events(struct eventlist *el, uint32_t type,
                          event_info_t *events, size_t count) {

  for (size_t i = 0; i < count; i++) {
    /* Allocate memory for event struct that contains array of efd structs
       for all cores */
    struct event *e =
        calloc(1, sizeof(struct event) + sizeof(struct efd) * el->num_cpus);
    if (e == NULL) {
      ERROR(PMU_PLUGIN ": Failed to allocate event structure");
      return -ENOMEM;
    }

    e->attr.type = type;
    e->attr.config = events[i].config;
    e->attr.size = PERF_ATTR_SIZE_VER0;
    if (!el->eventlist)
      el->eventlist = e;
    if (el->eventlist_last)
      el->eventlist_last->next = e;
    el->eventlist_last = e;
    e->event = strdup(events[i].name);
  }

  return 0;
}

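/* Some events resolve to more than one kernel PMU instance (multi_pmu), for
 * example uncore events that exist once per uncore box. jevent_next_pmu()
 * steps through the remaining instances so that a cloned event entry can be
 * appended for each of them; each clone keeps a pointer to the original
 * event in 'orig'. */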
static int pmu_add_cloned_pmus(struct eventlist *el, struct event *e) {
  struct perf_event_attr attr = e->attr;
  int ret;

  while ((ret = jevent_next_pmu(&e->extra, &attr)) == 1) {
    /* Allocate memory for event struct that contains array of efd structs
       for all cores */
    struct event *ne =
        calloc(1, sizeof(struct event) + sizeof(struct efd) * el->num_cpus);
    if (ne == NULL) {
      return -ENOMEM;
    }
    for (size_t i = 0; i < el->num_cpus; i++)
      ne->efd[i].fd = -1;

    ne->attr = attr;
    ne->orig = e;
    ne->uncore = e->uncore;
    e->num_clones++;
    jevent_copy_extra(&ne->extra, &e->extra);

    ne->next = NULL;
    if (!el->eventlist)
      el->eventlist = ne;
    if (el->eventlist_last)
      el->eventlist_last->next = ne;
    el->eventlist_last = ne;
    ne->event = strdup(e->event);
  }

  if (ret < 0) {
    ERROR(PMU_PLUGIN ": Cannot find PMU for event %s", e->event);
    return ret;
  }

  return 0;
}

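/* Each HardwareEvents entry may hold a comma-separated list of events. Such
 * a list is opened as a single perf event group: the first event is marked
 * as the group leader and the last one ends the group, so the kernel
 * schedules all of them onto the PMU at the same time. */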
static int pmu_add_hw_events(struct eventlist *el, char **hw_events,
                             size_t count) {

  for (size_t i = 0; i < count; i++) {

    size_t group_events_count = 0;

    char *events = strdup(hw_events[i]);
    if (!events)
      return -1;

    /* a comma means the entry describes an event group */
    bool group = strrchr(events, ',') != NULL;

    char *s, *tmp = NULL;
    for (s = strtok_r(events, ",", &tmp); s; s = strtok_r(NULL, ",", &tmp)) {

      /* Allocate memory for event struct that contains array of efd structs
         for all cores */
      struct event *e =
          calloc(1, sizeof(struct event) + sizeof(struct efd) * el->num_cpus);
      if (e == NULL) {
        sfree(events);
        return -ENOMEM;
      }
      for (size_t j = 0; j < el->num_cpus; j++)
        e->efd[j].fd = -1;

      if (resolve_event_extra(s, &e->attr, &e->extra) != 0) {
        WARNING(PMU_PLUGIN ": Cannot resolve %s", s);
        sfree(e);
        continue;
      }

      e->uncore = jevent_pmu_uncore(e->extra.decoded);

      /* Multiple events parsed in one entry */
      if (group) {
        if (e->extra.multi_pmu) {
          ERROR(PMU_PLUGIN ": Cannot handle multi pmu event %s in a group", s);
          jevent_free_extra(&e->extra);
          sfree(e);
          sfree(events);
          return -1;
        }
        if (group_events_count == 0)
          /* Mark the first added event as the group leader */
          e->group_leader = 1;

        e->ingroup = 1;
      }

      e->next = NULL;
      if (!el->eventlist)
        el->eventlist = e;
      if (el->eventlist_last)
        el->eventlist_last->next = e;
      el->eventlist_last = e;
      e->event = strdup(s);

      if (e->extra.multi_pmu && pmu_add_cloned_pmus(el, e) != 0) {
        sfree(events);
        return -1;
      }

      group_events_count++;
    }

    /* Multiple events parsed in one entry */
    if (group && group_events_count > 0) {
      /* Mark the last added event as the group end */
      el->eventlist_last->end_group = 1;
    }

    sfree(events);
  }

  return 0;
}

static void pmu_free_events(struct eventlist *el) {

  if (el == NULL)
    return;

  free_eventlist(el);
}

static int pmu_setup_events(struct eventlist *el, bool measure_all,
                            int measure_pid) {
  struct event *e, *leader = NULL;
  int ret = -1;

  for (e = el->eventlist; e; e = e->next) {

    for (size_t i = 0; i < g_ctx.cores.num_cgroups; i++) {
      core_group_t *cgroup = g_ctx.cores.cgroups + i;
      for (size_t j = 0; j < cgroup->num_cores; j++) {
        int core = (int)cgroup->cores[j];

        /* uncore events are set up only on the per-socket representative
           CPUs listed in socket_cpus */
        if (e->uncore) {
          bool match = false;
          for (size_t k = 0; k < el->num_sockets; k++)
            if (el->socket_cpus[k] == core) {
              match = true;
              break;
            }
          if (!match)
            continue;
        }

        if (setup_event(e, core, leader, measure_all, measure_pid) < 0) {
          WARNING(PMU_PLUGIN ": perf event '%s' is not available (cpu=%d).",
                  e->event, core);
        } else {
          /* success if at least one event was set */
          ret = 0;
        }
      }
    }

    if (e->group_leader)
      leader = e;
    if (e->end_group)
      leader = NULL;
  }

  return ret;
}

static int pmu_init(void) {
  int ret;

  DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);

  g_ctx.event_list = alloc_eventlist();
  if (g_ctx.event_list == NULL) {
    ERROR(PMU_PLUGIN ": Failed to allocate event list.");
    return -ENOMEM;
  }

  if (g_ctx.cores.num_cgroups == 0) {
    ret = config_cores_default(g_ctx.event_list->num_cpus, &g_ctx.cores);
    if (ret != 0) {
      ERROR(PMU_PLUGIN ": Failed to set default core groups.");
      goto init_error;
    }
  } else {
    ret = pmu_validate_cgroups(g_ctx.cores.cgroups, g_ctx.cores.num_cgroups,
                               g_ctx.event_list->num_cpus);
    if (ret != 0) {
      ERROR(PMU_PLUGIN ": Invalid core groups configuration.");
      goto init_error;
    }
  }
#if COLLECT_DEBUG
  pmu_dump_cgroups();
#endif

  if (g_ctx.hw_cache_events) {
    ret =
        pmu_add_events(g_ctx.event_list, PERF_TYPE_HW_CACHE, g_hw_cache_events,
                       STATIC_ARRAY_SIZE(g_hw_cache_events));
    if (ret != 0) {
      ERROR(PMU_PLUGIN ": Failed to add hw cache events.");
      goto init_error;
    }
  }

  if (g_ctx.kernel_pmu_events) {
    ret = pmu_add_events(g_ctx.event_list, PERF_TYPE_HARDWARE,
                         g_kernel_pmu_events,
                         STATIC_ARRAY_SIZE(g_kernel_pmu_events));
    if (ret != 0) {
      ERROR(PMU_PLUGIN ": Failed to add kernel PMU events.");
      goto init_error;
    }
  }

  /* parse event names if the config option is present and not empty */
  if (g_ctx.hw_events_count) {

    ret = read_events(g_ctx.event_list_fn);
    if (ret != 0) {
      ERROR(PMU_PLUGIN ": Failed to read event list file '%s'.",
            g_ctx.event_list_fn);
      goto init_error;
    }

    ret = pmu_add_hw_events(g_ctx.event_list, g_ctx.hw_events,
                            g_ctx.hw_events_count);
    if (ret != 0) {
      ERROR(PMU_PLUGIN ": Failed to add hardware events.");
      goto init_error;
    }
  }

  if (g_ctx.sw_events) {
    ret = pmu_add_events(g_ctx.event_list, PERF_TYPE_SOFTWARE, g_sw_events,
                         STATIC_ARRAY_SIZE(g_sw_events));
    if (ret != 0) {
      ERROR(PMU_PLUGIN ": Failed to add software events.");
      goto init_error;
    }
  }

#if COLLECT_DEBUG
  pmu_dump_events();
#endif

  if (g_ctx.event_list->eventlist != NULL) {
    /* measure all processes */
    ret = pmu_setup_events(g_ctx.event_list, true, -1);
    if (ret != 0) {
      ERROR(PMU_PLUGIN ": Failed to setup perf events for the event list.");
      goto init_error;
    }
  } else {
    WARNING(PMU_PLUGIN
            ": Events list is empty. No events were set up for monitoring.");
  }

  return 0;

init_error:

  pmu_free_events(g_ctx.event_list);
  g_ctx.event_list = NULL;
  for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
    sfree(g_ctx.hw_events[i]);
  }
  sfree(g_ctx.hw_events);
  g_ctx.hw_events_count = 0;

  config_cores_cleanup(&g_ctx.cores);

  return ret;
}

static int pmu_shutdown(void) {

  DEBUG(PMU_PLUGIN ": %s:%d", __FUNCTION__, __LINE__);

  pmu_free_events(g_ctx.event_list);
  g_ctx.event_list = NULL;
  for (size_t i = 0; i < g_ctx.hw_events_count; i++) {
    sfree(g_ctx.hw_events[i]);
  }
  sfree(g_ctx.hw_events);
  g_ctx.hw_events_count = 0;

  config_cores_cleanup(&g_ctx.cores);

  return 0;
}

void module_register(void) {
  plugin_register_init(PMU_PLUGIN, pmu_init);
  plugin_register_complex_config(PMU_PLUGIN, pmu_config);
  plugin_register_complex_read(NULL, PMU_PLUGIN, pmu_read, 0, NULL);
  plugin_register_shutdown(PMU_PLUGIN, pmu_shutdown);
}