/*
 * Copyright (c) 2012,2013,2018 Intel Corporation
 * Author: Andi Kleen
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */
16
#include "config.h"

/* Ring 3 RDPMC support */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

#include <rdma/fi_errno.h>
30
31
/**
 * DOC: Ring 3 counting for CPU performance counters
 *
 * This library allows accessing CPU performance counters from ring 3
 * using the perf_events subsystem. This is useful to measure specific
 * parts of programs (e.g. excluding initialization code)
 *
 * Requires a Linux 3.3+ kernel
 */
41
42 #include "linux/rdpmc.h"
43
44 typedef unsigned long long u64;
45
46 #define rmb() asm volatile("" ::: "memory")
47
48
/* Thin syscall wrapper: glibc historically provides no stub for
 * perf_event_open, so invoke it via syscall(2) directly. */
static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
			   int group_fd, unsigned long flags)
{
	long ret;

	ret = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
	return (int) ret;
}
54
55 /**
56 * rdpmc_open - initialize a simple ring 3 readable performance counter
57 * @counter: Raw event descriptor (UUEE UU unit mask EE event)
58 * @ctx: Pointer to struct &rdpmc_ctx that is initialized
59 *
60 * The counter will be set up to count CPU events excluding the kernel.
61 * Must be called for each thread using the counter.
62 * The caller must make sure counter is suitable for the running CPU.
63 * Only works in 3.3+ kernels.
64 * Must be closed with rdpmc_close()
65 */
66
rdpmc_open(unsigned counter,struct rdpmc_ctx * ctx)67 int rdpmc_open(unsigned counter, struct rdpmc_ctx *ctx)
68 {
69 struct perf_event_attr attr = {
70 .type = counter > 10 ? PERF_TYPE_RAW : PERF_TYPE_HARDWARE,
71 .size = PERF_ATTR_SIZE_VER0,
72 .config = counter,
73 .sample_type = PERF_SAMPLE_READ,
74 .exclude_kernel = 1,
75 };
76 return rdpmc_open_attr(&attr, ctx, NULL);
77 }
78
79 /**
80 * rdpmc_open_attr - initialize a raw ring 3 readable performance counter
81 * @attr: perf struct %perf_event_attr for the counter
82 * @ctx: Pointer to struct %rdpmc_ctx that is initialized.
83 * @leader_ctx: context of group leader or NULL
84 *
85 * This allows more flexible setup with a custom &perf_event_attr.
86 * For simple uses rdpmc_open() should be used instead.
87 * Must be called for each thread using the counter.
88 * Must be closed with rdpmc_close()
89 */
rdpmc_open_attr(struct perf_event_attr * attr,struct rdpmc_ctx * ctx,struct rdpmc_ctx * leader_ctx)90 int rdpmc_open_attr(struct perf_event_attr *attr, struct rdpmc_ctx *ctx,
91 struct rdpmc_ctx *leader_ctx)
92 {
93 ctx->fd = perf_event_open(attr, 0, -1,
94 leader_ctx ? leader_ctx->fd : -1, 0);
95 if (ctx->fd < 0) {
96 perror("perf_event_open");
97 return -1;
98 }
99 ctx->buf = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, ctx->fd, 0);
100 if (ctx->buf == MAP_FAILED) {
101 close(ctx->fd);
102 perror("mmap on perf fd");
103 return -1;
104 }
105 /* Not sure why this happens? */
106 if (ctx->buf->index == 0) {
107 munmap(ctx->buf, sysconf(_SC_PAGESIZE));
108 close(ctx->fd);
109 return -1;
110 }
111 return 0;
112 }
113
114 /**
115 * rdpmc_close - free a ring 3 readable performance counter
116 * @ctx: Pointer to &rdpmc_ctx context.
117 *
118 * Must be called by each thread for each context it initialized.
119 */
rdpmc_close(struct rdpmc_ctx * ctx)120 void rdpmc_close(struct rdpmc_ctx *ctx)
121 {
122 close(ctx->fd);
123 munmap(ctx->buf, sysconf(_SC_PAGESIZE));
124 }
125
126 /**
127 * rdpmc_read - read a ring 3 readable performance counter
128 * @ctx: Pointer to initialized &rdpmc_ctx structure.
129 *
130 * Read the current value of a running performance counter.
131 * This should only be called from the same thread/process as opened
132 * the context. For new threads please create a new context.
133 */
rdpmc_read(struct rdpmc_ctx * ctx)134 unsigned long long rdpmc_read(struct rdpmc_ctx *ctx)
135 {
136 u64 val;
137 unsigned seq;
138 u64 offset;
139 typeof (ctx->buf) buf = ctx->buf;
140 unsigned index;
141
142 do {
143 seq = buf->lock;
144 rmb();
145 index = buf->index;
146 offset = buf->offset;
147 if (index == 0) /* rdpmc not allowed */
148 return offset;
149 val = __builtin_ia32_rdpmc(index - 1);
150 rmb();
151 } while (buf->lock != seq);
152 return val + offset;
153 }
154
155
rdpmc_hw_id(uint32_t cntr_id)156 static uint64_t rdpmc_hw_id(uint32_t cntr_id)
157 {
158 switch (cntr_id) {
159 case OFI_PMC_CPU_CYCLES:
160 return PERF_COUNT_HW_CPU_CYCLES;
161 case OFI_PMC_CPU_INSTR:
162 return PERF_COUNT_HW_INSTRUCTIONS;
163 default:
164 return ~0;
165 }
166 }
167
/* Cache-event mapping is not implemented yet (TODO); always reports
 * "unknown" so ofi_pmu_open() rejects cache-domain counters. */
static uint64_t rdpmc_cache_id(uint32_t cntr_id, uint32_t flags)
{
	(void) cntr_id;
	(void) flags;
	return ~0;
}
173
rdpmc_sw_id(uint32_t cntr_id)174 static uint64_t rdpmc_sw_id(uint32_t cntr_id)
175 {
176 switch (cntr_id) {
177 case OFI_PMC_OS_PAGE_FAULT:
178 return PERF_COUNT_SW_PAGE_FAULTS;
179 default:
180 return ~0;
181 }
182 }
183
ofi_pmu_open(struct ofi_perf_ctx ** ctx,enum ofi_perf_domain domain,uint32_t cntr_id,uint32_t flags)184 int ofi_pmu_open(struct ofi_perf_ctx **ctx, enum ofi_perf_domain domain,
185 uint32_t cntr_id, uint32_t flags)
186 {
187 struct perf_event_attr attr = {
188 .size = PERF_ATTR_SIZE_VER0,
189 .sample_type = PERF_SAMPLE_READ,
190 .exclude_kernel = 1,
191 };
192 int ret;
193
194 *ctx = calloc(1, sizeof **ctx);
195 if (!*ctx)
196 return -FI_ENOMEM;
197
198 switch(domain) {
199 case OFI_PMU_CPU:
200 attr.type = PERF_TYPE_HARDWARE;
201 attr.config = rdpmc_hw_id(cntr_id);
202 break;
203 case OFI_PMU_CACHE:
204 attr.type = PERF_TYPE_HW_CACHE;
205 attr.config = rdpmc_cache_id(cntr_id, flags);
206 break;
207 case OFI_PMU_OS:
208 attr.type = PERF_TYPE_SOFTWARE;
209 attr.config = rdpmc_sw_id(cntr_id);
210 break;
211 default:
212 return -FI_ENOSYS;
213 }
214
215 if (attr.config == ~0)
216 return -FI_ENOSYS;
217
218 ret = rdpmc_open_attr(&attr, &(*ctx)->ctx, NULL);
219 return ret ? -errno : 0;
220 }
221
ofi_pmu_read(struct ofi_perf_ctx * ctx)222 inline uint64_t ofi_pmu_read(struct ofi_perf_ctx *ctx)
223 {
224 return rdpmc_read(&ctx->ctx);
225 }
226
ofi_pmu_close(struct ofi_perf_ctx * ctx)227 inline void ofi_pmu_close(struct ofi_perf_ctx *ctx)
228 {
229 rdpmc_close(&ctx->ctx);
230 free(ctx);
231 }
232