/* Linux-dependent part of branch trace support for GDB, and GDBserver.

   Copyright (C) 2013 Free Software Foundation, Inc.

   Contributed by Intel Corp. <markus.t.metzger@intel.com>

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#ifdef GDBSERVER
#include "server.h"
#else
#include "defs.h"
#endif

#include "linux-btrace.h"
#include "common-utils.h"
#include "gdb_assert.h"
#include "regcache.h"
#include "gdbthread.h"

#if HAVE_LINUX_PERF_EVENT_H

#include <errno.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/mman.h>
#include <sys/user.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>

/* A branch trace record in perf_event.  */
struct perf_event_bts
{
  /* The linear address of the branch source.  */
  uint64_t from;

  /* The linear address of the branch destination.  */
  uint64_t to;
};

/* A perf_event branch trace sample.  */
struct perf_event_sample
{
  /* The perf_event sample header.  */
  struct perf_event_header header;

  /* The perf_event branch tracing payload.  */
  struct perf_event_bts bts;
};

/* Get the perf_event header.  */

static inline volatile struct perf_event_mmap_page *
perf_event_header (struct btrace_target_info* tinfo)
{
  return tinfo->buffer;
}

/* Get the size of the perf_event mmap buffer.  */

static inline size_t
perf_event_mmap_size (const struct btrace_target_info *tinfo)
{
  /* The branch trace buffer is preceded by a configuration page.  */
  return (tinfo->size + 1) * PAGE_SIZE;
}

/* Get the size of the perf_event buffer.  */

static inline size_t
perf_event_buffer_size (struct btrace_target_info* tinfo)
{
  return tinfo->size * PAGE_SIZE;
}

/* Get the start address of the perf_event buffer.  */

static inline const uint8_t *
perf_event_buffer_begin (struct btrace_target_info* tinfo)
{
  return ((const uint8_t *) tinfo->buffer) + PAGE_SIZE;
}

/* Get the end address of the perf_event buffer.  */

static inline const uint8_t *
perf_event_buffer_end (struct btrace_target_info* tinfo)
{
  return perf_event_buffer_begin (tinfo) + perf_event_buffer_size (tinfo);
}

/* Check whether an address is in the kernel.  */

static inline int
perf_event_is_kernel_addr (const struct btrace_target_info *tinfo,
			   uint64_t addr)
{
  uint64_t mask;

  /* If we don't know the size of a pointer, we can't check.  Let's assume it's
     not a kernel address in this case.  */
  if (tinfo->ptr_bits == 0)
    return 0;

  /* A bit mask for the most significant bit in an address.  */
  mask = (uint64_t) 1 << (tinfo->ptr_bits - 1);

  /* Check whether the most significant bit in the address is set.  */
  return (addr & mask) != 0;
}

/* Check whether a perf event record should be skipped.  */

static inline int
perf_event_skip_record (const struct btrace_target_info *tinfo,
			const struct perf_event_bts *bts)
{
  /* The hardware may report branches from kernel into user space.  Branches
     from user into kernel space will be suppressed.  We filter the former to
     provide a consistent branch trace excluding kernel.  */
  return perf_event_is_kernel_addr (tinfo, bts->from);
}

/* Perform a few consistency checks on a perf event sample record.  This is
   meant to catch cases when we get out of sync with the perf event stream.  */

static inline int
perf_event_sample_ok (const struct perf_event_sample *sample)
{
  if (sample->header.type != PERF_RECORD_SAMPLE)
    return 0;

  if (sample->header.size != sizeof (*sample))
    return 0;

  return 1;
}

/* Branch trace is collected in a circular buffer [begin; end) as pairs of from
   and to addresses (plus a header).

   Start points into that buffer at the next sample position.
   We read the collected samples backwards from start.

   While reading the samples, we convert the information into a list of blocks.
   For two adjacent samples s1 and s2, we form a block b such that b.begin =
   s1.to and b.end = s2.from.

   In case the buffer overflows during sampling, one sample may have its lower
   part at the end and its upper part at the beginning of the buffer.  */

static VEC (btrace_block_s) *
perf_event_read_bts (struct btrace_target_info* tinfo, const uint8_t *begin,
		     const uint8_t *end, const uint8_t *start)
{
  VEC (btrace_block_s) *btrace = NULL;
  struct perf_event_sample sample;
  size_t read = 0, size = (end - begin);
  struct btrace_block block = { 0, 0 };
  struct regcache *regcache;

  gdb_assert (begin <= start);
  gdb_assert (start <= end);

  /* The first block ends at the current pc.  */
#ifdef GDBSERVER
  regcache = get_thread_regcache (find_thread_ptid (tinfo->ptid), 1);
#else
  regcache = get_thread_regcache (tinfo->ptid);
#endif
  block.end = regcache_read_pc (regcache);

  /* The buffer may contain a partial record as its last entry (i.e. when the
     buffer size is not a multiple of the sample size).  */
  read = sizeof (sample) - 1;

  for (; read < size; read += sizeof (sample))
    {
      const struct perf_event_sample *psample;

      /* Find the next perf_event sample in a backwards traversal.  */
      start -= sizeof (sample);

      /* If we're still inside the buffer, we're done.  */
      if (begin <= start)
	psample = (const struct perf_event_sample *) start;
      else
	{
	  int missing;

	  /* We're to the left of the ring buffer; we will wrap around and
	     reappear at the very right of the ring buffer.  */

	  missing = (begin - start);
	  start = (end - missing);

	  /* If the entire sample is missing, we're done.  */
	  if (missing == sizeof (sample))
	    psample = (const struct perf_event_sample *) start;
	  else
	    {
	      uint8_t *stack;

	      /* The sample wrapped around.  The lower part is at the end and
		 the upper part is at the beginning of the buffer.  */
	      stack = (uint8_t *) &sample;

	      /* Copy the two parts so we have a contiguous sample.  */
	      memcpy (stack, start, missing);
	      memcpy (stack + missing, begin, sizeof (sample) - missing);

	      psample = &sample;
	    }
	}

      if (!perf_event_sample_ok (psample))
	{
	  warning (_("Branch trace may be incomplete."));
	  break;
	}

      if (perf_event_skip_record (tinfo, &psample->bts))
	continue;

      /* We found a valid sample, so we can complete the current block.  */
      block.begin = psample->bts.to;

      VEC_safe_push (btrace_block_s, btrace, &block);

      /* Start the next block.  */
      block.end = psample->bts.from;
    }

  return btrace;
}

/* Check whether the kernel supports branch tracing.  */

static int
kernel_supports_btrace (void)
{
  struct perf_event_attr attr;
  pid_t child, pid;
  int status, file;

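  /* Fork a child that stops itself via PTRACE_TRACEME and SIGTRAP so we can
     try opening a branch tracing perf event for a stopped process.  */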
  errno = 0;
  child = fork ();
  switch (child)
    {
    case -1:
      warning (_("test branch tracing: cannot fork: %s."), strerror (errno));
      return 0;

    case 0:
      status = ptrace (PTRACE_TRACEME, 0, NULL, NULL);
      if (status != 0)
	{
	  warning (_("test branch tracing: cannot PTRACE_TRACEME: %s."),
		   strerror (errno));
	  _exit (1);
	}

      status = raise (SIGTRAP);
      if (status != 0)
	{
	  warning (_("test branch tracing: cannot raise SIGTRAP: %s."),
		   strerror (errno));
	  _exit (1);
	}

      _exit (1);

    default:
      pid = waitpid (child, &status, 0);
      if (pid != child)
	{
	  warning (_("test branch tracing: bad pid %ld, error: %s."),
		   (long) pid, strerror (errno));
	  return 0;
	}

      if (!WIFSTOPPED (status))
	{
	  warning (_("test branch tracing: expected stop. status: %d."),
		   status);
	  return 0;
	}

      memset (&attr, 0, sizeof (attr));

      attr.type = PERF_TYPE_HARDWARE;
      attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
      attr.sample_period = 1;
      attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;
      attr.exclude_kernel = 1;
      attr.exclude_hv = 1;
      attr.exclude_idle = 1;

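      /* Try to open a branch tracing perf event for the stopped child.  We
	 take a failing syscall to mean that the kernel does not support
	 branch tracing.  */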
      file = syscall (SYS_perf_event_open, &attr, child, -1, -1, 0);
      if (file >= 0)
	close (file);

      kill (child, SIGKILL);
      ptrace (PTRACE_KILL, child, NULL, NULL);

      pid = waitpid (child, &status, 0);
      if (pid != child)
	{
	  warning (_("test branch tracing: bad pid %ld, error: %s."),
		   (long) pid, strerror (errno));
	  if (!WIFSIGNALED (status))
	    warning (_("test branch tracing: expected killed. status: %d."),
		     status);
	}

      return (file >= 0);
    }
}

/* Check whether an Intel cpu supports branch tracing.  */

static int
intel_supports_btrace (void)
{
#if defined __i386__ || defined __x86_64__
    unsigned int cpuid, model, family;

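    /* CPUID leaf 0x1 returns the processor signature (family, model, and
       stepping) in EAX.  */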
    __asm__ __volatile__ ("movl   $1, %%eax;"
			  "cpuid;"
			  : "=a" (cpuid)
			  :: "%ebx", "%ecx", "%edx");

    family = (cpuid >> 8) & 0xf;
    model = (cpuid >> 4) & 0xf;

    switch (family)
      {
      case 0x6:
	model += (cpuid >> 12) & 0xf0;

	switch (model)
	  {
	  case 0x1a: /* Nehalem */
	  case 0x1f:
	  case 0x1e:
	  case 0x2e:
	  case 0x25: /* Westmere */
	  case 0x2c:
	  case 0x2f:
	  case 0x2a: /* Sandy Bridge */
	  case 0x2d:
	  case 0x3a: /* Ivy Bridge */

	    /* AAJ122: LBR, BTM, or BTS records may have incorrect branch
	       "from" information after an EIST transition, T-states, C1E, or
	       Adaptive Thermal Throttling.  */
	    return 0;
	  }
      }

  return 1;

#else /* !defined __i386__ && !defined __x86_64__ */

  return 0;

#endif /* !defined __i386__ && !defined __x86_64__ */
}

/* Check whether the cpu supports branch tracing.  */

static int
cpu_supports_btrace (void)
{
#if defined __i386__ || defined __x86_64__
  char vendor[13];

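  /* CPUID leaf 0x0 returns the vendor identification string in EBX, EDX,
     and ECX; for Intel processors it reads "GenuineIntel".  */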
  __asm__ __volatile__ ("xorl   %%ebx, %%ebx;"
			"xorl   %%ecx, %%ecx;"
			"xorl   %%edx, %%edx;"
			"movl   $0,    %%eax;"
			"cpuid;"
			"movl   %%ebx,  %0;"
			"movl   %%edx,  %1;"
			"movl   %%ecx,  %2;"
			: "=m" (vendor[0]),
			  "=m" (vendor[4]),
			  "=m" (vendor[8])
			:
			: "%eax", "%ebx", "%ecx", "%edx");
  vendor[12] = '\0';

  if (strcmp (vendor, "GenuineIntel") == 0)
    return intel_supports_btrace ();

  /* Don't know about others.  Let's assume they do.  */
  return 1;

#else /* !defined __i386__ && !defined __x86_64__ */

  return 0;

#endif /* !defined __i386__ && !defined __x86_64__ */
}

/* See linux-btrace.h.  */

int
linux_supports_btrace (void)
{
  static int cached;

  if (cached == 0)
    {
      if (!kernel_supports_btrace ())
	cached = -1;
      else if (!cpu_supports_btrace ())
	cached = -1;
      else
	cached = 1;
    }

  return cached > 0;
}

/* See linux-btrace.h.  */

struct btrace_target_info *
linux_enable_btrace (ptid_t ptid)
{
  struct btrace_target_info *tinfo;
  int pid;

  tinfo = xzalloc (sizeof (*tinfo));
  tinfo->ptid = ptid;

  tinfo->attr.size = sizeof (tinfo->attr);
  tinfo->attr.type = PERF_TYPE_HARDWARE;
  tinfo->attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
  tinfo->attr.sample_period = 1;

  /* We sample from and to addresses.  */
  tinfo->attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;

  tinfo->attr.exclude_kernel = 1;
  tinfo->attr.exclude_hv = 1;
  tinfo->attr.exclude_idle = 1;

  tinfo->ptr_bits = 0;

  pid = ptid_get_lwp (ptid);
  if (pid == 0)
    pid = ptid_get_pid (ptid);

  errno = 0;
  tinfo->file = syscall (SYS_perf_event_open, &tinfo->attr, pid, -1, -1, 0);
  if (tinfo->file < 0)
    goto err;

  /* We hard-code the trace buffer size.
     At some later time, we should make this configurable.  */
  tinfo->size = 1;
  tinfo->buffer = mmap (NULL, perf_event_mmap_size (tinfo),
			PROT_READ, MAP_SHARED, tinfo->file, 0);
  if (tinfo->buffer == MAP_FAILED)
    goto err_file;

  return tinfo;

 err_file:
  close (tinfo->file);

 err:
  xfree (tinfo);
  return NULL;
}


/* See linux-btrace.h.  */

int
linux_disable_btrace (struct btrace_target_info *tinfo)
{
  int errcode;

  errno = 0;
  errcode = munmap (tinfo->buffer, perf_event_mmap_size (tinfo));
  if (errcode != 0)
    return errno;

  close (tinfo->file);
  xfree (tinfo);

  return 0;
}

/* Check whether the branch trace has changed.  */

static int
linux_btrace_has_changed (struct btrace_target_info *tinfo)
{
  volatile struct perf_event_mmap_page *header = perf_event_header (tinfo);

  return header->data_head != tinfo->data_head;
}

/* See linux-btrace.h.  */

VEC (btrace_block_s) *
linux_read_btrace (struct btrace_target_info *tinfo,
		   enum btrace_read_type type)
{
  VEC (btrace_block_s) *btrace = NULL;
  volatile struct perf_event_mmap_page *header;
  const uint8_t *begin, *end, *start;
  unsigned long data_head, retries = 5;
  size_t buffer_size;

  if (type == btrace_read_new && !linux_btrace_has_changed (tinfo))
    return NULL;

  header = perf_event_header (tinfo);
  buffer_size = perf_event_buffer_size (tinfo);

  /* We may need to retry reading the trace.  See below.  */
  while (retries--)
    {
      data_head = header->data_head;

      /* If there's new trace, let's read it.  */
      if (data_head != tinfo->data_head)
	{
	  /* Data_head keeps growing; the buffer itself is circular.  */
	  begin = perf_event_buffer_begin (tinfo);
	  start = begin + data_head % buffer_size;

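	  /* If the buffer has not wrapped yet, only [begin; start) contains
	     valid data; otherwise the entire buffer does.  */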
	  if (data_head <= buffer_size)
	    end = start;
	  else
	    end = perf_event_buffer_end (tinfo);

	  btrace = perf_event_read_bts (tinfo, begin, end, start);
	}

      /* The stopping thread notifies its ptracer before it is scheduled out.
	 On multi-core systems, the debugger might therefore run while the
	 kernel is still writing the last branch trace records.

	 Let's check whether the data head moved while we read the trace.  */
      if (data_head == header->data_head)
	break;
    }

  tinfo->data_head = data_head;

  return btrace;
}

#else /* !HAVE_LINUX_PERF_EVENT_H */

/* See linux-btrace.h.  */

int
linux_supports_btrace (void)
{
  return 0;
}

/* See linux-btrace.h.  */

struct btrace_target_info *
linux_enable_btrace (ptid_t ptid)
{
  return NULL;
}

/* See linux-btrace.h.  */

int
linux_disable_btrace (struct btrace_target_info *tinfo)
{
  return ENOSYS;
}

/* See linux-btrace.h.  */

VEC (btrace_block_s) *
linux_read_btrace (struct btrace_target_info *tinfo,
		   enum btrace_read_type type)
{
  return NULL;
}

#endif /* !HAVE_LINUX_PERF_EVENT_H */