1 /* Standard libdwfl callbacks for debugging the running Linux kernel.
2    Copyright (C) 2005-2011, 2013, 2014, 2015 Red Hat, Inc.
3    This file is part of elfutils.
4 
5    This file is free software; you can redistribute it and/or modify
6    it under the terms of either
7 
8      * the GNU Lesser General Public License as published by the Free
9        Software Foundation; either version 3 of the License, or (at
10        your option) any later version
11 
12    or
13 
14      * the GNU General Public License as published by the Free
15        Software Foundation; either version 2 of the License, or (at
16        your option) any later version
17 
18    or both in parallel, as here.
19 
20    elfutils is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received copies of the GNU General Public License and
26    the GNU Lesser General Public License along with this program.  If
27    not, see <http://www.gnu.org/licenses/>.  */
28 
29 /* In case we have a bad fts we include this before config.h because it
30    can't handle _FILE_OFFSET_BITS.
31    Everything we need here is fine if its declarations just come first.
32    Also, include sys/types.h before fts. On some systems fts.h is not self
33    contained. */
34 #ifdef BAD_FTS
35   #include <sys/types.h>
36   #include <fts.h>
37 #endif
38 
39 #include <config.h>
40 #include <system.h>
41 
42 #include "libelfP.h"
43 #include "libdwflP.h"
44 #include <inttypes.h>
45 #include <errno.h>
46 #include <stdio.h>
47 #include <stdio_ext.h>
48 #include <string.h>
49 #include <stdlib.h>
50 #include <sys/utsname.h>
51 #include <fcntl.h>
52 #include <unistd.h>
53 
54 /* If fts.h is included before config.h, its indirect inclusions may not
55    give us the right LFS aliases of these functions, so map them manually.  */
56 #ifdef BAD_FTS
57   #ifdef _FILE_OFFSET_BITS
58     #define open open64
59     #define fopen fopen64
60   #endif
61 #else
62   #include <sys/types.h>
63   #include <fts.h>
64 #endif
65 
66 
/* Module name under which the kernel image itself is reported.  */
#define KERNEL_MODNAME	"kernel"

/* Directory holding the kernel and its modules; %s is the release string.  */
#define MODULEDIRFMT	"/lib/modules/%s"

/* File handed to check_notes to look for the kernel's build ID note.  */
#define KNOTESFILE	"/sys/kernel/notes"
/* Directory walked by check_module_notes; %s is the module name.  */
#define	MODNOTESFMT	"/sys/module/%s/notes"
/* Symbol list read by intuit_kernel_bounds.  */
#define KSYMSFILE	"/proc/kallsyms"
/* Module list read by dwfl_linux_kernel_report_modules.  */
#define MODULELIST	"/proc/modules"
/* Directory prefix for per-section address files; %s is the module name
   and the section name is appended by the user of this format.  */
#define	SECADDRDIRFMT	"/sys/module/%s/sections/"
#define MODULE_SECT_NAME_LEN 32	/* Minimum any linux/module.h has had.  */
77 
78 
/* File name suffixes that try_kernel_name appends to the kernel image
   name when the plain name cannot be opened.  The ".bz2" and ".xz"
   entries are only present when USE_BZLIB and USE_LZMA respectively
   are defined.  */
static const char *vmlinux_suffixes[] =
  {
    ".gz",
#ifdef USE_BZLIB
    ".bz2",
#endif
#ifdef USE_LZMA
    ".xz",
#endif
  };
89 
90 /* Try to open the given file as it is or under the debuginfo directory.  */
91 static int
try_kernel_name(Dwfl * dwfl,char ** fname,bool try_debug)92 try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
93 {
94   if (*fname == NULL)
95     return -1;
96 
97   /* Don't bother trying *FNAME itself here if the path will cause it to be
98      tried because we give its own basename as DEBUGLINK_FILE.  */
99   int fd = ((((dwfl->callbacks->debuginfo_path
100 	       ? *dwfl->callbacks->debuginfo_path : NULL)
101 	      ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
102 	    : TEMP_FAILURE_RETRY (open (*fname, O_RDONLY)));
103 
104   if (fd < 0)
105     {
106       Dwfl_Module fakemod = { .dwfl = dwfl };
107 
108       if (try_debug)
109 	/* Passing NULL for DEBUGLINK_FILE searches for both the basenamer
110 	   "vmlinux" and the default of basename + ".debug", to look for
111 	   "vmlinux.debug" files.  */
112 	fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
113 						   *fname, NULL, 0,
114 						   &fakemod.debug.name);
115       else
116 	/* Try the file's unadorned basename as DEBUGLINK_FILE,
117 	   to look only for "vmlinux" files.  */
118 	fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
119 						   *fname, basename (*fname),
120 						   0, &fakemod.debug.name);
121 
122       if (fakemod.debug.name != NULL)
123 	{
124 	  free (*fname);
125 	  *fname = fakemod.debug.name;
126 	}
127     }
128 
129   if (fd < 0)
130     for (size_t i = 0;
131 	 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
132 	 ++i)
133       {
134 	char *zname;
135 	if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
136 	  {
137 	    fd = TEMP_FAILURE_RETRY (open (zname, O_RDONLY));
138 	    if (fd < 0)
139 	      free (zname);
140 	    else
141 	      {
142 		free (*fname);
143 		*fname = zname;
144 	      }
145 	  }
146       }
147 
148   if (fd < 0)
149     {
150       free (*fname);
151       *fname = NULL;
152     }
153 
154   return fd;
155 }
156 
/* Return the release string of the running kernel as reported by uname,
   or NULL if it cannot be determined.  The string lives in static
   storage and is looked up only until the first successful uname call.  */
static inline const char *
kernel_release (void)
{
#ifdef __linux__
  /* Cache the `uname -r` string we'll use.  */
  static struct utsname cached;

  if (cached.release[0] != '\0')
    return cached.release;

  return uname (&cached) == 0 ? cached.release : NULL;
#else
  /* Used for finding the running linux kernel, which isn't supported
     on non-linux kernel systems.  */
  errno = ENOTSUP;
  return NULL;
#endif
}
173 
174 static int
find_kernel_elf(Dwfl * dwfl,const char * release,char ** fname)175 find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
176 {
177   /* First try to find an uncompressed vmlinux image.  Possibly
178      including debuginfo.  */
179   if (release == NULL
180       || ((release[0] == '/'
181 	   ? asprintf (fname, "%s/vmlinux", release)
182 	   : asprintf (fname, "/boot/vmlinux-%s", release)) < 0))
183     return -1;
184 
185   int fd = try_kernel_name (dwfl, fname, true);
186   if (fd < 0 && release[0] != '/')
187     {
188       free (*fname);
189       if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
190 	return -1;
191       fd = try_kernel_name (dwfl, fname, true);
192     }
193 
194   /* There might be a compressed vmlinuz image.  Probably without
195      debuginfo, but try to find it under the debug path also, just in
196      case.  */
197   if (fd < 0)
198     {
199       free (*fname);
200       if ((release[0] == '/'
201            ? asprintf (fname, "%s/vmlinuz", release)
202            : asprintf (fname, "/boot/vmlinuz-%s", release)) < 0)
203         return -1;
204 
205       fd = try_kernel_name (dwfl, fname, true);
206       if (fd < 0 && release[0] != '/')
207 	{
208 	  free (*fname);
209 	  if (asprintf (fname, MODULEDIRFMT "/vmlinuz", release) < 0)
210 	    return -1;
211 	  fd = try_kernel_name (dwfl, fname, true);
212 	}
213     }
214 
215   return fd;
216 }
217 
218 static int
get_release(Dwfl * dwfl,const char ** release)219 get_release (Dwfl *dwfl, const char **release)
220 {
221   if (dwfl == NULL)
222     return -1;
223 
224   const char *release_string = release == NULL ? NULL : *release;
225   if (release_string == NULL)
226     {
227       release_string = kernel_release ();
228       if (release_string == NULL)
229 	return errno;
230       if (release != NULL)
231 	*release = release_string;
232     }
233 
234   return 0;
235 }
236 
237 static int
report_kernel(Dwfl * dwfl,const char ** release,int (* predicate)(const char * module,const char * file))238 report_kernel (Dwfl *dwfl, const char **release,
239 	       int (*predicate) (const char *module, const char *file))
240 {
241   int result = get_release (dwfl, release);
242   if (unlikely (result != 0))
243     return result;
244 
245   if (release == NULL || *release == NULL)
246     return EINVAL;
247 
248   char *fname;
249   int fd = find_kernel_elf (dwfl, *release, &fname);
250 
251   if (fd < 0)
252     result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
253 	      ? 0 : errno ?: ENOENT);
254   else
255     {
256       bool report = true;
257 
258       if (predicate != NULL)
259 	{
260 	  /* Let the predicate decide whether to use this one.  */
261 	  int want = (*predicate) (KERNEL_MODNAME, fname);
262 	  if (want < 0)
263 	    result = errno;
264 	  report = want > 0;
265 	}
266 
267       if (report)
268 	{
269 	  /* Note that on some architectures (e.g. x86_64) the vmlinux
270 	     is ET_EXEC, while on others (e.g. ppc64) it is ET_DYN.
271 	     In both cases the phdr p_vaddr load address will be non-zero.
272 	     We want the image to be placed as if it was ET_DYN, so
273 	     pass true for add_p_vaddr which will do the right thing
274 	     (in combination with a zero base) in either case.  */
275 	  Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
276 						      fname, fd, 0, true);
277 	  if (mod == NULL)
278 	    result = -1;
279 	  else
280 	    /* The kernel is ET_EXEC, but always treat it as relocatable.  */
281 	    mod->e_type = ET_DYN;
282 	}
283 
284       free (fname);
285 
286       if (!report || result < 0)
287 	close (fd);
288     }
289 
290   return result;
291 }
292 
293 /* Look for a kernel debug archive.  If we find one, report all its modules.
294    If not, return ENOENT.  */
295 static int
report_kernel_archive(Dwfl * dwfl,const char ** release,int (* predicate)(const char * module,const char * file))296 report_kernel_archive (Dwfl *dwfl, const char **release,
297 		       int (*predicate) (const char *module, const char *file))
298 {
299   int result = get_release (dwfl, release);
300   if (unlikely (result != 0))
301     return result;
302 
303   if (release == NULL || *release == NULL)
304     return EINVAL;
305 
306   char *archive;
307   int res = (((*release)[0] == '/')
308 	     ? asprintf (&archive, "%s/debug.a", *release)
309 	     : asprintf (&archive, MODULEDIRFMT "/debug.a", *release));
310   if (unlikely (res < 0))
311     return ENOMEM;
312 
313   int fd = try_kernel_name (dwfl, &archive, false);
314   if (fd < 0)
315     result = errno ?: ENOENT;
316   else
317     {
318       /* We have the archive file open!  */
319       Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
320 						    true, predicate);
321       if (unlikely (last == NULL))
322 	result = -1;
323       else
324 	{
325 	  /* Find the kernel and move it to the head of the list.  */
326 	  Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
327 	  for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
328 	    if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, "kernel"))
329 	      {
330 		*prevp = m->next;
331 		m->next = *tailp;
332 		*tailp = m;
333 		break;
334 	      }
335 	}
336     }
337 
338   free (archive);
339   return result;
340 }
341 
/* Check whether the file name of F ends in one of the known module
   suffixes.  If NAMELEN is nonzero the name must consist of exactly
   NAMELEN characters followed by the suffix; if it is zero at least one
   character must precede the suffix.  Returns the length of the matching
   suffix, or 0 if none matches.  */
static size_t
check_suffix (const FTSENT *f, size_t namelen)
{
  static const char *const module_suffixes[] =
    {
      ".ko",
      ".ko.gz",
#if USE_BZLIB
      ".ko.bz2",
#endif
#if USE_LZMA
      ".ko.xz",
#endif
    };
  const size_t nsuffixes = sizeof module_suffixes / sizeof module_suffixes[0];

  for (size_t i = 0; i < nsuffixes; ++i)
    {
      const char *sfx = module_suffixes[i];
      const size_t sfxlen = strlen (sfx);

      bool length_ok;
      if (namelen != 0)
        length_ok = f->fts_namelen == namelen + sfxlen;
      else
        length_ok = f->fts_namelen > sfxlen;

      /* The comparison includes the terminating NUL of both strings.  */
      if (length_ok
          && memcmp (f->fts_name + f->fts_namelen - sfxlen,
                     sfx, sfxlen + 1) == 0)
        return sfxlen;
    }

  return 0;
}
365 
366 /* Report a kernel and all its modules found on disk, for offline use.
367    If RELEASE starts with '/', it names a directory to look in;
368    if not, it names a directory to find under /lib/modules/;
369    if null, /lib/modules/`uname -r` is used.
370    Returns zero on success, -1 if dwfl_report_module failed,
371    or an errno code if finding the files on disk failed.  */
372 int
dwfl_linux_kernel_report_offline(Dwfl * dwfl,const char * release,int (* predicate)(const char * module,const char * file))373 dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
374 				  int (*predicate) (const char *module,
375 						    const char *file))
376 {
377   int result = report_kernel_archive (dwfl, &release, predicate);
378   if (result != ENOENT)
379     return result;
380 
381   /* First report the kernel.  */
382   result = report_kernel (dwfl, &release, predicate);
383   if (result == 0)
384     {
385       /* Do "find /lib/modules/RELEASE -name *.ko".  */
386 
387       char *modulesdir[] = { NULL, NULL };
388       if (release[0] == '/')
389 	modulesdir[0] = (char *) release;
390       else
391 	{
392 	  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
393 	    return errno;
394 	}
395 
396       FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
397       if (modulesdir[0] == (char *) release)
398 	modulesdir[0] = NULL;
399       if (fts == NULL)
400 	{
401 	  free (modulesdir[0]);
402 	  return errno;
403 	}
404 
405       FTSENT *f;
406       while ((f = fts_read (fts)) != NULL)
407 	{
408 	  /* Skip a "source" subtree, which tends to be large.
409 	     This insane hard-coding of names is what depmod does too.  */
410 	  if (f->fts_namelen == sizeof "source" - 1
411 	      && !strcmp (f->fts_name, "source"))
412 	    {
413 	      fts_set (fts, f, FTS_SKIP);
414 	      continue;
415 	    }
416 
417 	  switch (f->fts_info)
418 	    {
419 	    case FTS_F:
420 	    case FTS_SL:
421 	    case FTS_NSOK:;
422 	      /* See if this file name matches "*.ko".  */
423 	      const size_t suffix = check_suffix (f, 0);
424 	      if (suffix)
425 		{
426 		  /* We have a .ko file to report.  Following the algorithm
427 		     by which the kernel makefiles set KBUILD_MODNAME, we
428 		     replace all ',' or '-' with '_' in the file name and
429 		     call that the module name.  Modules could well be
430 		     built using different embedded names than their file
431 		     names.  To handle that, we would have to look at the
432 		     __this_module.name contents in the module's text.  */
433 
434 		  char *name = strndup (f->fts_name, f->fts_namelen - suffix);
435 		  if (unlikely (name == NULL))
436 		    {
437 		      __libdwfl_seterrno (DWFL_E_NOMEM);
438 		      result = -1;
439 		      break;
440 		    }
441 		  for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
442 		    if (name[i] == '-' || name[i] == ',')
443 		      name[i] = '_';
444 
445 		  if (predicate != NULL)
446 		    {
447 		      /* Let the predicate decide whether to use this one.  */
448 		      int want = (*predicate) (name, f->fts_path);
449 		      if (want < 0)
450 			{
451 			  result = -1;
452 			  free (name);
453 			  break;
454 			}
455 		      if (!want)
456 			{
457 			  free (name);
458 			  continue;
459 			}
460 		    }
461 
462 		  if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
463 		    {
464 		      free (name);
465 		      result = -1;
466 		      break;
467 		    }
468 		  free (name);
469 		}
470 	      continue;
471 
472 	    case FTS_ERR:
473 	    case FTS_DNR:
474 	    case FTS_NS:
475 	      result = f->fts_errno;
476 	      break;
477 
478 	    case FTS_SLNONE:
479 	    default:
480 	      continue;
481 	    }
482 
483 	  /* We only get here in error cases.  */
484 	  break;
485 	}
486       fts_close (fts);
487       free (modulesdir[0]);
488     }
489 
490   return result;
491 }
492 INTDEF (dwfl_linux_kernel_report_offline)
493 
494 
/* State of read_address used by intuit_kernel_bounds. */
struct read_address_state {
  FILE *f;		/* Stream the lines are read from.  */
  char *line;		/* Line buffer filled in by getline; freed by
			   intuit_kernel_bounds when it is done.  */
  size_t linesz;	/* Buffer size bookkeeping passed to getline.  */
  size_t n;		/* Result of the most recent getline call.  */
  char *p;		/* Parse position in LINE; after read_address has
			   split off TYPE it is what strsep left behind.  */
  const char *type;	/* Token following the address on the current line.  */
};
504 
505 static inline bool
read_address(struct read_address_state * state,Dwarf_Addr * addr)506 read_address (struct read_address_state *state, Dwarf_Addr *addr)
507 {
508   if ((state->n = getline (&state->line, &state->linesz, state->f)) < 1 ||
509       state->line[state->n - 2] == ']')
510     return false;
511   *addr = strtoull (state->line, &state->p, 16);
512   state->p += strspn (state->p, " \t");
513   state->type = strsep (&state->p, " \t\n");
514   if (state->type == NULL)
515     return false;
516   return state->p != NULL && state->p != state->line;
517 }
518 
519 
520 /* Grovel around to guess the bounds of the runtime kernel image.  */
521 static int
intuit_kernel_bounds(Dwarf_Addr * start,Dwarf_Addr * end,Dwarf_Addr * notes)522 intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
523 {
524   struct read_address_state state = { NULL, NULL, 0, 0, NULL, NULL };
525 
526   *notes = 0;
527 
528   state.f = fopen (KSYMSFILE, "r");
529   if (state.f == NULL)
530     return errno;
531 
532   (void) __fsetlocking (state.f, FSETLOCKING_BYCALLER);
533 
534   int result;
535   do
536     result = read_address (&state, start) ? 0 : -1;
537   while (result == 0 && strchr ("TtRr", *state.type) == NULL);
538 
539   if (result == 0)
540     {
541       *end = *start;
542       while (read_address (&state, end))
543 	if (*notes == 0 && !strcmp (state.p, "__start_notes\n"))
544 	  *notes = *end;
545 
546       Dwarf_Addr round_kernel = sysconf (_SC_PAGESIZE);
547       *start &= -(Dwarf_Addr) round_kernel;
548       *end += round_kernel - 1;
549       *end &= -(Dwarf_Addr) round_kernel;
550       if (*start >= *end || *end - *start < round_kernel)
551 	result = -1;
552     }
553   free (state.line);
554 
555   if (result == -1)
556     result = ferror_unlocked (state.f) ? errno : ENOEXEC;
557 
558   fclose (state.f);
559 
560   return result;
561 }
562 
563 
564 /* Look for a build ID note in NOTESFILE and associate the ID with MOD.  */
565 static int
check_notes(Dwfl_Module * mod,const char * notesfile,Dwarf_Addr vaddr,const char * secname)566 check_notes (Dwfl_Module *mod, const char *notesfile,
567 	     Dwarf_Addr vaddr, const char *secname)
568 {
569   int fd = open (notesfile, O_RDONLY);
570   if (fd < 0)
571     return 1;
572 
573   assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
574   assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
575   union
576   {
577     GElf_Nhdr nhdr;
578     unsigned char data[8192];
579   } buf;
580 
581   ssize_t n = read (fd, buf.data, sizeof buf);
582   close (fd);
583 
584   if (n <= 0)
585     return 1;
586 
587   unsigned char *p = buf.data;
588   size_t len = 0;
589   while (p < &buf.data[n])
590     {
591       /* No translation required since we are reading the native kernel.  */
592       GElf_Nhdr *nhdr = (void *) p;
593       len += sizeof *nhdr;
594       p += len;
595       unsigned char *name = p;
596       unsigned char *bits;
597       /* This is somewhat ugly, GNU Property notes use different padding,
598 	 but all we have is the file content, so we have to actually check
599 	 the name and type.  */
600       if (nhdr->n_type == NT_GNU_PROPERTY_TYPE_0
601           && nhdr->n_namesz == sizeof "GNU"
602           && name + nhdr->n_namesz < &buf.data[n]
603           && !memcmp (name, "GNU", sizeof "GNU"))
604 	{
605 	  len += nhdr->n_namesz;
606 	  len = NOTE_ALIGN8 (len);
607 	  p = buf.data + len;
608 	  bits = p;
609 	  len += nhdr->n_descsz;
610 	  len = NOTE_ALIGN8 (len);
611 	  p = buf.data + len;
612 	}
613       else
614 	{
615 	  len += nhdr->n_namesz;
616 	  len = NOTE_ALIGN4 (len);
617 	  p = buf.data + len;
618 	  bits = p;
619 	  len += nhdr->n_descsz;
620 	  len = NOTE_ALIGN4 (len);
621 	  p = buf.data + len;
622 	}
623 
624       if (p <= &buf.data[n]
625 	  && nhdr->n_type == NT_GNU_BUILD_ID
626 	  && nhdr->n_namesz == sizeof "GNU"
627 	  && !memcmp (name, "GNU", sizeof "GNU"))
628 	{
629 	  /* Found it.  For a module we must figure out its VADDR now.  */
630 
631 	  if (secname != NULL
632 	      && (INTUSE(dwfl_linux_kernel_module_section_address)
633 		  (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
634 		  || vaddr == (GElf_Addr) -1l))
635 	    vaddr = 0;
636 
637 	  if (vaddr != 0)
638 	    vaddr += bits - buf.data;
639 	  return INTUSE(dwfl_module_report_build_id) (mod, bits,
640 						      nhdr->n_descsz, vaddr);
641 	}
642     }
643 
644   return 0;
645 }
646 
647 /* Look for a build ID for the kernel.  */
648 static int
check_kernel_notes(Dwfl_Module * kernelmod,GElf_Addr vaddr)649 check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
650 {
651   return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
652 }
653 
654 /* Look for a build ID for a loaded kernel module.  */
655 static int
check_module_notes(Dwfl_Module * mod)656 check_module_notes (Dwfl_Module *mod)
657 {
658   char *dirs[2] = { NULL, NULL };
659   if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
660     return ENOMEM;
661 
662   FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
663   if (fts == NULL)
664     {
665       free (dirs[0]);
666       return 0;
667     }
668 
669   int result = 0;
670   FTSENT *f;
671   while ((f = fts_read (fts)) != NULL)
672     {
673       switch (f->fts_info)
674 	{
675 	case FTS_F:
676 	case FTS_SL:
677 	case FTS_NSOK:
678 	  result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
679 	  if (result > 0)	/* Nothing found.  */
680 	    {
681 	      result = 0;
682 	      continue;
683 	    }
684 	  break;
685 
686 	case FTS_ERR:
687 	case FTS_DNR:
688 	  result = f->fts_errno;
689 	  break;
690 
691 	case FTS_NS:
692 	case FTS_SLNONE:
693 	default:
694 	  continue;
695 	}
696 
697       /* We only get here when finished or in error cases.  */
698       break;
699     }
700   fts_close (fts);
701   free (dirs[0]);
702 
703   return result;
704 }
705 
706 int
dwfl_linux_kernel_report_kernel(Dwfl * dwfl)707 dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
708 {
709   Dwarf_Addr start = 0;
710   Dwarf_Addr end = 0;
711 
712   #define report() \
713     (INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end))
714 
715   /* This is a bit of a kludge.  If we already reported the kernel,
716      don't bother figuring it out again--it never changes.  */
717   for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
718     if (!strcmp (m->name, KERNEL_MODNAME))
719       {
720 	start = m->low_addr;
721 	end = m->high_addr;
722 	return report () == NULL ? -1 : 0;
723       }
724 
725   /* Try to figure out the bounds of the kernel image without
726      looking for any vmlinux file.  */
727   Dwarf_Addr notes;
728   int result = intuit_kernel_bounds (&start, &end, &notes);
729   if (result == 0)
730     {
731       Dwfl_Module *mod = report ();
732       return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
733     }
734   if (result != ENOENT)
735     return result;
736 
737   /* Find the ELF file for the running kernel and dwfl_report_elf it.  */
738   return report_kernel (dwfl, NULL, NULL);
739 }
INTDEF(dwfl_linux_kernel_report_kernel)740 INTDEF (dwfl_linux_kernel_report_kernel)
741 
742 
/* Copy the NAMELEN characters of MODULE_NAME plus the character that
   follows them (normally the terminating NUL) to ALTERNATE_NAME,
   replacing every FROM with TO.  Returns false, leaving ALTERNATE_NAME
   untouched, if FROM does not occur in the first NAMELEN characters.  */
static inline bool
subst_name (char from, char to,
            const char * const module_name,
            char * const alternate_name,
            const size_t namelen)
{
  if (memchr (module_name, from, namelen) == NULL)
    return false;

  for (size_t i = 0; i < namelen; ++i)
    {
      const char c = module_name[i];
      alternate_name[i] = c == from ? to : c;
    }
  alternate_name[namelen] = module_name[namelen];

  return true;
}
765 
766 /* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules.  */
767 
768 int
dwfl_linux_kernel_find_elf(Dwfl_Module * mod,void ** userdata,const char * module_name,Dwarf_Addr base,char ** file_name,Elf ** elfp)769 dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
770 			    void **userdata __attribute__ ((unused)),
771 			    const char *module_name,
772 			    Dwarf_Addr base __attribute__ ((unused)),
773 			    char **file_name, Elf **elfp)
774 {
775   if (mod->build_id_len > 0)
776     {
777       int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
778 					       file_name, elfp);
779       if (fd >= 0 || mod->main.elf != NULL || errno != 0)
780 	return fd;
781     }
782 
783   const char *release = kernel_release ();
784   if (release == NULL)
785     return errno;
786 
787   if (!strcmp (module_name, KERNEL_MODNAME))
788     return find_kernel_elf (mod->dwfl, release, file_name);
789 
790   /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko".  */
791 
792   char *modulesdir[] = { NULL, NULL };
793   if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
794     return -1;
795 
796   FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
797   if (fts == NULL)
798     {
799       free (modulesdir[0]);
800       return -1;
801     }
802 
803   size_t namelen = strlen (module_name);
804 
805   /* This is a kludge.  There is no actual necessary relationship between
806      the name of the .ko file installed and the module name the kernel
807      knows it by when it's loaded.  The kernel's only idea of the module
808      name comes from the name embedded in the object's magic
809      .gnu.linkonce.this_module section.
810 
811      In practice, these module names match the .ko file names except for
812      some using '_' and some using '-'.  So our cheap kludge is to look for
813      two files when either a '_' or '-' appears in a module name, one using
814      only '_' and one only using '-'.  */
815 
816   char *alternate_name = malloc (namelen + 1);
817   if (unlikely (alternate_name == NULL))
818     {
819       free (modulesdir[0]);
820       return ENOMEM;
821     }
822   if (!subst_name ('-', '_', module_name, alternate_name, namelen) &&
823       !subst_name ('_', '-', module_name, alternate_name, namelen))
824     alternate_name[0] = '\0';
825 
826   FTSENT *f;
827   int error = ENOENT;
828   while ((f = fts_read (fts)) != NULL)
829     {
830       /* Skip a "source" subtree, which tends to be large.
831 	 This insane hard-coding of names is what depmod does too.  */
832       if (f->fts_namelen == sizeof "source" - 1
833 	  && !strcmp (f->fts_name, "source"))
834 	{
835 	  fts_set (fts, f, FTS_SKIP);
836 	  continue;
837 	}
838 
839       error = ENOENT;
840       switch (f->fts_info)
841 	{
842 	case FTS_F:
843 	case FTS_SL:
844 	case FTS_NSOK:
845 	  /* See if this file name is "MODULE_NAME.ko".  */
846 	  if (check_suffix (f, namelen)
847 	      && (!memcmp (f->fts_name, module_name, namelen)
848 		  || !memcmp (f->fts_name, alternate_name, namelen)))
849 	    {
850 	      int fd = open (f->fts_accpath, O_RDONLY);
851 	      *file_name = strdup (f->fts_path);
852 	      fts_close (fts);
853 	      free (modulesdir[0]);
854 	      free (alternate_name);
855 	      if (fd < 0)
856 		free (*file_name);
857 	      else if (*file_name == NULL)
858 		{
859 		  close (fd);
860 		  fd = -1;
861 		}
862 	      return fd;
863 	    }
864 	  break;
865 
866 	case FTS_ERR:
867 	case FTS_DNR:
868 	case FTS_NS:
869 	  error = f->fts_errno;
870 	  break;
871 
872 	case FTS_SLNONE:
873 	default:
874 	  break;
875 	}
876     }
877 
878   fts_close (fts);
879   free (modulesdir[0]);
880   free (alternate_name);
881   errno = error;
882   return -1;
883 }
INTDEF(dwfl_linux_kernel_find_elf)884 INTDEF (dwfl_linux_kernel_find_elf)
885 
886 
/* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
   We read the information from /sys/module directly.

   The file for section SECNAME of module MODNAME below SECADDRDIRFMT is
   opened and the hexadecimal address it contains is stored in *ADDR.
   Returns DWARF_CB_OK on success, including the case of a section known
   never to be loaded, for which *ADDR is set to -1.  Returns
   DWARF_CB_ABORT on failure (ENOMEM if the "_init" file name cannot be
   built); errno is set explicitly when reading the address fails.  */

int
dwfl_linux_kernel_module_section_address
(Dwfl_Module *mod __attribute__ ((unused)),
 void **userdata __attribute__ ((unused)),
 const char *modname, Dwarf_Addr base __attribute__ ((unused)),
 const char *secname, Elf32_Word shndx __attribute__ ((unused)),
 const GElf_Shdr *shdr __attribute__ ((unused)),
 Dwarf_Addr *addr)
{
  /* First try the file named exactly like the section.  */
  char *sysfile;
  if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
    return DWARF_CB_ABORT;

  FILE *f = fopen (sysfile, "r");
  free (sysfile);

  if (f == NULL)
    {
      /* Only a missing file gets the fallbacks below; any other
	 failure to open it is reported right away.  */
      if (errno == ENOENT)
	{
	  /* The .modinfo and .data.percpu sections are never kept
	     loaded in the kernel.  If the kernel was compiled without
	     CONFIG_MODULE_UNLOAD, the .exit.* sections are not
	     actually loaded at all.

	     Setting *ADDR to -1 tells the caller this section is
	     actually absent from memory.  */

	  if (!strcmp (secname, ".modinfo")
	      || !strcmp (secname, ".data.percpu")
	      || !strncmp (secname, ".exit", 5))
	    {
	      *addr = (Dwarf_Addr) -1l;
	      return DWARF_CB_OK;
	    }

	  /* The goofy PPC64 module_frob_arch_sections function tweaks
	     the section names as a way to control other kernel code's
	     behavior, and this cruft leaks out into the /sys information.
	     The file name for ".init*" may actually look like "_init*".  */

	  const bool is_init = !strncmp (secname, ".init", 5);
	  if (is_init)
	    {
	      if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
			    modname, &secname[1]) < 0)
		return ENOMEM;
	      f = fopen (sysfile, "r");
	      free (sysfile);
	      if (f != NULL)
		goto ok;
	    }

	  /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
	     In case that size increases in the future, look for longer
	     truncated names first.  */
	  size_t namelen = strlen (secname);
	  if (namelen >= MODULE_SECT_NAME_LEN)
	    {
	      int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
				  modname, secname);
	      if (len < 0)
		return DWARF_CB_ABORT;
	      /* END walks backwards from the end of the string, cutting
		 off one more character of the section name each round;
		 &sysfile[len - namelen] is where the section name starts.  */
	      char *end = sysfile + len;
	      do
		{
		  *--end = '\0';
		  f = fopen (sysfile, "r");
		  if (is_init && f == NULL && errno == ENOENT)
		    {
		      /* Try the "_init" spelling of the truncated name,
			 then put the '.' back for the next round.  */
		      sysfile[len - namelen] = '_';
		      f = fopen (sysfile, "r");
		      sysfile[len - namelen] = '.';
		    }
		}
	      while (f == NULL && errno == ENOENT
		     && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
	      free (sysfile);

	      if (f != NULL)
		goto ok;
	    }
	}

      return DWARF_CB_ABORT;
    }

 ok:
  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);

  /* The file holds the address as a hexadecimal number.  */
  int result = (fscanf (f, "%" PRIx64 "\n", addr) == 1 ? 0
		: ferror_unlocked (f) ? errno : ENOEXEC);
  fclose (f);

  if (result == 0)
    return DWARF_CB_OK;

  errno = result;
  return DWARF_CB_ABORT;
}
INTDEF (dwfl_linux_kernel_module_section_address)
991 
992 int
993 dwfl_linux_kernel_report_modules (Dwfl *dwfl)
994 {
995   FILE *f = fopen (MODULELIST, "r");
996   if (f == NULL)
997     return errno;
998 
999   (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
1000 
1001   int result = 0;
1002   Dwarf_Addr modaddr;
1003   unsigned long int modsz;
1004   char modname[128];
1005   char *line = NULL;
1006   size_t linesz = 0;
1007   /* We can't just use fscanf here because it's not easy to distinguish \n
1008      from other whitespace so as to take the optional word following the
1009      address but always stop at the end of the line.  */
1010   while (getline (&line, &linesz, f) > 0
1011 	 && sscanf (line, "%128s %lu %*s %*s %*s %" PRIx64 " %*s\n",
1012 		    modname, &modsz, &modaddr) == 3)
1013     {
1014       Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
1015 						     modaddr, modaddr + modsz);
1016       if (mod == NULL)
1017 	{
1018 	  result = -1;
1019 	  break;
1020 	}
1021 
1022       result = check_module_notes (mod);
1023     }
1024   free (line);
1025 
1026   if (result == 0)
1027     result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
1028 
1029   fclose (f);
1030 
1031   return result;
1032 }
1033 INTDEF (dwfl_linux_kernel_report_modules)
1034