1 /* Symbol, variable and name lookup.
2    Copyright (C) 2019-2020 Free Software Foundation, Inc.
3 
4    This file is part of libctf.
5 
6    libctf is free software; you can redistribute it and/or modify it under
7    the terms of the GNU General Public License as published by the Free
8    Software Foundation; either version 3, or (at your option) any later
9    version.
10 
11    This program is distributed in the hope that it will be useful, but
12    WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14    See the GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; see the file COPYING.  If not see
18    <http://www.gnu.org/licenses/>.  */
19 
20 #include <ctf-impl.h>
21 #include <elf.h>
22 #include <string.h>
23 
24 /* Compare the given input string and length against a table of known C storage
25    qualifier keywords.  We just ignore these in ctf_lookup_by_name, below.  To
26    do this quickly, we use a pre-computed Perfect Hash Function similar to the
27    technique originally described in the classic paper:
28 
29    R.J. Cichelli, "Minimal Perfect Hash Functions Made Simple",
30    Communications of the ACM, Volume 23, Issue 1, January 1980, pp. 17-19.
31 
32    For an input string S of length N, we use hash H = S[N - 1] + N - 105, which
33    for the current set of qualifiers yields a unique H in the range [0 .. 20].
34    The hash can be modified when the keyword set changes as necessary.  We also
35    store the length of each keyword and check it prior to the final strcmp().
36 
37    TODO: just use gperf.  */
38 
39 static int
40 isqualifier (const char *s, size_t len)
41 {
42   static const struct qual
43   {
44     const char *q_name;
45     size_t q_len;
46   } qhash[] = {
47     {"static", 6}, {"", 0}, {"", 0}, {"", 0},
48     {"volatile", 8}, {"", 0}, {"", 0}, {"", 0}, {"", 0},
49     {"", 0}, {"auto", 4}, {"extern", 6}, {"", 0}, {"", 0},
50     {"", 0}, {"", 0}, {"const", 5}, {"register", 8},
51     {"", 0}, {"restrict", 8}, {"_Restrict", 9}
52   };
53 
54   int h = s[len - 1] + (int) len - 105;
55   const struct qual *qp = &qhash[h];
56 
57   return (h >= 0 && (size_t) h < sizeof (qhash) / sizeof (qhash[0])
58 	  && (size_t) len == qp->q_len &&
59 	  strncmp (qp->q_name, s, qp->q_len) == 0);
60 }
61 
62 /* Attempt to convert the given C type name into the corresponding CTF type ID.
63    It is not possible to do complete and proper conversion of type names
64    without implementing a more full-fledged parser, which is necessary to
65    handle things like types that are function pointers to functions that
66    have arguments that are function pointers, and fun stuff like that.
67    Instead, this function implements a very simple conversion algorithm that
68    finds the things that we actually care about: structs, unions, enums,
69    integers, floats, typedefs, and pointers to any of these named types.  */
70 
71 ctf_id_t
72 ctf_lookup_by_name (ctf_file_t *fp, const char *name)
73 {
74   static const char delimiters[] = " \t\n\r\v\f*";
75 
76   const ctf_lookup_t *lp;
77   const char *p, *q, *end;
78   ctf_id_t type = 0;
79   ctf_id_t ntype, ptype;
80 
81   if (name == NULL)
82     return (ctf_set_errno (fp, EINVAL));
83 
84   for (p = name, end = name + strlen (name); *p != '\0'; p = q)
85     {
86       while (isspace (*p))
87 	p++;			/* Skip leading whitespace.  */
88 
89       if (p == end)
90 	break;
91 
92       if ((q = strpbrk (p + 1, delimiters)) == NULL)
93 	q = end;		/* Compare until end.  */
94 
95       if (*p == '*')
96 	{
97 	  /* Find a pointer to type by looking in fp->ctf_ptrtab.
98 	     If we can't find a pointer to the given type, see if
99 	     we can compute a pointer to the type resulting from
100 	     resolving the type down to its base type and use
101 	     that instead.  This helps with cases where the CTF
102 	     data includes "struct foo *" but not "foo_t *" and
103 	     the user tries to access "foo_t *" in the debugger.
104 
105 	     TODO need to handle parent containers too.  */
106 
107 	  ntype = fp->ctf_ptrtab[LCTF_TYPE_TO_INDEX (fp, type)];
108 	  if (ntype == 0)
109 	    {
110 	      ntype = ctf_type_resolve_unsliced (fp, type);
111 	      if (ntype == CTF_ERR
112 		  || (ntype =
113 		      fp->ctf_ptrtab[LCTF_TYPE_TO_INDEX (fp, ntype)]) == 0)
114 		{
115 		  (void) ctf_set_errno (fp, ECTF_NOTYPE);
116 		  goto err;
117 		}
118 	    }
119 
120 	  type = LCTF_INDEX_TO_TYPE (fp, ntype, (fp->ctf_flags & LCTF_CHILD));
121 
122 	  q = p + 1;
123 	  continue;
124 	}
125 
126       if (isqualifier (p, (size_t) (q - p)))
127 	continue;		/* Skip qualifier keyword.  */
128 
129       for (lp = fp->ctf_lookups; lp->ctl_prefix != NULL; lp++)
130 	{
131 	  /* TODO: This is not MT-safe.  */
132 	  if ((lp->ctl_prefix[0] == '\0' ||
133 	       strncmp (p, lp->ctl_prefix, (size_t) (q - p)) == 0) &&
134 	      (size_t) (q - p) >= lp->ctl_len)
135 	    {
136 	      for (p += lp->ctl_len; isspace (*p); p++)
137 		continue;	/* Skip prefix and next whitespace.  */
138 
139 	      if ((q = strchr (p, '*')) == NULL)
140 		q = end;	/* Compare until end.  */
141 
142 	      while (isspace (q[-1]))
143 		q--;		/* Exclude trailing whitespace.  */
144 
145 	      /* Expand and/or allocate storage for a slice of the name, then
146 		 copy it in.  */
147 
148 	      if (fp->ctf_tmp_typeslicelen >= (size_t) (q - p) + 1)
149 		{
150 		  memcpy (fp->ctf_tmp_typeslice, p, (size_t) (q - p));
151 		  fp->ctf_tmp_typeslice[(size_t) (q - p)] = '\0';
152 		}
153 	      else
154 		{
155 		  free (fp->ctf_tmp_typeslice);
156 		  fp->ctf_tmp_typeslice = xstrndup (p, (size_t) (q - p));
157 		  if (fp->ctf_tmp_typeslice == NULL)
158 		    {
159 		      (void) ctf_set_errno (fp, ENOMEM);
160 		      return CTF_ERR;
161 		    }
162 		}
163 
164 	      if ((type = ctf_lookup_by_rawhash (fp, lp->ctl_hash,
165 						 fp->ctf_tmp_typeslice)) == 0)
166 		{
167 		  (void) ctf_set_errno (fp, ECTF_NOTYPE);
168 		  goto err;
169 		}
170 
171 	      break;
172 	    }
173 	}
174 
175       if (lp->ctl_prefix == NULL)
176 	{
177 	  (void) ctf_set_errno (fp, ECTF_NOTYPE);
178 	  goto err;
179 	}
180     }
181 
182   if (*p != '\0' || type == 0)
183     return (ctf_set_errno (fp, ECTF_SYNTAX));
184 
185   return type;
186 
187 err:
188   if (fp->ctf_parent != NULL
189       && (ptype = ctf_lookup_by_name (fp->ctf_parent, name)) != CTF_ERR)
190     return ptype;
191 
192   return CTF_ERR;
193 }
194 
195 typedef struct ctf_lookup_var_key
196 {
197   ctf_file_t *clvk_fp;
198   const char *clvk_name;
199 } ctf_lookup_var_key_t;
200 
201 /* A bsearch function for variable names.  */
202 
203 static int
204 ctf_lookup_var (const void *key_, const void *memb_)
205 {
206   const ctf_lookup_var_key_t *key = key_;
207   const ctf_varent_t *memb = memb_;
208 
209   return (strcmp (key->clvk_name, ctf_strptr (key->clvk_fp, memb->ctv_name)));
210 }
211 
212 /* Given a variable name, return the type of the variable with that name.  */
213 
214 ctf_id_t
215 ctf_lookup_variable (ctf_file_t *fp, const char *name)
216 {
217   ctf_varent_t *ent;
218   ctf_lookup_var_key_t key = { fp, name };
219 
220   /* This array is sorted, so we can bsearch for it.  */
221 
222   ent = bsearch (&key, fp->ctf_vars, fp->ctf_nvars, sizeof (ctf_varent_t),
223 		 ctf_lookup_var);
224 
225   if (ent == NULL)
226     {
227       if (fp->ctf_parent != NULL)
228 	return ctf_lookup_variable (fp->ctf_parent, name);
229 
230       return (ctf_set_errno (fp, ECTF_NOTYPEDAT));
231     }
232 
233   return ent->ctv_type;
234 }
235 
236 /* Given a symbol table index, return the name of that symbol from the secondary
237    string table, or the null string (never NULL).  */
238 const char *
239 ctf_lookup_symbol_name (ctf_file_t *fp, unsigned long symidx)
240 {
241   const ctf_sect_t *sp = &fp->ctf_symtab;
242   Elf64_Sym sym, *gsp;
243 
244   if (sp->cts_data == NULL)
245     {
246       ctf_set_errno (fp, ECTF_NOSYMTAB);
247       return _CTF_NULLSTR;
248     }
249 
250   if (symidx >= fp->ctf_nsyms)
251     {
252       ctf_set_errno (fp, EINVAL);
253       return _CTF_NULLSTR;
254     }
255 
256   if (sp->cts_entsize == sizeof (Elf32_Sym))
257     {
258       const Elf32_Sym *symp = (Elf32_Sym *) sp->cts_data + symidx;
259       gsp = ctf_sym_to_elf64 (symp, &sym);
260     }
261   else
262       gsp = (Elf64_Sym *) sp->cts_data + symidx;
263 
264   if (gsp->st_name < fp->ctf_str[CTF_STRTAB_1].cts_len)
265     return (const char *) fp->ctf_str[CTF_STRTAB_1].cts_strs + gsp->st_name;
266 
267   return _CTF_NULLSTR;
268 }
269 
270 /* Given a symbol table index, return the type of the data object described
271    by the corresponding entry in the symbol table.  */
272 
273 ctf_id_t
274 ctf_lookup_by_symbol (ctf_file_t *fp, unsigned long symidx)
275 {
276   const ctf_sect_t *sp = &fp->ctf_symtab;
277   ctf_id_t type;
278 
279   if (sp->cts_data == NULL)
280     return (ctf_set_errno (fp, ECTF_NOSYMTAB));
281 
282   if (symidx >= fp->ctf_nsyms)
283     return (ctf_set_errno (fp, EINVAL));
284 
285   if (sp->cts_entsize == sizeof (Elf32_Sym))
286     {
287       const Elf32_Sym *symp = (Elf32_Sym *) sp->cts_data + symidx;
288       if (ELF32_ST_TYPE (symp->st_info) != STT_OBJECT)
289 	return (ctf_set_errno (fp, ECTF_NOTDATA));
290     }
291   else
292     {
293       const Elf64_Sym *symp = (Elf64_Sym *) sp->cts_data + symidx;
294       if (ELF64_ST_TYPE (symp->st_info) != STT_OBJECT)
295 	return (ctf_set_errno (fp, ECTF_NOTDATA));
296     }
297 
298   if (fp->ctf_sxlate[symidx] == -1u)
299     return (ctf_set_errno (fp, ECTF_NOTYPEDAT));
300 
301   type = *(uint32_t *) ((uintptr_t) fp->ctf_buf + fp->ctf_sxlate[symidx]);
302   if (type == 0)
303     return (ctf_set_errno (fp, ECTF_NOTYPEDAT));
304 
305   return type;
306 }
307 
308 /* Return the pointer to the internal CTF type data corresponding to the
309    given type ID.  If the ID is invalid, the function returns NULL.
310    This function is not exported outside of the library.  */
311 
312 const ctf_type_t *
313 ctf_lookup_by_id (ctf_file_t **fpp, ctf_id_t type)
314 {
315   ctf_file_t *fp = *fpp;	/* Caller passes in starting CTF container.  */
316   ctf_id_t idx;
317 
318   if ((fp->ctf_flags & LCTF_CHILD) && LCTF_TYPE_ISPARENT (fp, type)
319       && (fp = fp->ctf_parent) == NULL)
320     {
321       (void) ctf_set_errno (*fpp, ECTF_NOPARENT);
322       return NULL;
323     }
324 
325   /* If this container is writable, check for a dynamic type.  */
326 
327   if (fp->ctf_flags & LCTF_RDWR)
328     {
329       ctf_dtdef_t *dtd;
330 
331       if ((dtd = ctf_dynamic_type (fp, type)) != NULL)
332 	{
333 	  *fpp = fp;
334 	  return &dtd->dtd_data;
335 	}
336       (void) ctf_set_errno (*fpp, ECTF_BADID);
337       return NULL;
338     }
339 
340   /* Check for a type in the static portion.  */
341 
342   idx = LCTF_TYPE_TO_INDEX (fp, type);
343   if (idx > 0 && (unsigned long) idx <= fp->ctf_typemax)
344     {
345       *fpp = fp;		/* Function returns ending CTF container.  */
346       return (LCTF_INDEX_TO_TYPEPTR (fp, idx));
347     }
348 
349   (void) ctf_set_errno (*fpp, ECTF_BADID);
350   return NULL;
351 }
352 
353 /* Given a symbol table index, return the info for the function described
354    by the corresponding entry in the symbol table.  */
355 
356 int
357 ctf_func_info (ctf_file_t *fp, unsigned long symidx, ctf_funcinfo_t *fip)
358 {
359   const ctf_sect_t *sp = &fp->ctf_symtab;
360   const uint32_t *dp;
361   uint32_t info, kind, n;
362 
363   if (sp->cts_data == NULL)
364     return (ctf_set_errno (fp, ECTF_NOSYMTAB));
365 
366   if (symidx >= fp->ctf_nsyms)
367     return (ctf_set_errno (fp, EINVAL));
368 
369   if (sp->cts_entsize == sizeof (Elf32_Sym))
370     {
371       const Elf32_Sym *symp = (Elf32_Sym *) sp->cts_data + symidx;
372       if (ELF32_ST_TYPE (symp->st_info) != STT_FUNC)
373 	return (ctf_set_errno (fp, ECTF_NOTFUNC));
374     }
375   else
376     {
377       const Elf64_Sym *symp = (Elf64_Sym *) sp->cts_data + symidx;
378       if (ELF64_ST_TYPE (symp->st_info) != STT_FUNC)
379 	return (ctf_set_errno (fp, ECTF_NOTFUNC));
380     }
381 
382   if (fp->ctf_sxlate[symidx] == -1u)
383     return (ctf_set_errno (fp, ECTF_NOFUNCDAT));
384 
385   dp = (uint32_t *) ((uintptr_t) fp->ctf_buf + fp->ctf_sxlate[symidx]);
386 
387   info = *dp++;
388   kind = LCTF_INFO_KIND (fp, info);
389   n = LCTF_INFO_VLEN (fp, info);
390 
391   if (kind == CTF_K_UNKNOWN && n == 0)
392     return (ctf_set_errno (fp, ECTF_NOFUNCDAT));
393 
394   if (kind != CTF_K_FUNCTION)
395     return (ctf_set_errno (fp, ECTF_CORRUPT));
396 
397   fip->ctc_return = *dp++;
398   fip->ctc_argc = n;
399   fip->ctc_flags = 0;
400 
401   if (n != 0 && dp[n - 1] == 0)
402     {
403       fip->ctc_flags |= CTF_FUNC_VARARG;
404       fip->ctc_argc--;
405     }
406 
407   return 0;
408 }
409 
410 /* Given a symbol table index, return the arguments for the function described
411    by the corresponding entry in the symbol table.  */
412 
413 int
414 ctf_func_args (ctf_file_t * fp, unsigned long symidx, uint32_t argc,
415 	       ctf_id_t * argv)
416 {
417   const uint32_t *dp;
418   ctf_funcinfo_t f;
419 
420   if (ctf_func_info (fp, symidx, &f) < 0)
421     return -1;			/* errno is set for us.  */
422 
423   /* The argument data is two uint32_t's past the translation table
424      offset: one for the function info, and one for the return type. */
425 
426   dp = (uint32_t *) ((uintptr_t) fp->ctf_buf + fp->ctf_sxlate[symidx]) + 2;
427 
428   for (argc = MIN (argc, f.ctc_argc); argc != 0; argc--)
429     *argv++ = *dp++;
430 
431   return 0;
432 }
433