1 /* CTF string table management.
2    Copyright (C) 2019-2021 Free Software Foundation, Inc.
3 
4    This file is part of libctf.
5 
6    libctf is free software; you can redistribute it and/or modify it under
7    the terms of the GNU General Public License as published by the Free
8    Software Foundation; either version 3, or (at your option) any later
9    version.
10 
11    This program is distributed in the hope that it will be useful, but
12    WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14    See the GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program; see the file COPYING.  If not see
18    <http://www.gnu.org/licenses/>.  */
19 
20 #include <ctf-impl.h>
21 #include <string.h>
22 #include <assert.h>
23 
24 /* Convert an encoded CTF string name into a pointer to a C string, using an
25   explicit internal strtab rather than the fp-based one.  */
26 const char *
ctf_strraw_explicit(ctf_dict_t * fp,uint32_t name,ctf_strs_t * strtab)27 ctf_strraw_explicit (ctf_dict_t *fp, uint32_t name, ctf_strs_t *strtab)
28 {
29   ctf_strs_t *ctsp = &fp->ctf_str[CTF_NAME_STID (name)];
30 
31   if ((CTF_NAME_STID (name) == CTF_STRTAB_0) && (strtab != NULL))
32     ctsp = strtab;
33 
34   /* If this name is in the external strtab, and there is a synthetic strtab,
35      use it in preference.  */
36 
37   if (CTF_NAME_STID (name) == CTF_STRTAB_1
38       && fp->ctf_syn_ext_strtab != NULL)
39     return ctf_dynhash_lookup (fp->ctf_syn_ext_strtab,
40 			       (void *) (uintptr_t) name);
41 
42   /* If the name is in the internal strtab, and the offset is beyond the end of
43      the ctsp->cts_len but below the ctf_str_prov_offset, this is a provisional
44      string added by ctf_str_add*() but not yet built into a real strtab: get
45      the value out of the ctf_prov_strtab.  */
46 
47   if (CTF_NAME_STID (name) == CTF_STRTAB_0
48       && name >= ctsp->cts_len && name < fp->ctf_str_prov_offset)
49       return ctf_dynhash_lookup (fp->ctf_prov_strtab,
50 				 (void *) (uintptr_t) name);
51 
52   if (ctsp->cts_strs != NULL && CTF_NAME_OFFSET (name) < ctsp->cts_len)
53     return (ctsp->cts_strs + CTF_NAME_OFFSET (name));
54 
55   /* String table not loaded or corrupt offset.  */
56   return NULL;
57 }
58 
59 /* Convert an encoded CTF string name into a pointer to a C string by looking
60   up the appropriate string table buffer and then adding the offset.  */
61 const char *
ctf_strraw(ctf_dict_t * fp,uint32_t name)62 ctf_strraw (ctf_dict_t *fp, uint32_t name)
63 {
64   return ctf_strraw_explicit (fp, name, NULL);
65 }
66 
67 /* Return a guaranteed-non-NULL pointer to the string with the given CTF
68    name.  */
69 const char *
ctf_strptr(ctf_dict_t * fp,uint32_t name)70 ctf_strptr (ctf_dict_t *fp, uint32_t name)
71 {
72   const char *s = ctf_strraw (fp, name);
73   return (s != NULL ? s : "(?)");
74 }
75 
76 /* Remove all refs to a given atom.  */
77 static void
ctf_str_purge_atom_refs(ctf_str_atom_t * atom)78 ctf_str_purge_atom_refs (ctf_str_atom_t *atom)
79 {
80   ctf_str_atom_ref_t *ref, *next;
81 
82   for (ref = ctf_list_next (&atom->csa_refs); ref != NULL; ref = next)
83     {
84       next = ctf_list_next (ref);
85       ctf_list_delete (&atom->csa_refs, ref);
86       free (ref);
87     }
88 }
89 
90 /* Free an atom (only called on ctf_close().)  */
91 static void
ctf_str_free_atom(void * a)92 ctf_str_free_atom (void *a)
93 {
94   ctf_str_atom_t *atom = a;
95 
96   ctf_str_purge_atom_refs (atom);
97   free (atom);
98 }
99 
100 /* Create the atoms table.  There is always at least one atom in it, the null
101    string.  */
102 int
ctf_str_create_atoms(ctf_dict_t * fp)103 ctf_str_create_atoms (ctf_dict_t *fp)
104 {
105   fp->ctf_str_atoms = ctf_dynhash_create (ctf_hash_string, ctf_hash_eq_string,
106 					  free, ctf_str_free_atom);
107   if (!fp->ctf_str_atoms)
108     return -ENOMEM;
109 
110   if (!fp->ctf_prov_strtab)
111     fp->ctf_prov_strtab = ctf_dynhash_create (ctf_hash_integer,
112 					      ctf_hash_eq_integer,
113 					      NULL, NULL);
114   if (!fp->ctf_prov_strtab)
115     goto oom_prov_strtab;
116 
117   if (!fp->ctf_str_pending_ref)
118     fp->ctf_str_pending_ref = ctf_dynset_create (htab_hash_pointer,
119 						 htab_eq_pointer,
120 						 NULL);
121   if (!fp->ctf_str_pending_ref)
122     goto oom_str_pending_ref;
123 
124   errno = 0;
125   ctf_str_add (fp, "");
126   if (errno == ENOMEM)
127     goto oom_str_add;
128 
129   return 0;
130 
131  oom_str_add:
132   ctf_dynhash_destroy (fp->ctf_prov_strtab);
133   fp->ctf_prov_strtab = NULL;
134  oom_str_pending_ref:
135   ctf_dynset_destroy (fp->ctf_str_pending_ref);
136   fp->ctf_str_pending_ref = NULL;
137  oom_prov_strtab:
138   ctf_dynhash_destroy (fp->ctf_str_atoms);
139   fp->ctf_str_atoms = NULL;
140   return -ENOMEM;
141 }
142 
143 /* Destroy the atoms table.  */
144 void
ctf_str_free_atoms(ctf_dict_t * fp)145 ctf_str_free_atoms (ctf_dict_t *fp)
146 {
147   ctf_dynhash_destroy (fp->ctf_prov_strtab);
148   ctf_dynhash_destroy (fp->ctf_str_atoms);
149   ctf_dynset_destroy (fp->ctf_str_pending_ref);
150 }
151 
152 #define CTF_STR_ADD_REF 0x1
153 #define CTF_STR_MAKE_PROVISIONAL 0x2
154 #define CTF_STR_PENDING_REF 0x4
155 
156 /* Add a string to the atoms table, copying the passed-in string.  Return the
157    atom added. Return NULL only when out of memory (and do not touch the
158    passed-in string in that case).  Possibly augment the ref list with the
159    passed-in ref.  Possibly add a provisional entry for this string to the
160    provisional strtab.   */
161 static ctf_str_atom_t *
ctf_str_add_ref_internal(ctf_dict_t * fp,const char * str,int flags,uint32_t * ref)162 ctf_str_add_ref_internal (ctf_dict_t *fp, const char *str,
163 			  int flags, uint32_t *ref)
164 {
165   char *newstr = NULL;
166   ctf_str_atom_t *atom = NULL;
167   ctf_str_atom_ref_t *aref = NULL;
168 
169   atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
170 
171   if (flags & CTF_STR_ADD_REF)
172     {
173       if ((aref = malloc (sizeof (struct ctf_str_atom_ref))) == NULL)
174 	return NULL;
175       aref->caf_ref = ref;
176     }
177 
178   if (atom)
179     {
180       if (flags & CTF_STR_ADD_REF)
181 	{
182 	  ctf_dynset_remove (fp->ctf_str_pending_ref, (void *) ref);
183 	  ctf_list_append (&atom->csa_refs, aref);
184 	  fp->ctf_str_num_refs++;
185 	}
186       return atom;
187     }
188 
189   if ((atom = malloc (sizeof (struct ctf_str_atom))) == NULL)
190     goto oom;
191   memset (atom, 0, sizeof (struct ctf_str_atom));
192 
193   if ((newstr = strdup (str)) == NULL)
194     goto oom;
195 
196   if (ctf_dynhash_insert (fp->ctf_str_atoms, newstr, atom) < 0)
197     goto oom;
198 
199   atom->csa_str = newstr;
200   atom->csa_snapshot_id = fp->ctf_snapshots;
201 
202   if (flags & CTF_STR_MAKE_PROVISIONAL)
203     {
204       atom->csa_offset = fp->ctf_str_prov_offset;
205 
206       if (ctf_dynhash_insert (fp->ctf_prov_strtab, (void *) (uintptr_t)
207 			      atom->csa_offset, (void *) atom->csa_str) < 0)
208 	goto oom;
209 
210       fp->ctf_str_prov_offset += strlen (atom->csa_str) + 1;
211     }
212 
213   if (flags & CTF_STR_PENDING_REF)
214     {
215       if (ctf_dynset_insert (fp->ctf_str_pending_ref, (void *) ref) < 0)
216 	goto oom;
217     }
218   else if (flags & CTF_STR_ADD_REF)
219     {
220       ctf_dynset_remove (fp->ctf_str_pending_ref, (void *) ref);
221       ctf_list_append (&atom->csa_refs, aref);
222       fp->ctf_str_num_refs++;
223     }
224   return atom;
225 
226  oom:
227   if (newstr)
228     ctf_dynhash_remove (fp->ctf_str_atoms, newstr);
229   free (atom);
230   free (aref);
231   free (newstr);
232   ctf_set_errno (fp, ENOMEM);
233   return NULL;
234 }
235 
236 /* Add a string to the atoms table, without augmenting the ref list for this
237    string: return a 'provisional offset' which can be used to return this string
238    until ctf_str_write_strtab is called, or 0 on failure.  (Everywhere the
239    provisional offset is assigned to should be added as a ref using
240    ctf_str_add_ref() as well.) */
241 uint32_t
ctf_str_add(ctf_dict_t * fp,const char * str)242 ctf_str_add (ctf_dict_t *fp, const char *str)
243 {
244   ctf_str_atom_t *atom;
245 
246   if (!str)
247     str = "";
248 
249   atom = ctf_str_add_ref_internal (fp, str, CTF_STR_MAKE_PROVISIONAL, 0);
250   if (!atom)
251     return 0;
252 
253   return atom->csa_offset;
254 }
255 
256 /* Like ctf_str_add(), but additionally augment the atom's refs list with the
257    passed-in ref, whether or not the string is already present.  There is no
258    attempt to deduplicate the refs list (but duplicates are harmless).  */
259 uint32_t
ctf_str_add_ref(ctf_dict_t * fp,const char * str,uint32_t * ref)260 ctf_str_add_ref (ctf_dict_t *fp, const char *str, uint32_t *ref)
261 {
262   ctf_str_atom_t *atom;
263 
264   if (!str)
265     str = "";
266 
267   atom = ctf_str_add_ref_internal (fp, str, CTF_STR_ADD_REF
268 				   | CTF_STR_MAKE_PROVISIONAL, ref);
269   if (!atom)
270     return 0;
271 
272   return atom->csa_offset;
273 }
274 
275 /* Like ctf_str_add_ref(), but notes that this memory location must be added as
276    a ref by a later serialization phase, rather than adding it itself.  */
277 uint32_t
ctf_str_add_pending(ctf_dict_t * fp,const char * str,uint32_t * ref)278 ctf_str_add_pending (ctf_dict_t *fp, const char *str, uint32_t *ref)
279 {
280   ctf_str_atom_t *atom;
281 
282   if (!str)
283     str = "";
284 
285   atom = ctf_str_add_ref_internal (fp, str, CTF_STR_PENDING_REF
286 				   | CTF_STR_MAKE_PROVISIONAL, ref);
287   if (!atom)
288     return 0;
289 
290   return atom->csa_offset;
291 }
292 
293 /* Note that a pending ref now located at NEW_REF has moved by BYTES bytes.  */
294 int
ctf_str_move_pending(ctf_dict_t * fp,uint32_t * new_ref,ptrdiff_t bytes)295 ctf_str_move_pending (ctf_dict_t *fp, uint32_t *new_ref, ptrdiff_t bytes)
296 {
297   if (bytes == 0)
298     return 0;
299 
300   if (ctf_dynset_insert (fp->ctf_str_pending_ref, (void *) new_ref) < 0)
301     return (ctf_set_errno (fp, ENOMEM));
302 
303   ctf_dynset_remove (fp->ctf_str_pending_ref,
304 		     (void *) ((signed char *) new_ref - bytes));
305   return 0;
306 }
307 
308 /* Add an external strtab reference at OFFSET.  Returns zero if the addition
309    failed, nonzero otherwise.  */
310 int
ctf_str_add_external(ctf_dict_t * fp,const char * str,uint32_t offset)311 ctf_str_add_external (ctf_dict_t *fp, const char *str, uint32_t offset)
312 {
313   ctf_str_atom_t *atom;
314 
315   if (!str)
316     str = "";
317 
318   atom = ctf_str_add_ref_internal (fp, str, 0, 0);
319   if (!atom)
320     return 0;
321 
322   atom->csa_external_offset = CTF_SET_STID (offset, CTF_STRTAB_1);
323 
324   if (!fp->ctf_syn_ext_strtab)
325     fp->ctf_syn_ext_strtab = ctf_dynhash_create (ctf_hash_integer,
326 						 ctf_hash_eq_integer,
327 						 NULL, NULL);
328   if (!fp->ctf_syn_ext_strtab)
329     {
330       ctf_set_errno (fp, ENOMEM);
331       return 0;
332     }
333 
334   if (ctf_dynhash_insert (fp->ctf_syn_ext_strtab,
335 			  (void *) (uintptr_t)
336 			  atom->csa_external_offset,
337 			  (void *) atom->csa_str) < 0)
338     {
339       /* No need to bother freeing the syn_ext_strtab: it will get freed at
340 	 ctf_str_write_strtab time if unreferenced.  */
341       ctf_set_errno (fp, ENOMEM);
342       return 0;
343     }
344 
345   return 1;
346 }
347 
348 /* Remove a single ref.  */
349 void
ctf_str_remove_ref(ctf_dict_t * fp,const char * str,uint32_t * ref)350 ctf_str_remove_ref (ctf_dict_t *fp, const char *str, uint32_t *ref)
351 {
352   ctf_str_atom_ref_t *aref, *anext;
353   ctf_str_atom_t *atom = NULL;
354 
355   atom = ctf_dynhash_lookup (fp->ctf_str_atoms, str);
356   if (!atom)
357     return;
358 
359   for (aref = ctf_list_next (&atom->csa_refs); aref != NULL; aref = anext)
360     {
361       anext = ctf_list_next (aref);
362       if (aref->caf_ref == ref)
363 	{
364 	  ctf_list_delete (&atom->csa_refs, aref);
365 	  free (aref);
366 	}
367     }
368 
369   ctf_dynset_remove (fp->ctf_str_pending_ref, (void *) ref);
370 }
371 
372 /* A ctf_dynhash_iter_remove() callback that removes atoms later than a given
373    snapshot ID.  External atoms are never removed, because they came from the
374    linker string table and are still present even if you roll back type
375    additions.  */
376 static int
ctf_str_rollback_atom(void * key _libctf_unused_,void * value,void * arg)377 ctf_str_rollback_atom (void *key _libctf_unused_, void *value, void *arg)
378 {
379   ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
380   ctf_snapshot_id_t *id = (ctf_snapshot_id_t *) arg;
381 
382   return (atom->csa_snapshot_id > id->snapshot_id)
383     && (atom->csa_external_offset == 0);
384 }
385 
386 /* Roll back, deleting all (internal) atoms created after a particular ID.  */
387 void
ctf_str_rollback(ctf_dict_t * fp,ctf_snapshot_id_t id)388 ctf_str_rollback (ctf_dict_t *fp, ctf_snapshot_id_t id)
389 {
390   ctf_dynhash_iter_remove (fp->ctf_str_atoms, ctf_str_rollback_atom, &id);
391 }
392 
393 /* An adaptor around ctf_purge_atom_refs.  */
394 static void
ctf_str_purge_one_atom_refs(void * key _libctf_unused_,void * value,void * arg _libctf_unused_)395 ctf_str_purge_one_atom_refs (void *key _libctf_unused_, void *value,
396 			     void *arg _libctf_unused_)
397 {
398   ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
399   ctf_str_purge_atom_refs (atom);
400 }
401 
402 /* Remove all the recorded refs from the atoms table.  */
403 void
ctf_str_purge_refs(ctf_dict_t * fp)404 ctf_str_purge_refs (ctf_dict_t *fp)
405 {
406   if (fp->ctf_str_num_refs > 0)
407     ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_purge_one_atom_refs, NULL);
408   fp->ctf_str_num_refs = 0;
409 }
410 
411 /* Update a list of refs to the specified value. */
412 static void
ctf_str_update_refs(ctf_str_atom_t * refs,uint32_t value)413 ctf_str_update_refs (ctf_str_atom_t *refs, uint32_t value)
414 {
415   ctf_str_atom_ref_t *ref;
416 
417   for (ref = ctf_list_next (&refs->csa_refs); ref != NULL;
418        ref = ctf_list_next (ref))
419       *(ref->caf_ref) = value;
420 }
421 
422 /* State shared across the strtab write process.  */
423 typedef struct ctf_strtab_write_state
424 {
425   /* Strtab we are writing, and the number of strings in it.  */
426   ctf_strs_writable_t *strtab;
427   size_t strtab_count;
428 
429   /* Pointers to (existing) atoms in the atoms table, for qsorting.  */
430   ctf_str_atom_t **sorttab;
431 
432   /* Loop counter for sorttab population.  */
433   size_t i;
434 
435   /* The null-string atom (skipped during population).  */
436   ctf_str_atom_t *nullstr;
437 } ctf_strtab_write_state_t;
438 
439 /* Count the number of entries in the strtab, and its length.  */
440 static void
ctf_str_count_strtab(void * key _libctf_unused_,void * value,void * arg)441 ctf_str_count_strtab (void *key _libctf_unused_, void *value,
442 	      void *arg)
443 {
444   ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
445   ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
446 
447   /* We only factor in the length of items that have no offset and have refs:
448      other items are in the external strtab, or will simply not be written out
449      at all.  They still contribute to the total count, though, because we still
450      have to sort them.  We add in the null string's length explicitly, outside
451      this function, since it is explicitly written out even if it has no refs at
452      all.  */
453 
454   if (s->nullstr == atom)
455     {
456       s->strtab_count++;
457       return;
458     }
459 
460   if (!ctf_list_empty_p (&atom->csa_refs))
461     {
462       if (!atom->csa_external_offset)
463 	s->strtab->cts_len += strlen (atom->csa_str) + 1;
464       s->strtab_count++;
465     }
466 }
467 
468 /* Populate the sorttab with pointers to the strtab atoms.  */
469 static void
ctf_str_populate_sorttab(void * key _libctf_unused_,void * value,void * arg)470 ctf_str_populate_sorttab (void *key _libctf_unused_, void *value,
471 		  void *arg)
472 {
473   ctf_str_atom_t *atom = (ctf_str_atom_t *) value;
474   ctf_strtab_write_state_t *s = (ctf_strtab_write_state_t *) arg;
475 
476   /* Skip the null string.  */
477   if (s->nullstr == atom)
478     return;
479 
480   /* Skip atoms with no refs.  */
481   if (!ctf_list_empty_p (&atom->csa_refs))
482     s->sorttab[s->i++] = atom;
483 }
484 
485 /* Sort the strtab.  */
486 static int
ctf_str_sort_strtab(const void * a,const void * b)487 ctf_str_sort_strtab (const void *a, const void *b)
488 {
489   ctf_str_atom_t **one = (ctf_str_atom_t **) a;
490   ctf_str_atom_t **two = (ctf_str_atom_t **) b;
491 
492   return (strcmp ((*one)->csa_str, (*two)->csa_str));
493 }
494 
495 /* Write out and return a strtab containing all strings with recorded refs,
496    adjusting the refs to refer to the corresponding string.  The returned strtab
497    may be NULL on error.  Also populate the synthetic strtab with mappings from
498    external strtab offsets to names, so we can look them up with ctf_strptr().
499    Only external strtab offsets with references are added.  */
500 ctf_strs_writable_t
ctf_str_write_strtab(ctf_dict_t * fp)501 ctf_str_write_strtab (ctf_dict_t *fp)
502 {
503   ctf_strs_writable_t strtab;
504   ctf_str_atom_t *nullstr;
505   uint32_t cur_stroff = 0;
506   ctf_strtab_write_state_t s;
507   ctf_str_atom_t **sorttab;
508   size_t i;
509   int any_external = 0;
510 
511   memset (&strtab, 0, sizeof (struct ctf_strs_writable));
512   memset (&s, 0, sizeof (struct ctf_strtab_write_state));
513   s.strtab = &strtab;
514 
515   nullstr = ctf_dynhash_lookup (fp->ctf_str_atoms, "");
516   if (!nullstr)
517     {
518       ctf_err_warn (fp, 0, ECTF_INTERNAL, _("null string not found in strtab"));
519       strtab.cts_strs = NULL;
520       return strtab;
521     }
522 
523   s.nullstr = nullstr;
524   ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_count_strtab, &s);
525   strtab.cts_len++;				/* For the null string.  */
526 
527   ctf_dprintf ("%lu bytes of strings in strtab.\n",
528 	       (unsigned long) strtab.cts_len);
529 
530   /* Sort the strtab.  Force the null string to be first.  */
531   sorttab = calloc (s.strtab_count, sizeof (ctf_str_atom_t *));
532   if (!sorttab)
533     goto oom;
534 
535   sorttab[0] = nullstr;
536   s.i = 1;
537   s.sorttab = sorttab;
538   ctf_dynhash_iter (fp->ctf_str_atoms, ctf_str_populate_sorttab, &s);
539 
540   qsort (&sorttab[1], s.strtab_count - 1, sizeof (ctf_str_atom_t *),
541 	 ctf_str_sort_strtab);
542 
543   if ((strtab.cts_strs = malloc (strtab.cts_len)) == NULL)
544     goto oom_sorttab;
545 
546   /* Update all refs: also update the strtab appropriately.  */
547   for (i = 0; i < s.strtab_count; i++)
548     {
549       if (sorttab[i]->csa_external_offset)
550 	{
551 	  /* External strtab entry.  */
552 
553 	  any_external = 1;
554 	  ctf_str_update_refs (sorttab[i], sorttab[i]->csa_external_offset);
555 	  sorttab[i]->csa_offset = sorttab[i]->csa_external_offset;
556 	}
557       else
558 	{
559 	  /* Internal strtab entry with refs: actually add to the string
560 	     table.  */
561 
562 	  ctf_str_update_refs (sorttab[i], cur_stroff);
563 	  sorttab[i]->csa_offset = cur_stroff;
564 	  strcpy (&strtab.cts_strs[cur_stroff], sorttab[i]->csa_str);
565 	  cur_stroff += strlen (sorttab[i]->csa_str) + 1;
566 	}
567     }
568   free (sorttab);
569 
570   if (!any_external)
571     {
572       ctf_dynhash_destroy (fp->ctf_syn_ext_strtab);
573       fp->ctf_syn_ext_strtab = NULL;
574     }
575 
576   /* All the provisional strtab entries are now real strtab entries, and
577      ctf_strptr() will find them there.  The provisional offset now starts right
578      beyond the new end of the strtab.  */
579 
580   ctf_dynhash_empty (fp->ctf_prov_strtab);
581   fp->ctf_str_prov_offset = strtab.cts_len + 1;
582   return strtab;
583 
584  oom_sorttab:
585   free (sorttab);
586  oom:
587   return strtab;
588 }
589