1 /*
2  * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  *
23  */
24 
25 #include <unistd.h>
26 #include <search.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <db.h>
30 #include <fcntl.h>
31 
32 #include "libproc_impl.h"
33 #include "symtab.h"
34 #ifndef __APPLE__
35 #include "salibelf.h"
36 #endif // __APPLE__
37 
38 
39 // ----------------------------------------------------
40 // functions for symbol lookups
41 // ----------------------------------------------------
42 
// One record of the in-memory symbol table.
typedef struct symtab_symbol {
  char *name;         // symbol name (like __ZThread_...), points into symtab strs
  uintptr_t offset;   // added to the load base by search_symbol()
  uintptr_t size;     // extent of the symbol; nearest_symbol() matches
                      // addresses in [offset, offset + size)
} symtab_symbol;
48 
49 typedef struct symtab {
50   char *strs;                // all symbols "__symbol1__'\0'__symbol2__...."
51   size_t num_symbols;
52   DB* hash_table;
53   symtab_symbol* symbols;
54 } symtab_t;
55 
56 #ifdef __APPLE__
57 
// Enters every record of symtab->symbols into symtab->hash_table. The key is
// the NUL-terminated symbol name, the value is the symtab_symbol record.
//
// When debug output is enabled each entry is looked up again through a
// separately allocated copy of the name and compared with the source record;
// a mismatch or failed lookup is only reported, it does not stop the build.
void build_search_table(symtab_t *symtab) {
  size_t i;

  for (i = 0; i < symtab->num_symbols; i++) {
    DBT key, value;
    symtab_symbol *sym = &symtab->symbols[i];

    key.data = sym->name;
    key.size = strlen(sym->name) + 1;
    value.data = sym;
    value.size = sizeof(symtab_symbol);
    if ((*symtab->hash_table->put)(symtab->hash_table, &key, &value, 0) != 0) {
      print_debug("error to put hash_table value!\n");
      continue;
    }

    // check result
    if (is_debug()) {
      DBT rkey, rvalue;
      symtab_symbol res;
      size_t len = strlen(sym->name) + 1;
      char *tmp = malloc(len);
      if (tmp == NULL) {
        // the read-back is best effort only; keep filling the table
        continue;
      }
      memcpy(tmp, sym->name, len);
      rkey.data = tmp;
      rkey.size = len;
      if ((*symtab->hash_table->get)(symtab->hash_table, &rkey, &rvalue, 0) != 0 ||
          rvalue.data == NULL || rvalue.size != sizeof(res)) {
        // rvalue is not usable unless get() reported success
        print_debug("error to get hash_table value!\n");
      } else {
        // we may get a copy back, and the DB does not promise that it is
        // suitably aligned, so copy it out before comparing contents
        memcpy(&res, rvalue.data, sizeof(res));
        if (strcmp(res.name, sym->name) ||
            res.offset != sym->offset   ||
            res.size != sym->size) {
          print_debug("error to get hash_table value!\n");
        }
      }
      free(tmp);
    }
  }
}
90 
91 // read symbol table from given fd.
build_symtab(int fd)92 struct symtab* build_symtab(int fd) {
93   symtab_t* symtab = NULL;
94   int i;
95   mach_header_64 header;
96   off_t image_start;
97 
98   if (!get_arch_off(fd, CPU_TYPE_X86_64, &image_start)) {
99     print_debug("failed in get fat header\n");
100     return NULL;
101   }
102   lseek(fd, image_start, SEEK_SET);
103   if (read(fd, (void *)&header, sizeof(mach_header_64)) != sizeof(mach_header_64)) {
104     print_debug("reading header failed!\n");
105     return NULL;
106   }
107   // header
108   if (header.magic != MH_MAGIC_64) {
109     print_debug("not a valid .dylib file\n");
110     return NULL;
111   }
112 
113   load_command lcmd;
114   symtab_command symtabcmd;
115   nlist_64 lentry;
116 
117   bool lcsymtab_exist = false;
118 
119   long filepos = ltell(fd);
120   for (i = 0; i < header.ncmds; i++) {
121     lseek(fd, filepos, SEEK_SET);
122     if (read(fd, (void *)&lcmd, sizeof(load_command)) != sizeof(load_command)) {
123       print_debug("read load_command failed for file\n");
124       return NULL;
125     }
126     filepos += lcmd.cmdsize;  // next command position
127     if (lcmd.cmd == LC_SYMTAB) {
128       lseek(fd, -sizeof(load_command), SEEK_CUR);
129       lcsymtab_exist = true;
130       break;
131     }
132   }
133   if (!lcsymtab_exist) {
134     print_debug("No symtab command found!\n");
135     return NULL;
136   }
137   if (read(fd, (void *)&symtabcmd, sizeof(symtab_command)) != sizeof(symtab_command)) {
138     print_debug("read symtab_command failed for file");
139     return NULL;
140   }
141   symtab = (symtab_t *)malloc(sizeof(symtab_t));
142   if (symtab == NULL) {
143     print_debug("out of memory: allocating symtab\n");
144     return NULL;
145   }
146 
147   // create hash table, we use berkeley db to
148   // manipulate the hash table.
149   symtab->hash_table = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL);
150   if (symtab->hash_table == NULL)
151     goto quit;
152 
153   symtab->num_symbols = symtabcmd.nsyms;
154   symtab->symbols = (symtab_symbol *)malloc(sizeof(symtab_symbol) * symtab->num_symbols);
155   symtab->strs    = (char *)malloc(sizeof(char) * symtabcmd.strsize);
156   if (symtab->symbols == NULL || symtab->strs == NULL) {
157      print_debug("out of memory: allocating symtab.symbol or symtab.strs\n");
158      goto quit;
159   }
160   lseek(fd, image_start + symtabcmd.symoff, SEEK_SET);
161   for (i = 0; i < symtab->num_symbols; i++) {
162     if (read(fd, (void *)&lentry, sizeof(nlist_64)) != sizeof(nlist_64)) {
163       print_debug("read nlist_64 failed at %i\n", i);
164       goto quit;
165     }
166     symtab->symbols[i].offset = lentry.n_value;
167     symtab->symbols[i].size  = lentry.n_un.n_strx;        // index
168   }
169 
170   // string table
171   lseek(fd, image_start + symtabcmd.stroff, SEEK_SET);
172   int size = read(fd, (void *)(symtab->strs), symtabcmd.strsize * sizeof(char));
173   if (size != symtabcmd.strsize * sizeof(char)) {
174      print_debug("reading string table failed\n");
175      goto quit;
176   }
177 
178   for (i = 0; i < symtab->num_symbols; i++) {
179     symtab->symbols[i].name = symtab->strs + symtab->symbols[i].size;
180     if (i > 0) {
181       // fix size
182       symtab->symbols[i - 1].size = symtab->symbols[i].size - symtab->symbols[i - 1].size;
183       print_debug("%s size = %d\n", symtab->symbols[i - 1].name, symtab->symbols[i - 1].size);
184 
185     }
186 
187     if (i == symtab->num_symbols - 1) {
188       // last index
189       symtab->symbols[i].size =
190             symtabcmd.strsize - symtab->symbols[i].size;
191       print_debug("%s size = %d\n", symtab->symbols[i].name, symtab->symbols[i].size);
192     }
193   }
194 
195   // build a hashtable for fast query
196   build_search_table(symtab);
197   return symtab;
198 quit:
199   if (symtab) destroy_symtab(symtab);
200   return NULL;
201 }
202 
203 #else // __APPLE__
204 
205 struct elf_section {
206   ELF_SHDR   *c_shdr;
207   void       *c_data;
208 };
209 
210 // read symbol table from given fd.
build_symtab(int fd)211 struct symtab* build_symtab(int fd) {
212   ELF_EHDR ehdr;
213   struct symtab* symtab = NULL;
214 
215   // Reading of elf header
216   struct elf_section *scn_cache = NULL;
217   int cnt = 0;
218   ELF_SHDR* shbuf = NULL;
219   ELF_SHDR* cursct = NULL;
220   ELF_PHDR* phbuf = NULL;
221   int symtab_found = 0;
222   int dynsym_found = 0;
223   uint32_t symsection = SHT_SYMTAB;
224 
225   uintptr_t baseaddr = (uintptr_t)-1;
226 
227   lseek(fd, (off_t)0L, SEEK_SET);
228   if (! read_elf_header(fd, &ehdr)) {
229     // not an elf
230     return NULL;
231   }
232 
233   // read ELF header
234   if ((shbuf = read_section_header_table(fd, &ehdr)) == NULL) {
235     goto quit;
236   }
237 
238   baseaddr = find_base_address(fd, &ehdr);
239 
240   scn_cache = calloc(ehdr.e_shnum, sizeof(*scn_cache));
241   if (scn_cache == NULL) {
242     goto quit;
243   }
244 
245   for (cursct = shbuf, cnt = 0; cnt < ehdr.e_shnum; cnt++) {
246     scn_cache[cnt].c_shdr = cursct;
247     if (cursct->sh_type == SHT_SYMTAB ||
248         cursct->sh_type == SHT_STRTAB ||
249         cursct->sh_type == SHT_DYNSYM) {
250       if ( (scn_cache[cnt].c_data = read_section_data(fd, &ehdr, cursct)) == NULL) {
251          goto quit;
252       }
253     }
254 
255     if (cursct->sh_type == SHT_SYMTAB)
256        symtab_found++;
257 
258     if (cursct->sh_type == SHT_DYNSYM)
259        dynsym_found++;
260 
261     cursct++;
262   }
263 
264   if (!symtab_found && dynsym_found)
265      symsection = SHT_DYNSYM;
266 
267   for (cnt = 1; cnt < ehdr.e_shnum; cnt++) {
268     ELF_SHDR *shdr = scn_cache[cnt].c_shdr;
269 
270     if (shdr->sh_type == symsection) {
271       ELF_SYM  *syms;
272       int j, n;
273       size_t size;
274 
275       // FIXME: there could be multiple data buffers associated with the
276       // same ELF section. Here we can handle only one buffer. See man page
277       // for elf_getdata on Solaris.
278 
279       // guarantee(symtab == NULL, "multiple symtab");
280       symtab = calloc(1, sizeof(*symtab));
281       if (symtab == NULL) {
282          goto quit;
283       }
284       // the symbol table
285       syms = (ELF_SYM *)scn_cache[cnt].c_data;
286 
287       // number of symbols
288       n = shdr->sh_size / shdr->sh_entsize;
289 
290       // create hash table, we use berkeley db to
291       // manipulate the hash table.
292       symtab->hash_table = dbopen(NULL, O_CREAT | O_RDWR, 0600, DB_HASH, NULL);
293       // guarantee(symtab->hash_table, "unexpected failure: dbopen");
294       if (symtab->hash_table == NULL)
295         goto bad;
296 
297       // shdr->sh_link points to the section that contains the actual strings
298       // for symbol names. the st_name field in ELF_SYM is just the
299       // string table index. we make a copy of the string table so the
300       // strings will not be destroyed by elf_end.
301       size = scn_cache[shdr->sh_link].c_shdr->sh_size;
302       symtab->strs = malloc(size);
303       if (symtab->strs == NULL)
304         goto bad;
305       memcpy(symtab->strs, scn_cache[shdr->sh_link].c_data, size);
306 
307       // allocate memory for storing symbol offset and size;
308       symtab->num_symbols = n;
309       symtab->symbols = calloc(n , sizeof(*symtab->symbols));
310       if (symtab->symbols == NULL)
311         goto bad;
312 
313       // copy symbols info our symtab and enter them info the hash table
314       for (j = 0; j < n; j++, syms++) {
315         DBT key, value;
316         char *sym_name = symtab->strs + syms->st_name;
317 
318         // skip non-object and non-function symbols
319         int st_type = ELF_ST_TYPE(syms->st_info);
320         if ( st_type != STT_FUNC && st_type != STT_OBJECT)
321            continue;
322         // skip empty strings and undefined symbols
323         if (*sym_name == '\0' || syms->st_shndx == SHN_UNDEF) continue;
324 
325         symtab->symbols[j].name   = sym_name;
326         symtab->symbols[j].offset = syms->st_value - baseaddr;
327         symtab->symbols[j].size   = syms->st_size;
328 
329         key.data = sym_name;
330         key.size = strlen(sym_name) + 1;
331         value.data = &(symtab->symbols[j]);
332         value.size = sizeof(symtab_symbol);
333         (*symtab->hash_table->put)(symtab->hash_table, &key, &value, 0);
334       }
335     }
336   }
337   goto quit;
338 
339 bad:
340   destroy_symtab(symtab);
341   symtab = NULL;
342 
343 quit:
344   if (shbuf) free(shbuf);
345   if (phbuf) free(phbuf);
346   if (scn_cache) {
347     for (cnt = 0; cnt < ehdr.e_shnum; cnt++) {
348       if (scn_cache[cnt].c_data != NULL) {
349         free(scn_cache[cnt].c_data);
350       }
351     }
352     free(scn_cache);
353   }
354   return symtab;
355 }
356 
357 #endif // __APPLE__
358 
// Releases a symtab created by build_symtab(): the hash table, the string
// table, the symbol records and the symtab itself. Passing NULL is a no-op.
void destroy_symtab(symtab_t* symtab) {
  if (!symtab) return;
  if (symtab->hash_table != NULL) {
    // the table was dbopen()ed by build_symtab() and must be closed to
    // give back what the database allocated for it
    (*symtab->hash_table->close)(symtab->hash_table);
  }
  free(symtab->strs);
  free(symtab->symbols);
  free(symtab);
}
365 
search_symbol(struct symtab * symtab,uintptr_t base,const char * sym_name,int * sym_size)366 uintptr_t search_symbol(struct symtab* symtab, uintptr_t base, const char *sym_name, int *sym_size) {
367   DBT key, value;
368   int ret;
369 
370   // library does not have symbol table
371   if (!symtab || !symtab->hash_table) {
372      return 0;
373   }
374 
375   key.data = (char*)(uintptr_t)sym_name;
376   key.size = strlen(sym_name) + 1;
377   ret = (*symtab->hash_table->get)(symtab->hash_table, &key, &value, 0);
378   if (ret == 0) {
379     symtab_symbol *sym = value.data;
380     uintptr_t rslt = (uintptr_t) ((char*)base + sym->offset);
381     if (sym_size) *sym_size = sym->size;
382     return rslt;
383   }
384 
385   return 0;
386 }
387 
nearest_symbol(struct symtab * symtab,uintptr_t offset,uintptr_t * poffset)388 const char* nearest_symbol(struct symtab* symtab, uintptr_t offset,
389                            uintptr_t* poffset) {
390   int n = 0;
391   if (!symtab) return NULL;
392   for (; n < symtab->num_symbols; n++) {
393     symtab_symbol* sym = &(symtab->symbols[n]);
394     if (sym->name != NULL &&
395       offset >= sym->offset && offset < sym->offset + sym->size) {
396       if (poffset) *poffset = (offset - sym->offset);
397       return sym->name;
398     }
399   }
400   return NULL;
401 }
402