xref: /openbsd/usr.sbin/crunchgen/crunchide.c (revision 3d8817e4)
1 /* $OpenBSD: crunchide.c,v 1.4 2009/12/04 04:59:48 drahn Exp $	 */
2 
3 /*
4  * Copyright (c) 1994 University of Maryland
5  * All Rights Reserved.
6  *
7  * Permission to use, copy, modify, distribute, and sell this software and its
8  * documentation for any purpose is hereby granted without fee, provided that
9  * the above copyright notice appear in all copies and that both that
10  * copyright notice and this permission notice appear in supporting
11  * documentation, and that the name of U.M. not be used in advertising or
12  * publicity pertaining to distribution of the software without specific,
13  * written prior permission.  U.M. makes no representations about the
14  * suitability of this software for any purpose.  It is provided "as is"
15  * without express or implied warranty.
16  *
17  * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
19  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
21  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
22  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23  *
24  * Author: James da Silva, Systems Design and Analysis Group
25  *			   Computer Science Department
26  *			   University of Maryland at College Park
27  */
28 /*
29  * crunchide.c - tiptoes through an a.out symbol table, hiding all defined
30  *	global symbols.  Allows the user to supply a "keep list" of symbols
31  *	that are not to be hidden.  This program relies on the use of the
32  * 	linker's -dc flag to actually put global bss data into the file's
33  * 	bss segment (rather than leaving it as undefined "common" data).
34  *
35  * 	The point of all this is to allow multiple programs to be linked
36  *	together without getting multiple-defined errors.
37  *
38  *	For example, consider a program "foo.c".  It can be linked with a
39  *	small stub routine, called "foostub.c", eg:
40  *	    int foo_main(int argc, char **argv){ return main(argc, argv); }
41  *      like so:
42  *	    cc -c foo.c foostub.c
43  *	    ld -dc -r foo.o foostub.o -o foo.combined.o
44  *	    crunchide -k _foo_main foo.combined.o
45  *	at this point, foo.combined.o can be linked with another program
46  * 	and invoked with "foo_main(argc, argv)".  foo's main() and any
47  * 	other globals are hidden and will not conflict with other symbols.
48  *
49  * TODO:
50  *	- resolve the theoretical hanging reloc problem (see check_reloc()
51  *	  below). I have yet to see this problem actually occur in any real
52  *	  program. In what cases will gcc/gas generate code that needs a
53  *	  relative reloc from a global symbol, other than PIC?  The
54  *	  solution is to not hide the symbol from the linker in this case,
55  *	  but to generate some random name for it so that it doesn't link
56  *	  with anything but holds the place for the reloc.
57  *      - arrange that all the BSS segments start at the same address, so
58  *	  that the final crunched binary BSS size is the max of all the
59  *	  component programs' BSS sizes, rather than their sum.
60  */
61 #include <unistd.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65 #include <errno.h>
66 #include <fcntl.h>
67 #include <a.out.h>
68 #include <sys/types.h>
69 #ifdef _NLIST_DO_ECOFF
70 #include <sys/exec_ecoff.h>
71 #endif
72 #include <sys/mman.h>
73 #include <sys/stat.h>
74 #include "mangle.h"
75 
76 /*
77  * if __ELF__ is defined, do not bother supporting AOUT.
78  */
79 #if defined(_NLIST_DO_AOUT) && !(defined(__ELF__))
80 #define DO_AOUT
81 #endif
82 
83 void            usage(void);
84 
85 void            add_to_keep_list(char *);
86 void            add_file_to_keep_list(char *);
87 
88 void            hide_syms(char *);
89 #ifdef _NLIST_DO_ECOFF
90 void            ecoff_hide(int, char *);
91 #endif
92 #ifdef _NLIST_DO_ELF
93 void            elf_hide(int, char *);
94 #endif
95 
96 extern char	*__progname;
97 extern int elf_mangle;
98 
99 int
100 crunchide_main(int argc, char *argv[])
101 {
102 	int             ch;
103 
104 	while ((ch = getopt(argc, argv, "Mhk:f:")) != -1)
105 		switch (ch) {
106 		case 'M':
107 			elf_mangle = 1;
108 			break;
109 		case 'h':
110 			break;
111 		case 'k':
112 			add_to_keep_list(optarg);
113 			break;
114 		case 'f':
115 			add_file_to_keep_list(optarg);
116 			break;
117 		default:
118 			usage();
119 		}
120 
121 	argc -= optind;
122 	argv += optind;
123 
124 	if (argc == 0)
125 		usage();
126 
127 	if (elf_mangle)
128 		init_mangle_state();
129 
130 	while (argc) {
131 		hide_syms(*argv);
132 		argc--;
133 		argv++;
134 	}
135 	if (elf_mangle)
136 		fini_mangle_state();
137 
138 	return 0;
139 }
140 
/*
 * One entry of the keep list: a singly linked list of symbol names that
 * must not be hidden.
 */
struct keep {
	struct keep    *next;	/* following entry, NULL at the tail */
	char           *sym;	/* symbol name */
};

/* head of the keep list; NULL while the list is empty */
struct keep    *keep_list;
145 
146 void
147 add_to_keep_list(char *symbol)
148 {
149 	struct keep    *newp, *prevp, *curp;
150 	int             cmp = 0;
151 
152 	for (curp = keep_list, prevp = NULL; curp; prevp = curp, curp = curp->next)
153 		if ((cmp = strcmp(symbol, curp->sym)) <= 0)
154 			break;
155 
156 	if (curp && cmp == 0)
157 		return;		/* already in table */
158 
159 	newp = (struct keep *) calloc(1, sizeof(struct keep));
160 	if (newp)
161 		newp->sym = strdup(symbol);
162 	if (newp == NULL || newp->sym == NULL) {
163 		fprintf(stderr, "%s: out of memory for keep list\n", __progname);
164 		exit(1);
165 	}
166 	newp->next = curp;
167 	if (prevp)
168 		prevp->next = newp;
169 	else
170 		keep_list = newp;
171 }
172 
173 int
174 in_keep_list(char *symbol)
175 {
176 	struct keep    *curp;
177 	int             cmp = 0;
178 
179 	for (curp = keep_list; curp; curp = curp->next)
180 		if ((cmp = strcmp(symbol, curp->sym)) <= 0)
181 			break;
182 
183 	return curp && cmp == 0;
184 }
185 
/*
 * add_file_to_keep_list - add every line of `filename' to the keep list.
 *
 * Each line read by fgets() has its trailing newline, if any, removed
 * and is then passed to add_to_keep_list().  Lines are read into a
 * 1024-byte buffer.  If the file cannot be opened the error is reported
 * with perror() and usage() is called.
 */
void
add_file_to_keep_list(char *filename)
{
	char            line[1024];
	FILE           *fp;

	fp = fopen(filename, "r");
	if (fp == NULL) {
		perror(filename);
		usage();
	}

	while (fgets(line, sizeof(line), fp) != NULL) {
		/*
		 * fgets() stops at the first newline, so the only newline
		 * that can be present is the final character; cut there.
		 */
		line[strcspn(line, "\n")] = '\0';
		add_to_keep_list(line);
	}

	fclose(fp);
}
206 
/*
 * State describing the a.out image being processed.  hide_syms() fills
 * these in from the file header; the macros below and check_reloc()
 * read them.
 *
 *	nsyms, ntextrel, ndatarel  entry counts of the symbol table and of
 *				   the text and data relocation tables
 *	hdrp			   the a.out header
 *	aoutdata		   start of the mapped file
 *	strbase			   start of the string table
 *	textrel, datarel	   text and data relocation tables
 *	symbase			   symbol table
 */
int             nsyms, ntextrel, ndatarel;
struct exec    *hdrp;
char           *aoutdata, *strbase;
struct relocation_info *textrel, *datarel;
struct nlist   *symbase;

/*
 * Address of the name of symbol `sp' inside the string table.  The
 * expansion is fully parenthesized so that the macro behaves as a single
 * primary expression wherever it is used (e.g. SYMSTR(sp)[0]).
 */
#define SYMSTR(sp)	(&strbase[(sp)->n_un.n_strx])

/* is the symbol a global symbol defined in the current file? */
#define IS_GLOBAL_DEFINED(sp) \
	(((sp)->n_type & N_EXT) && ((sp)->n_type & N_TYPE) != N_UNDF)
218 
#ifdef DO_AOUT
/*
 * IS_SYMBOL_RELOC(rp) - is the relocation entry dependent on a symbol?
 *
 * On sparc the test looks at r_extern and the r_type code; everywhere
 * else it looks at the r_extern, r_baserel and r_jmptable flags.
 */
#ifdef __sparc__
#define IS_SYMBOL_RELOC(rp) \
	((rp)->r_extern || \
	 (rp)->r_type == RELOC_JMP_TBL || \
	 ((rp)->r_type >= RELOC_BASE10 && (rp)->r_type <= RELOC_BASE22))
#else				/* !__sparc__ */
#define IS_SYMBOL_RELOC(rp) \
	((rp)->r_extern || (rp)->r_baserel || (rp)->r_jmptable)
#endif				/* __sparc__ */
#endif				/* DO_AOUT */
232 
233 void            check_reloc(char *filename, struct relocation_info * relp);
234 
235 void
236 hide_syms(char *filename)
237 {
238 	int             inf;
239 	struct stat     infstat;
240 #ifdef DO_AOUT
241 	struct relocation_info *relp;
242 	struct nlist   *symp;
243 	u_char          zero = 0;
244 #endif
245 	char           *buf;
246 
247 	/*
248          * Open the file and do some error checking.
249          */
250 
251 	if ((inf = open(filename, O_RDWR)) == -1) {
252 		perror(filename);
253 		return;
254 	}
255 	if (fstat(inf, &infstat) == -1) {
256 		perror(filename);
257 		close(inf);
258 		return;
259 	}
260 	if (infstat.st_size < sizeof(struct exec)) {
261 		fprintf(stderr, "%s: short file\n", filename);
262 		close(inf);
263 		return;
264 	}
265 	if ((buf = mmap(NULL, infstat.st_size, PROT_READ | PROT_WRITE,
266 	    MAP_FILE | MAP_SHARED, inf, 0)) == MAP_FAILED) {
267 		fprintf(stderr, "%s: cannot map\n", filename);
268 		close(inf);
269 		return;
270 	}
271 
272 #ifdef _NLIST_DO_ELF
273 	if (buf[0] == 0x7f && (buf[1] == 'E' || buf[1] == 'O') &&
274 	    buf[2] == 'L' && buf[3] == 'F') {
275 		elf_hide(inf, buf);
276 		return;
277 	}
278 #endif				/* _NLIST_DO_ELF */
279 
280 #ifdef _NLIST_DO_ECOFF
281 	if (!ECOFF_BADMAG((struct ecoff_exechdr *) buf)) {
282 		ecoff_hide(inf, buf);
283 		return;
284 	}
285 #endif				/* _NLIST_DO_ECOFF */
286 
287 #ifdef DO_AOUT
288 	aoutdata = buf;
289 
290 	/*
291          * Check the header and calculate offsets and sizes from it.
292          */
293 	hdrp = (struct exec *) aoutdata;
294 
295 	if (N_BADMAG(*hdrp)) {
296 		fprintf(stderr, "%s: bad magic: not an a.out, ecoff or elf  file\n",
297 		    filename);
298 		close(inf);
299 		return;
300 	}
301 	textrel = (struct relocation_info *) (aoutdata + N_TRELOFF(*hdrp));
302 	datarel = (struct relocation_info *) (aoutdata + N_DRELOFF(*hdrp));
303 	symbase = (struct nlist *) (aoutdata + N_SYMOFF(*hdrp));
304 	strbase = (char *) (aoutdata + N_STROFF(*hdrp));
305 
306 	ntextrel = hdrp->a_trsize / sizeof(struct relocation_info);
307 	ndatarel = hdrp->a_drsize / sizeof(struct relocation_info);
308 	nsyms = hdrp->a_syms / sizeof(struct nlist);
309 
310 	/*
311          * Zap the type field of all globally-defined symbols.  The linker will
312          * subsequently ignore these entries.  Don't zap any symbols in the
313          * keep list.
314          */
315 	for (symp = symbase; symp < symbase + nsyms; symp++)
316 		if (IS_GLOBAL_DEFINED(symp) && !in_keep_list(SYMSTR(symp))) {
317 			/*
318 		         * XXX Our VM system has some problems, so
319 		         * avoid the VM system....
320 		         */
321 			lseek(inf, (off_t) ((void *) &symp->n_type -
322 			    (void *) buf), SEEK_SET);
323 			write(inf, &zero, sizeof zero);
324 			symp->n_type = 0;
325 		}
326 	/*
327          * Check whether the relocation entries reference any symbols that we
328          * just zapped.  I don't know whether ld can handle this case, but I
329          * haven't encountered it yet.  These checks are here so that the program
330          * doesn't fail silently should such symbols be encountered.
331          */
332 	for (relp = textrel; relp < textrel + ntextrel; relp++)
333 		check_reloc(filename, relp);
334 	for (relp = datarel; relp < datarel + ndatarel; relp++)
335 		check_reloc(filename, relp);
336 
337 	msync(buf, infstat.st_size, MS_SYNC);
338 	munmap(buf, infstat.st_size);
339 	close(inf);
340 #endif				/* DO_AOUT */
341 }
342 
#ifdef DO_AOUT
/*
 * check_reloc - make sure a relocation does not refer to a hidden symbol.
 *
 * If `relp' depends on a symbol (IS_SYMBOL_RELOC) and that symbol's
 * n_type is 0, a message naming `filename' and the symbol is printed
 * and the program exits with status 1.  Otherwise returns normally.
 */
void
check_reloc(char *filename, struct relocation_info * relp)
{
	struct nlist   *target;

	if (!IS_SYMBOL_RELOC(relp))
		return;

	target = &symbase[relp->r_symbolnum];
	if (target->n_type != 0)
		return;

	/* bail out: we zapped a symbol that is needed */
	fprintf(stderr,
	    "%s: oops, have hanging relocation for %s: bailing out!\n",
	    filename, SYMSTR(target));
	exit(1);
}
#endif				/* DO_AOUT */
356