xref: /openbsd/usr.sbin/crunchgen/crunchide.c (revision 404b540a)
1 /* $OpenBSD: crunchide.c,v 1.3 2009/07/21 17:19:13 deraadt Exp $	 */
2 
3 /*
4  * Copyright (c) 1994 University of Maryland
5  * All Rights Reserved.
6  *
7  * Permission to use, copy, modify, distribute, and sell this software and its
8  * documentation for any purpose is hereby granted without fee, provided that
9  * the above copyright notice appear in all copies and that both that
10  * copyright notice and this permission notice appear in supporting
11  * documentation, and that the name of U.M. not be used in advertising or
12  * publicity pertaining to distribution of the software without specific,
13  * written prior permission.  U.M. makes no representations about the
14  * suitability of this software for any purpose.  It is provided "as is"
15  * without express or implied warranty.
16  *
17  * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
19  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
21  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
22  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
23  *
24  * Author: James da Silva, Systems Design and Analysis Group
25  *			   Computer Science Department
26  *			   University of Maryland at College Park
27  */
28 /*
29  * crunchide.c - tiptoes through an a.out symbol table, hiding all defined
30  *	global symbols.  Allows the user to supply a "keep list" of symbols
31  *	that are not to be hidden.  This program relies on the use of the
32  * 	linker's -dc flag to actually put global bss data into the file's
33  * 	bss segment (rather than leaving it as undefined "common" data).
34  *
35  * 	The point of all this is to allow multiple programs to be linked
36  *	together without getting multiple-defined errors.
37  *
38  *	For example, consider a program "foo.c".  It can be linked with a
39  *	small stub routine, called "foostub.c", eg:
40  *	    int foo_main(int argc, char **argv){ return main(argc, argv); }
41  *      like so:
42  *	    cc -c foo.c foostub.c
43  *	    ld -dc -r foo.o foostub.o -o foo.combined.o
44  *	    crunchide -k _foo_main foo.combined.o
45  *	at this point, foo.combined.o can be linked with another program
46  * 	and invoked with "foo_main(argc, argv)".  foo's main() and any
47  * 	other globals are hidden and will not conflict with other symbols.
48  *
49  * TODO:
50  *	- resolve the theoretical hanging reloc problem (see check_reloc()
51  *	  below). I have yet to see this problem actually occur in any real
52  *	  program. In what cases will gcc/gas generate code that needs a
53  *	  relative reloc from a global symbol, other than PIC?  The
54  *	  solution is to not hide the symbol from the linker in this case,
55  *	  but to generate some random name for it so that it doesn't link
56  *	  with anything but holds the place for the reloc.
57  *      - arrange that all the BSS segments start at the same address, so
58  *	  that the final crunched binary BSS size is the max of all the
59  *	  component programs' BSS sizes, rather than their sum.
60  */
61 #include <unistd.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65 #include <errno.h>
66 #include <fcntl.h>
67 #include <a.out.h>
68 #include <sys/types.h>
69 #ifdef _NLIST_DO_ECOFF
70 #include <sys/exec_ecoff.h>
71 #endif
72 #include <sys/mman.h>
73 #include <sys/stat.h>
74 
75 /*
76  * if __ELF__ is defined, do not bother supporting AOUT.
77  */
78 #if defined(_NLIST_DO_AOUT) && !(defined(__ELF__))
79 #define DO_AOUT
80 #endif
81 
82 void            usage(void);
83 
84 void            add_to_keep_list(char *);
85 void            add_file_to_keep_list(char *);
86 
87 void            hide_syms(char *);
88 #ifdef _NLIST_DO_ECOFF
89 void            ecoff_hide(int, char *);
90 #endif
91 #ifdef _NLIST_DO_ELF
92 void            elf_hide(int, char *);
93 #endif
94 
95 extern char	*__progname;
96 
97 int
98 crunchide_main(int argc, char *argv[])
99 {
100 	int             ch;
101 
102 	while ((ch = getopt(argc, argv, "hk:f:")) != -1)
103 		switch (ch) {
104 		case 'h':
105 			break;
106 		case 'k':
107 			add_to_keep_list(optarg);
108 			break;
109 		case 'f':
110 			add_file_to_keep_list(optarg);
111 			break;
112 		default:
113 			usage();
114 		}
115 
116 	argc -= optind;
117 	argv += optind;
118 
119 	if (argc == 0)
120 		usage();
121 
122 	while (argc) {
123 		hide_syms(*argv);
124 		argc--;
125 		argv++;
126 	}
127 
128 	return 0;
129 }
130 
/*
 * One node of the keep list, a singly linked list of symbol names.
 * add_to_keep_list() stores a strdup()ed copy of each name and keeps the
 * list in strcmp() order without duplicates.
 */
struct keep {
	struct keep    *next;	/* following node, NULL at the tail */
	char           *sym;	/* symbol name */
};

/* head of the keep list; NULL while the list is empty */
struct keep    *keep_list;
135 
136 void
137 add_to_keep_list(char *symbol)
138 {
139 	struct keep    *newp, *prevp, *curp;
140 	int             cmp = 0;
141 
142 	for (curp = keep_list, prevp = NULL; curp; prevp = curp, curp = curp->next)
143 		if ((cmp = strcmp(symbol, curp->sym)) <= 0)
144 			break;
145 
146 	if (curp && cmp == 0)
147 		return;		/* already in table */
148 
149 	newp = (struct keep *) calloc(1, sizeof(struct keep));
150 	if (newp)
151 		newp->sym = strdup(symbol);
152 	if (newp == NULL || newp->sym == NULL) {
153 		fprintf(stderr, "%s: out of memory for keep list\n", __progname);
154 		exit(1);
155 	}
156 	newp->next = curp;
157 	if (prevp)
158 		prevp->next = newp;
159 	else
160 		keep_list = newp;
161 }
162 
163 int
164 in_keep_list(char *symbol)
165 {
166 	struct keep    *curp;
167 	int             cmp = 0;
168 
169 	for (curp = keep_list; curp; curp = curp->next)
170 		if ((cmp = strcmp(symbol, curp->sym)) <= 0)
171 			break;
172 
173 	return curp && cmp == 0;
174 }
175 
/*
 * add_file_to_keep_list - add every line of a text file to the keep list.
 *
 * Each line read from filename, minus its trailing newline, is passed to
 * add_to_keep_list().  Lines are read into a 1024-byte buffer, so a longer
 * line is delivered by fgets() in several pieces and each piece is added
 * as a symbol of its own.
 *
 * If the file cannot be opened the error is printed with perror() and
 * usage() is called.
 */
void
add_file_to_keep_list(char *filename)
{
	char            line[1024];
	FILE           *fp;

	fp = fopen(filename, "r");
	if (fp == NULL) {
		perror(filename);
		usage();
	}
	while (fgets(line, sizeof(line), fp) != NULL) {
		/* cut the line at its newline, if it has one */
		line[strcspn(line, "\n")] = '\0';
		add_to_keep_list(line);
	}
	fclose(fp);
}
196 
/*
 * Description of the a.out image being processed.  hide_syms() fills
 * these in (in its DO_AOUT code) before it walks the symbol table.
 */
struct exec    *hdrp;		/* a.out header, at the start of the image */
char           *aoutdata;	/* base address of the mapped file */
char           *strbase;	/* start of the string table */
struct nlist   *symbase;	/* start of the symbol table */
struct relocation_info *textrel;	/* text relocation entries */
struct relocation_info *datarel;	/* data relocation entries */
int             nsyms;		/* number of entries at symbase */
int             ntextrel;	/* number of entries at textrel */
int             ndatarel;	/* number of entries at datarel */
202 
/*
 * name of symbol sp: a pointer into the string table at offset n_strx.
 * The expansion is parenthesized so that it can be used as an operand
 * (e.g. SYMSTR(sp)[0]) without the postfix operator binding to strbase[].
 */
#define SYMSTR(sp)	(&strbase[(sp)->n_un.n_strx])

/* is the symbol a global symbol defined in the current file? */
#define IS_GLOBAL_DEFINED(sp) \
	(((sp)->n_type & N_EXT) && ((sp)->n_type & N_TYPE) != N_UNDF)
208 
#ifdef DO_AOUT
/*
 * IS_SYMBOL_RELOC(rp) - is the relocation entry dependent on a symbol?
 * sparc relocation entries are tested through r_extern and r_type; all
 * other targets are tested through r_extern, r_baserel and r_jmptable.
 */
#if !defined(__sparc__)
#define IS_SYMBOL_RELOC(rp) \
	((rp)->r_extern || (rp)->r_baserel || (rp)->r_jmptable)
#else
#define IS_SYMBOL_RELOC(rp) \
	((rp)->r_extern || \
	((rp)->r_type >= RELOC_BASE10 && (rp)->r_type <= RELOC_BASE22) || \
	(rp)->r_type == RELOC_JMP_TBL)
#endif				/* !__sparc__ */
#endif				/* DO_AOUT */
222 
223 void            check_reloc(char *filename, struct relocation_info * relp);
224 
225 void
226 hide_syms(char *filename)
227 {
228 	int             inf;
229 	struct stat     infstat;
230 #ifdef DO_AOUT
231 	struct relocation_info *relp;
232 	struct nlist   *symp;
233 	u_char          zero = 0;
234 #endif
235 	char           *buf;
236 
237 	/*
238          * Open the file and do some error checking.
239          */
240 
241 	if ((inf = open(filename, O_RDWR)) == -1) {
242 		perror(filename);
243 		return;
244 	}
245 	if (fstat(inf, &infstat) == -1) {
246 		perror(filename);
247 		close(inf);
248 		return;
249 	}
250 	if (infstat.st_size < sizeof(struct exec)) {
251 		fprintf(stderr, "%s: short file\n", filename);
252 		close(inf);
253 		return;
254 	}
255 	if ((buf = mmap(NULL, infstat.st_size, PROT_READ | PROT_WRITE,
256 	    MAP_FILE | MAP_SHARED, inf, 0)) == MAP_FAILED) {
257 		fprintf(stderr, "%s: cannot map\n", filename);
258 		close(inf);
259 		return;
260 	}
261 
262 #ifdef _NLIST_DO_ELF
263 	if (buf[0] == 0x7f && (buf[1] == 'E' || buf[1] == 'O') &&
264 	    buf[2] == 'L' && buf[3] == 'F') {
265 		elf_hide(inf, buf);
266 		return;
267 	}
268 #endif				/* _NLIST_DO_ELF */
269 
270 #ifdef _NLIST_DO_ECOFF
271 	if (!ECOFF_BADMAG((struct ecoff_exechdr *) buf)) {
272 		ecoff_hide(inf, buf);
273 		return;
274 	}
275 #endif				/* _NLIST_DO_ECOFF */
276 
277 #ifdef DO_AOUT
278 	aoutdata = buf;
279 
280 	/*
281          * Check the header and calculate offsets and sizes from it.
282          */
283 	hdrp = (struct exec *) aoutdata;
284 
285 	if (N_BADMAG(*hdrp)) {
286 		fprintf(stderr, "%s: bad magic: not an a.out, ecoff or elf  file\n",
287 		    filename);
288 		close(inf);
289 		return;
290 	}
291 	textrel = (struct relocation_info *) (aoutdata + N_TRELOFF(*hdrp));
292 	datarel = (struct relocation_info *) (aoutdata + N_DRELOFF(*hdrp));
293 	symbase = (struct nlist *) (aoutdata + N_SYMOFF(*hdrp));
294 	strbase = (char *) (aoutdata + N_STROFF(*hdrp));
295 
296 	ntextrel = hdrp->a_trsize / sizeof(struct relocation_info);
297 	ndatarel = hdrp->a_drsize / sizeof(struct relocation_info);
298 	nsyms = hdrp->a_syms / sizeof(struct nlist);
299 
300 	/*
301          * Zap the type field of all globally-defined symbols.  The linker will
302          * subsequently ignore these entries.  Don't zap any symbols in the
303          * keep list.
304          */
305 	for (symp = symbase; symp < symbase + nsyms; symp++)
306 		if (IS_GLOBAL_DEFINED(symp) && !in_keep_list(SYMSTR(symp))) {
307 			/*
308 		         * XXX Our VM system has some problems, so
309 		         * avoid the VM system....
310 		         */
311 			lseek(inf, (off_t) ((void *) &symp->n_type -
312 			    (void *) buf), SEEK_SET);
313 			write(inf, &zero, sizeof zero);
314 			symp->n_type = 0;
315 		}
316 	/*
317          * Check whether the relocation entries reference any symbols that we
318          * just zapped.  I don't know whether ld can handle this case, but I
319          * haven't encountered it yet.  These checks are here so that the program
320          * doesn't fail silently should such symbols be encountered.
321          */
322 	for (relp = textrel; relp < textrel + ntextrel; relp++)
323 		check_reloc(filename, relp);
324 	for (relp = datarel; relp < datarel + ndatarel; relp++)
325 		check_reloc(filename, relp);
326 
327 	msync(buf, infstat.st_size, MS_SYNC);
328 	munmap(buf, infstat.st_size);
329 	close(inf);
330 #endif				/* DO_AOUT */
331 }
332 
#ifdef DO_AOUT
/*
 * check_reloc - verify that a relocation entry does not depend on a symbol
 * whose type was zapped.
 *
 * Entries for which IS_SYMBOL_RELOC() is false are ignored.  For the
 * others the symbol index is first checked against nsyms, so that a
 * malformed entry cannot make us read outside the symbol table; then the
 * symbol's n_type is examined.  In either failure case a message naming
 * filename is printed and the program exits with status 1.
 */
void
check_reloc(char *filename, struct relocation_info * relp)
{
	struct nlist   *symp;

	if (!IS_SYMBOL_RELOC(relp))
		return;

	/* refuse to index past the end of the symbol table */
	if ((unsigned long) relp->r_symbolnum >= (unsigned long) nsyms) {
		fprintf(stderr,
		    "%s: relocation refers to symbol %lu, beyond the symbol table: bailing out!\n",
		    filename, (unsigned long) relp->r_symbolnum);
		exit(1);
	}
	symp = &symbase[relp->r_symbolnum];

	/* bail out if we zapped a symbol that is needed */
	if (symp->n_type == 0) {
		fprintf(stderr,
		    "%s: oops, have hanging relocation for %s: bailing out!\n",
		    filename, SYMSTR(symp));
		exit(1);
	}
}
#endif				/* DO_AOUT */
346