1 /*-
2  * Copyright (c) 2007 S.Sam Arun Raj
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
#include <sys/types.h>
#include <sys/capsicum.h>
#include <sys/stat.h>

#include <capsicum_helpers.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

#include <libelf.h>
#include <libelftc.h>
#include <gelf.h>

#include <libcasper.h>
#include <casper/cap_fileargs.h>

#include "_elftc.h"
53 
/*
 * NOTE(review): presumably records this file's version-control
 * identifier in the built object; ELFTC_VCSID is not defined in this
 * file (likely "_elftc.h") -- confirm there.
 */
ELFTC_VCSID("$Id: strings.c 3648 2018-11-22 23:26:43Z emaste $");
55 
/*
 * Numeric base used when the offset of each string is printed.
 * RADIX_DECIMAL is deliberately zero so that it is the value of the
 * zero-initialized `radix' variable.
 */
enum radix_style {
	RADIX_DECIMAL = 0,	/* selected by "-t d" */
	RADIX_HEX = 1,		/* selected by "-t x" */
	RADIX_OCTAL = 2		/* selected by "-o" or "-t o" */
};
61 
/*
 * Character encodings understood by the scanner; the letter in each
 * comment is the argument of -e that main() maps to the constant.
 * ENCODING_7BIT is zero, i.e. the value of the zero-initialized
 * `encoding' variable.
 */
enum encoding_style {
	ENCODING_7BIT = 0,		/* -e s */
	ENCODING_8BIT = 1,		/* -e S */
	ENCODING_16BIT_BIG = 2,		/* -e b */
	ENCODING_16BIT_LITTLE = 3,	/* -e l */
	ENCODING_32BIT_BIG = 4,		/* -e B */
	ENCODING_32BIT_LITTLE = 5	/* -e L */
};
70 
/*
 * True when character value `c' belongs in a string: it must lie in
 * 0..255 and be a tab, satisfy isprint(), or -- only under the 8-bit
 * encoding -- be above 127.  The range test comes first so isprint()
 * is never handed a value outside 0..255.  `c' is evaluated more than
 * once; pass a plain variable.
 */
#define PRINTABLE(c)						\
	(!((c) < 0 || (c) > 255) &&				\
	    (isprint((c)) || (c) == '\t' ||			\
		((c) > 127 && encoding == ENCODING_8BIT)))
75 
76 static int encoding_size, entire_file, show_filename, show_loc;
77 static enum encoding_style encoding;
78 static enum radix_style radix;
79 static intmax_t min_len;
80 
/*
 * Long options accepted by getopt_long(); each one maps to the short
 * option character given in `val'.  The all-zero entry ends the table.
 */
static struct option strings_longopts[] = {
	{ .name = "all", .has_arg = no_argument,
	    .flag = NULL, .val = 'a' },
	{ .name = "bytes", .has_arg = required_argument,
	    .flag = NULL, .val = 'n' },
	{ .name = "encoding", .has_arg = required_argument,
	    .flag = NULL, .val = 'e' },
	{ .name = "help", .has_arg = no_argument,
	    .flag = NULL, .val = 'h' },
	{ .name = "print-file-name", .has_arg = no_argument,
	    .flag = NULL, .val = 'f' },
	{ .name = "radix", .has_arg = required_argument,
	    .flag = NULL, .val = 't' },
	{ .name = "version", .has_arg = no_argument,
	    .flag = NULL, .val = 'v' },
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
91 
92 int	getcharacter(FILE *, long *);
93 int	handle_file(fileargs_t *fa, const char *);
94 int	handle_elf(const char *, FILE *);
95 int	handle_binary(const char *, FILE *, size_t);
96 int	find_strings(const char *, FILE *, off_t, off_t);
97 void	show_version(void);
98 void	usage(void);
99 
100 /*
101  * strings(1) extracts text(contiguous printable characters)
102  * from elf and binary files.
103  */
104 int
105 main(int argc, char **argv)
106 {
107 	fileargs_t *fa;
108 	cap_rights_t rights;
109 	int ch, rc;
110 
111 	rc = 0;
112 	min_len = 0;
113 	encoding_size = 1;
114 	if (elf_version(EV_CURRENT) == EV_NONE)
115 		errx(EXIT_FAILURE, "ELF library initialization failed: %s",
116 		    elf_errmsg(-1));
117 
118 	while ((ch = getopt_long(argc, argv, "1234567890ae:fhn:ot:Vv",
119 	    strings_longopts, NULL)) != -1) {
120 		switch ((char)ch) {
121 		case 'a':
122 			entire_file = 1;
123 			break;
124 		case 'e':
125 			if (*optarg == 's') {
126 				encoding = ENCODING_7BIT;
127 			} else if (*optarg == 'S') {
128 				encoding = ENCODING_8BIT;
129 			} else if (*optarg == 'b') {
130 				encoding = ENCODING_16BIT_BIG;
131 				encoding_size = 2;
132 			} else if (*optarg == 'B') {
133 				encoding = ENCODING_32BIT_BIG;
134 				encoding_size = 4;
135 			} else if (*optarg == 'l') {
136 				encoding = ENCODING_16BIT_LITTLE;
137 				encoding_size = 2;
138 			} else if (*optarg == 'L') {
139 				encoding = ENCODING_32BIT_LITTLE;
140 				encoding_size = 4;
141 			} else
142 				usage();
143 			        /* NOTREACHED */
144 			break;
145 		case 'f':
146 			show_filename = 1;
147 			break;
148 		case 'n':
149 			min_len = strtoimax(optarg, (char**)NULL, 10);
150 			if (min_len <= 0)
151 				errx(EX_USAGE, "option -n should specify a "
152 				    "positive decimal integer.");
153 			break;
154 		case 'o':
155 			show_loc = 1;
156 			radix = RADIX_OCTAL;
157 			break;
158 		case 't':
159 			show_loc = 1;
160 			if (*optarg == 'd')
161 				radix = RADIX_DECIMAL;
162 			else if (*optarg == 'o')
163 				radix = RADIX_OCTAL;
164 			else if (*optarg == 'x')
165 				radix = RADIX_HEX;
166 			else
167 				usage();
168 			        /* NOTREACHED */
169 			break;
170 		case 'v':
171 		case 'V':
172 			show_version();
173 			/* NOTREACHED */
174 		case '0':
175 	        case '1':
176 		case '2':
177 		case '3':
178 		case '4':
179 		case '5':
180 		case '6':
181 		case '7':
182 		case '8':
183 		case '9':
184 			min_len *= 10;
185 			min_len += ch - '0';
186 			break;
187 		case 'h':
188 		case '?':
189 		default:
190 			usage();
191 			/* NOTREACHED */
192 		}
193 	}
194 	argc -= optind;
195 	argv += optind;
196 
197 	cap_rights_init(&rights, CAP_READ, CAP_SEEK, CAP_FSTAT, CAP_FCNTL, CAP_MMAP_R);
198 	fa = fileargs_init(argc, argv, O_RDONLY, 0, &rights, FA_OPEN);
199 	if (fa == NULL)
200 		err(1, "Unable to initialize casper fileargs");
201 
202 	caph_cache_catpages();
203 	if (caph_limit_stdio() < 0 || caph_enter_casper() < 0) {
204 		fileargs_free(fa);
205 		err(1, "Unable to enter capability mode");
206 	}
207 
208 	if (min_len == 0)
209 		min_len = 4;
210 	if (*argv == NULL)
211 		rc = find_strings("{standard input}", stdin, 0, 0);
212 	else while (*argv != NULL) {
213 		if (handle_file(fa, *argv) != 0)
214 			rc = 1;
215 		argv++;
216 	}
217 
218 	fileargs_free(fa);
219 
220 	return (rc);
221 }
222 
223 int
224 handle_file(fileargs_t *fa, const char *name)
225 {
226 	FILE *pfile;
227 	int rt;
228 
229 	if (name == NULL)
230 		return (1);
231 	pfile = fileargs_fopen(fa, name, "rb");
232 	if (pfile == NULL) {
233 		warnx("'%s': %s", name, strerror(errno));
234 		return (1);
235 	}
236 
237 	rt = handle_elf(name, pfile);
238 	fclose(pfile);
239 	return (rt);
240 }
241 
/*
 * Files that handle_elf() does not process section by section are
 * passed here and treated as flat binary data; this includes text
 * files, core dumps, ...  The stream is rewound to the start and
 * `size' bytes from offset 0 are searched by find_strings(), whose
 * result is returned.
 */
int
handle_binary(const char *name, FILE *pfile, size_t size)
{
	off_t length;

	length = size;
	(void)fseeko(pfile, (off_t)0, SEEK_SET);
	return (find_strings(name, pfile, (off_t)0, length));
}
253 
254 /*
255  * Will analyse a file to see if it ELF, other files including ar(1),
256  * core dumps are passed off and treated as flat binary files. Unlike
257  * GNU size in FreeBSD this routine will not treat ELF object from
258  * different archs as flat binary files(has to overridden using -a).
259  */
260 int
261 handle_elf(const char *name, FILE *pfile)
262 {
263 	struct stat buf;
264 	GElf_Ehdr elfhdr;
265 	GElf_Shdr shdr;
266 	Elf *elf;
267 	Elf_Scn *scn;
268 	int rc, fd;
269 
270 	rc = 0;
271 	fd = fileno(pfile);
272 	if (fstat(fd, &buf) < 0)
273 		return (1);
274 
275 	/* If entire file is chosen, treat it as a binary file */
276 	if (entire_file)
277 		return (handle_binary(name, pfile, buf.st_size));
278 
279 	(void)lseek(fd, 0, SEEK_SET);
280 	elf = elf_begin(fd, ELF_C_READ, NULL);
281 	if (elf_kind(elf) != ELF_K_ELF) {
282 		(void)elf_end(elf);
283 		return (handle_binary(name, pfile, buf.st_size));
284 	}
285 
286 	if (gelf_getehdr(elf, &elfhdr) == NULL) {
287 		(void)elf_end(elf);
288 		warnx("%s: ELF file could not be processed", name);
289 		return (1);
290 	}
291 
292 	if (elfhdr.e_shnum == 0 && elfhdr.e_type == ET_CORE) {
293 		(void)elf_end(elf);
294 		return (handle_binary(name, pfile, buf.st_size));
295 	} else {
296 		scn = NULL;
297 		while ((scn = elf_nextscn(elf, scn)) != NULL) {
298 			if (gelf_getshdr(scn, &shdr) == NULL)
299 				continue;
300 			if (shdr.sh_type != SHT_NOBITS &&
301 			    (shdr.sh_flags & SHF_ALLOC) != 0) {
302 				rc = find_strings(name, pfile, shdr.sh_offset,
303 				    shdr.sh_size);
304 			}
305 		}
306 	}
307 	(void)elf_end(elf);
308 	return (rc);
309 }
310 
311 /*
312  * Retrieves a character from input stream based on the encoding
313  * type requested.
314  */
315 int
316 getcharacter(FILE *pfile, long *rt)
317 {
318 	int i, c;
319 	char buf[4];
320 
321 	for(i = 0; i < encoding_size; i++) {
322 		c = getc(pfile);
323 		if (c == EOF)
324 			return (-1);
325 		buf[i] = c;
326 	}
327 
328 	switch (encoding) {
329 	case ENCODING_7BIT:
330 	case ENCODING_8BIT:
331 		*rt = buf[0];
332 		break;
333 	case ENCODING_16BIT_BIG:
334 		*rt = (buf[0] << 8) | buf[1];
335 		break;
336 	case ENCODING_16BIT_LITTLE:
337 		*rt = buf[0] | (buf[1] << 8);
338 		break;
339 	case ENCODING_32BIT_BIG:
340 		*rt = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
341 		    ((long) buf[2] << 8) | buf[3];
342 		break;
343 	case ENCODING_32BIT_LITTLE:
344 		*rt = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
345 		    ((long) buf[3] << 24);
346 		break;
347 	default:
348 		return (-1);
349 	}
350 
351 	return (0);
352 }
353 
354 /*
355  * Input stream is read until the end of file is reached or until
356  * the section size is reached in case of ELF files. Contiguous
357  * characters of >= min_size(default 4) will be displayed.
358  */
359 int
360 find_strings(const char *name, FILE *pfile, off_t offset, off_t size)
361 {
362 	off_t cur_off, start_off;
363 	char *obuf;
364 	long c;
365 	int i;
366 
367 	if ((obuf = (char*)calloc(1, min_len + 1)) == NULL) {
368 		fprintf(stderr, "Unable to allocate memory: %s\n",
369 		    strerror(errno));
370 		return (1);
371 	}
372 
373 	(void)fseeko(pfile, offset, SEEK_SET);
374 	cur_off = offset;
375 	start_off = 0;
376 	for (;;) {
377 		if ((offset + size) && (cur_off >= offset + size))
378 			break;
379 		start_off = cur_off;
380 		memset(obuf, 0, min_len + 1);
381 		for(i = 0; i < min_len; i++) {
382 			if (getcharacter(pfile, &c) < 0)
383 				goto _exit1;
384 			if (PRINTABLE(c)) {
385 				obuf[i] = c;
386 				obuf[i + 1] = 0;
387 				cur_off += encoding_size;
388 			} else {
389 				if (encoding == ENCODING_8BIT &&
390 				    (uint8_t)c > 127) {
391 					obuf[i] = c;
392 					obuf[i + 1] = 0;
393 					cur_off += encoding_size;
394 					continue;
395 				}
396 				cur_off += encoding_size;
397 				break;
398 			}
399 		}
400 
401 		if (i >= min_len && ((cur_off <= offset + size) ||
402 		    !(offset + size))) {
403 			if (show_filename)
404 				printf("%s: ", name);
405 			if (show_loc) {
406 				switch (radix) {
407 				case RADIX_DECIMAL:
408 					printf("%7ju ", (uintmax_t)start_off);
409 					break;
410 				case RADIX_HEX:
411 					printf("%7jx ", (uintmax_t)start_off);
412 					break;
413 				case RADIX_OCTAL:
414 					printf("%7jo ", (uintmax_t)start_off);
415 					break;
416 				}
417 			}
418 			printf("%s", obuf);
419 
420 			for (;;) {
421 				if ((offset + size) &&
422 				    (cur_off >= offset + size))
423 					break;
424 				if (getcharacter(pfile, &c) < 0)
425 					break;
426 				cur_off += encoding_size;
427 				if (encoding == ENCODING_8BIT &&
428 				    (uint8_t)c > 127) {
429 					putchar(c);
430 					continue;
431 				}
432 				if (!PRINTABLE(c))
433 					break;
434 				putchar(c);
435 			}
436 			putchar('\n');
437 		}
438 	}
439 _exit1:
440 	free(obuf);
441 	return (0);
442 }
443 
/*
 * printf-style usage text; its single %s conversion receives the
 * program name.
 */
#define	USAGE_MESSAGE							\
	"Usage: %s [options] [file...]\n"				\
	"  Print contiguous sequences of printable characters.\n\n"	\
	"  Options:\n"							\
	"  -a     | --all               Scan the entire file for strings.\n" \
	"  -e ENC | --encoding=ENC      Select the character encoding to use.\n" \
	"  -f     | --print-file-name   Print the file name before each string.\n" \
	"  -h     | --help              Print a help message and exit.\n" \
	"  -n N   | --bytes=N | -N      Print sequences with 'N' or more characters.\n" \
	"  -o                           Print offsets in octal.\n"	\
	"  -t R   | --radix=R           Print offsets using the radix named by 'R'.\n" \
	"  -v     | --version           Print a version identifier and exit.\n"
456 
457 void
458 usage(void)
459 {
460 
461 	fprintf(stderr, USAGE_MESSAGE, ELFTC_GETPROGNAME());
462 	exit(EXIT_FAILURE);
463 }
464 
/*
 * Prints the program name followed by the elftc_version() string in
 * parentheses to standard output, then terminates the program with
 * EXIT_SUCCESS; does not return.
 */
void
show_version(void)
{

	(void)printf("%s (%s)\n", ELFTC_GETPROGNAME(), elftc_version());
	exit(EXIT_SUCCESS);
}
472