1 /*-
2  * Copyright (c) 2007 S.Sam Arun Raj
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/stat.h>
28 #include <sys/types.h>
29 
30 #include <ctype.h>
31 #include <err.h>
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <getopt.h>
35 #include <inttypes.h>
36 #include <stdint.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sysexits.h>
41 #include <unistd.h>
42 
43 #include <libelf.h>
44 #include <libelftc.h>
45 #include <gelf.h>
46 
47 #include "_elftc.h"
48 
49 ELFTC_VCSID("$Id: strings.c 3498 2016-10-26 19:25:13Z emaste $");
50 
/* Numeric base used when printing the offset of each string (-o, -t). */
enum radix_style {
	RADIX_DECIMAL = 0,	/* -t d */
	RADIX_HEX,		/* -t x */
	RADIX_OCTAL		/* -t o, -o */
};
56 
/* Character encodings selectable with -e. */
enum encoding_style {
	ENCODING_7BIT = 0,	/* -e s: one byte per character */
	ENCODING_8BIT,		/* -e S: one byte, values above 127 accepted */
	ENCODING_16BIT_BIG,	/* -e b: two bytes, big-endian */
	ENCODING_16BIT_LITTLE,	/* -e l: two bytes, little-endian */
	ENCODING_32BIT_BIG,	/* -e B: four bytes, big-endian */
	ENCODING_32BIT_LITTLE	/* -e L: four bytes, little-endian */
};
65 
/*
 * Non-zero when the character value c can be part of a reported string:
 * c must lie in [0, 255] and be a tab, satisfy isprint(), or be above
 * 127 while the 8-bit encoding is selected.  Reads the file-scope
 * variable `encoding' and evaluates its argument more than once.
 */
#define PRINTABLE(c)						\
	(!((c) < 0 || (c) > 255) &&				\
	    ((encoding == ENCODING_8BIT && (c) > 127) ||	\
	    isprint((c)) || (c) == '\t'))
70 
71 static int encoding_size, entire_file, show_filename, show_loc;
72 static enum encoding_style encoding;
73 static enum radix_style radix;
74 static intmax_t min_len;
75 
/* Long options; each one maps onto the short option given in `val'. */
static struct option strings_longopts[] = {
	{ .name = "all", .has_arg = no_argument,
	  .flag = NULL, .val = 'a' },
	{ .name = "bytes", .has_arg = required_argument,
	  .flag = NULL, .val = 'n' },
	{ .name = "encoding", .has_arg = required_argument,
	  .flag = NULL, .val = 'e' },
	{ .name = "help", .has_arg = no_argument,
	  .flag = NULL, .val = 'h' },
	{ .name = "print-file-name", .has_arg = no_argument,
	  .flag = NULL, .val = 'f' },
	{ .name = "radix", .has_arg = required_argument,
	  .flag = NULL, .val = 't' },
	{ .name = "version", .has_arg = no_argument,
	  .flag = NULL, .val = 'v' },
	/* Terminating all-zero entry. */
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
86 
/* Forward declarations of the functions defined in this file. */
int	find_strings(const char *name, off_t offset, off_t size);
long	getcharacter(void);
int	handle_binary(const char *name, int fd);
int	handle_elf(const char *name, int fd);
int	handle_file(const char *name);
void	show_version(void);
void	usage(void);
94 
95 /*
96  * strings(1) extracts text(contiguous printable characters)
97  * from elf and binary files.
98  */
99 int
100 main(int argc, char **argv)
101 {
102 	int ch, rc;
103 
104 	rc = 0;
105 	min_len = 0;
106 	encoding_size = 1;
107 	if (elf_version(EV_CURRENT) == EV_NONE)
108 		errx(EXIT_FAILURE, "ELF library initialization failed: %s",
109 		    elf_errmsg(-1));
110 
111 	while ((ch = getopt_long(argc, argv, "1234567890ae:fhn:ot:Vv",
112 	    strings_longopts, NULL)) != -1)
113 		switch((char)ch) {
114 		case 'a':
115 			entire_file = 1;
116 			break;
117 		case 'e':
118 			if (*optarg == 's') {
119 				encoding = ENCODING_7BIT;
120 			} else if (*optarg == 'S') {
121 				encoding = ENCODING_8BIT;
122 			} else if (*optarg == 'b') {
123 				encoding = ENCODING_16BIT_BIG;
124 				encoding_size = 2;
125 			} else if (*optarg == 'B') {
126 				encoding = ENCODING_32BIT_BIG;
127 				encoding_size = 4;
128 			} else if (*optarg == 'l') {
129 				encoding = ENCODING_16BIT_LITTLE;
130 				encoding_size = 2;
131 			} else if (*optarg == 'L') {
132 				encoding = ENCODING_32BIT_LITTLE;
133 				encoding_size = 4;
134 			} else
135 				usage();
136 			        /* NOTREACHED */
137 			break;
138 		case 'f':
139 			show_filename = 1;
140 			break;
141 		case 'n':
142 			min_len = strtoimax(optarg, (char**)NULL, 10);
143 			if (min_len <= 0)
144 				errx(EX_USAGE, "option -n should specify a "
145 				    "positive decimal integer.");
146 			break;
147 		case 'o':
148 			show_loc = 1;
149 			radix = RADIX_OCTAL;
150 			break;
151 		case 't':
152 			show_loc = 1;
153 			if (*optarg == 'd')
154 				radix = RADIX_DECIMAL;
155 			else if (*optarg == 'o')
156 				radix = RADIX_OCTAL;
157 			else if (*optarg == 'x')
158 				radix = RADIX_HEX;
159 			else
160 				usage();
161 			        /* NOTREACHED */
162 			break;
163 		case 'v':
164 		case 'V':
165 			show_version();
166 			/* NOTREACHED */
167 		case '0':
168 	        case '1':
169 		case '2':
170 		case '3':
171 		case '4':
172 		case '5':
173 		case '6':
174 		case '7':
175 		case '8':
176 		case '9':
177 			min_len *= 10;
178 			min_len += ch - '0';
179 			break;
180 		case 'h':
181 		case '?':
182 		default:
183 			usage();
184 			/* NOTREACHED */
185 		}
186 	argc -= optind;
187 	argv += optind;
188 
189 	if (!min_len)
190 		min_len = 4;
191 	if (!*argv)
192 		rc = find_strings("{standard input}", 0, 0);
193 	else while (*argv) {
194 		if (handle_file(*argv) != 0)
195 			rc = 1;
196 		argv++;
197 	}
198 	return (rc);
199 }
200 
/*
 * Reopen standard input on the file `name' and scan it with
 * handle_elf().  Returns 1 if `name' is NULL, if the file cannot be
 * opened (a warning is printed) or if no descriptor is available for
 * the stream; otherwise returns the result of handle_elf().
 */
int
handle_file(const char *name)
{
	int descriptor, status;

	status = 1;
	if (name != NULL) {
		if (freopen(name, "rb", stdin) == NULL)
			warnx("'%s': %s", name, strerror(errno));
		else if ((descriptor = fileno(stdin)) >= 0)
			status = handle_elf(name, descriptor);
	}
	return (status);
}
219 
/*
 * Scan a file as a flat sequence of bytes.  Files that handle_elf()
 * does not process as ELF objects (text files, core dumps, ...) end
 * up here.
 *
 * The descriptor is rewound and the whole file, as sized by fstat(2),
 * is passed to find_strings().  Returns 1 if fstat(2) fails, otherwise
 * the result of find_strings().
 */
int
handle_binary(const char *name, int fd)
{
	struct stat sb;

	memset(&sb, 0, sizeof(sb));
	(void) lseek(fd, (off_t)0, SEEK_SET);
	if (fstat(fd, &sb) != 0)
		return (1);
	return (find_strings(name, (off_t)0, sb.st_size));
}
235 
236 /*
237  * Will analyse a file to see if it ELF, other files including ar(1),
238  * core dumps are passed off and treated as flat binary files. Unlike
239  * GNU size in FreeBSD this routine will not treat ELF object from
240  * different archs as flat binary files(has to overridden using -a).
241  */
242 int
243 handle_elf(const char *name, int fd)
244 {
245 	GElf_Ehdr elfhdr;
246 	GElf_Shdr shdr;
247 	Elf *elf;
248 	Elf_Scn *scn;
249 	int rc;
250 
251 	rc = 0;
252 	/* If entire file is chosen, treat it as a binary file */
253 	if (entire_file)
254 		return (handle_binary(name, fd));
255 
256 	(void) lseek(fd, (off_t)0, SEEK_SET);
257 	elf = elf_begin(fd, ELF_C_READ, NULL);
258 	if (elf_kind(elf) != ELF_K_ELF) {
259 		(void) elf_end(elf);
260 		return (handle_binary(name, fd));
261 	}
262 
263 	if (gelf_getehdr(elf, &elfhdr) == NULL) {
264 		(void) elf_end(elf);
265 		warnx("%s: ELF file could not be processed", name);
266 		return (1);
267 	}
268 
269 	if (elfhdr.e_shnum == 0 && elfhdr.e_type == ET_CORE) {
270 		(void) elf_end(elf);
271 		return (handle_binary(name, fd));
272 	} else {
273 		scn = NULL;
274 		while ((scn = elf_nextscn(elf, scn)) != NULL) {
275 			if (gelf_getshdr(scn, &shdr) == NULL)
276 				continue;
277 			if (shdr.sh_type != SHT_NOBITS &&
278 			    (shdr.sh_flags & SHF_ALLOC) != 0) {
279 				rc = find_strings(name, shdr.sh_offset,
280 				    shdr.sh_size);
281 			}
282 		}
283 	}
284 	(void) elf_end(elf);
285 	return (rc);
286 }
287 
288 /*
289  * Retrieves a character from input stream based on the encoding
290  * type requested.
291  */
292 long
293 getcharacter(void)
294 {
295 	long rt;
296 	int i;
297 	char buf[4], c;
298 
299 	rt = EOF;
300 	for(i = 0; i < encoding_size; i++) {
301 		c = getc(stdin);
302 		if (feof(stdin))
303 			return (EOF);
304 		buf[i] = c;
305 	}
306 
307 	switch(encoding) {
308 	case ENCODING_7BIT:
309 	case ENCODING_8BIT:
310 		rt = buf[0];
311 		break;
312 	case ENCODING_16BIT_BIG:
313 		rt = (buf[0] << 8) | buf[1];
314 		break;
315 	case ENCODING_16BIT_LITTLE:
316 		 rt = buf[0] | (buf[1] << 8);
317 		 break;
318 	case ENCODING_32BIT_BIG:
319 		rt = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
320            	    ((long) buf[2] << 8) | buf[3];
321            	break;
322 	case ENCODING_32BIT_LITTLE:
323 		rt = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
324         	    ((long) buf[3] << 24);
325            	break;
326 	}
327 	return (rt);
328 }
329 
330 /*
331  * Input stream stdin is read until the end of file is reached or until
332  * the section size is reached in case of ELF files. Contiguous
333  * characters of >= min_size(default 4) will be displayed.
334  */
335 int
336 find_strings(const char *name, off_t offset, off_t size)
337 {
338 	off_t cur_off, start_off;
339 	char *obuf;
340 	long c;
341 	int i;
342 
343 	if ((obuf = (char*)calloc(1, min_len + 1)) == NULL) {
344 		(void) fprintf(stderr, "Unable to allocate memory: %s\n",
345 		     strerror(errno));
346 		return (1);
347 	}
348 
349 	(void) fseeko(stdin, offset, SEEK_SET);
350 	cur_off = offset;
351 	start_off = 0;
352 	while(1) {
353 		if ((offset + size) && (cur_off >= offset + size))
354 			break;
355 		start_off = cur_off;
356 		memset(obuf, 0, min_len+1);
357 		for(i = 0; i < min_len; i++) {
358 			c = getcharacter();
359 			if (c == EOF && feof(stdin))
360 				goto _exit1;
361 		 	if (PRINTABLE(c)) {
362 		 		obuf[i] = c;
363 		 		obuf[i+1] = 0;
364 		 		cur_off += encoding_size;
365 		 	} else {
366 				if (encoding == ENCODING_8BIT &&
367 				    (uint8_t)c > 127) {
368 			 		obuf[i] = c;
369 			 		obuf[i+1] = 0;
370 			 		cur_off += encoding_size;
371 			 		continue;
372 			 	}
373 	 			cur_off += encoding_size;
374 	 			break;
375 		 	}
376 		}
377 
378 		if (i >= min_len && ((cur_off <= offset + size) ||
379 		    !(offset + size))) {
380 			if (show_filename)
381 				printf ("%s: ", name);
382 			if (show_loc) {
383 				switch(radix) {
384 				case RADIX_DECIMAL:
385 					(void) printf("%7ju ",
386 					    (uintmax_t)start_off);
387 					break;
388 				case RADIX_HEX:
389 					(void) printf("%7jx ",
390 					    (uintmax_t)start_off);
391 					break;
392 				case RADIX_OCTAL:
393 					(void) printf("%7jo ",
394 					    (uintmax_t)start_off);
395 					break;
396 				}
397 			}
398 			printf("%s", obuf);
399 
400 			while(1) {
401 				if ((offset + size) &&
402 				    (cur_off >= offset + size))
403 					break;
404 				c = getcharacter();
405 				cur_off += encoding_size;
406 				if (encoding == ENCODING_8BIT &&
407 				    (uint8_t)c > 127) {
408 			 		putchar(c);
409 			 		continue;
410 			 	}
411 				if (!PRINTABLE(c) || c == EOF)
412 					break;
413 				putchar(c);
414 			}
415 			putchar('\n');
416 		}
417 	}
418 _exit1:
419 	free(obuf);
420 	return (0);
421 }
422 
/* Help text; the single %s conversion receives the program name. */
#define	USAGE_MESSAGE							\
	"Usage: %s [options] [file...]\n"				\
	"  Print contiguous sequences of printable characters.\n\n"	\
	"  Options:\n"							\
	"  -a     | --all               Scan the entire file for strings.\n" \
	"  -e ENC | --encoding=ENC      Select the character encoding to use.\n" \
	"  -f     | --print-file-name   Print the file name before each string.\n" \
	"  -h     | --help              Print a help message and exit.\n" \
	"  -n N   | --bytes=N | -N      Print sequences with 'N' or more characters.\n" \
	"  -o                           Print offsets in octal.\n"	\
	"  -t R   | --radix=R           Print offsets using the radix named by 'R'.\n" \
	"  -v     | --version           Print a version identifier and exit.\n"
435 
436 void
437 usage(void)
438 {
439 	(void) fprintf(stderr, USAGE_MESSAGE, ELFTC_GETPROGNAME());
440 	exit(EXIT_FAILURE);
441 }
442 
/*
 * Print the program name and the elftc version string on standard
 * output and exit with EXIT_SUCCESS.  Does not return.
 */
void
show_version(void)
{
	const char *progname = ELFTC_GETPROGNAME();
	const char *version = elftc_version();

	(void) printf("%s (%s)\n", progname, version);
	exit(EXIT_SUCCESS);
}
449