1 /*-
2 * Copyright (c) 2007 S.Sam Arun Raj
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/stat.h>
28 #include <sys/types.h>
29
30 #include <ctype.h>
31 #include <err.h>
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <getopt.h>
35 #include <inttypes.h>
36 #include <stdint.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sysexits.h>
41 #include <unistd.h>
42
43 #include <libelf.h>
44 #include <libelftc.h>
45 #include <gelf.h>
46
47 #include "_elftc.h"
48
49 ELFTC_VCSID("$Id: strings.c 3360 2016-01-24 18:34:06Z jkoshy $");
50
/*
 * Status codes returned by the file handlers; main() uses the same
 * values as the process exit status.
 */
enum return_code {
	RETURN_OK = 0,		/* input was scanned */
	RETURN_NOINPUT = 1,	/* input could not be opened */
	RETURN_SOFTWARE = 2	/* internal failure while processing input */
};
56
/* Number base used when printing the offset of each string. */
enum radix_style {
	RADIX_DECIMAL = 0,
	RADIX_HEX = 1,
	RADIX_OCTAL = 2
};
62
/* Character encodings selectable with the -e option. */
enum encoding_style {
	ENCODING_7BIT = 0,		/* -e s */
	ENCODING_8BIT = 1,		/* -e S */
	ENCODING_16BIT_BIG = 2,		/* -e b */
	ENCODING_16BIT_LITTLE = 3,	/* -e l */
	ENCODING_32BIT_BIG = 4,		/* -e B */
	ENCODING_32BIT_LITTLE = 5	/* -e L */
};
71
/*
 * Non-zero when character code 'c' may be part of a string: it must lie
 * in [0, 255] and be a tab, satisfy isprint(3), or be above 127 while
 * the 8-bit encoding is selected.
 *
 * Evaluates 'c' more than once and reads the file-scope 'encoding'.
 */
#define	PRINTABLE(c)						\
	(((c) >= 0 && (c) <= 255) &&				\
	    (isprint((c)) || (c) == '\t' ||			\
	    ((c) > 127 && encoding == ENCODING_8BIT)))
76
77 static int encoding_size, entire_file, show_filename, show_loc;
78 static enum encoding_style encoding;
79 static enum radix_style radix;
80 static intmax_t min_len;
81
/*
 * Long options accepted by getopt_long(3); each one maps onto the short
 * option character handled in main().  The all-zero entry ends the table.
 */
static struct option strings_longopts[] = {
	{ .name = "all", .has_arg = no_argument,
	    .flag = NULL, .val = 'a' },
	{ .name = "bytes", .has_arg = required_argument,
	    .flag = NULL, .val = 'n' },
	{ .name = "encoding", .has_arg = required_argument,
	    .flag = NULL, .val = 'e' },
	{ .name = "help", .has_arg = no_argument,
	    .flag = NULL, .val = 'h' },
	{ .name = "print-file-name", .has_arg = no_argument,
	    .flag = NULL, .val = 'f' },
	{ .name = "radix", .has_arg = required_argument,
	    .flag = NULL, .val = 't' },
	{ .name = "version", .has_arg = no_argument,
	    .flag = NULL, .val = 'v' },
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
92
/* Forward declarations for the functions defined in this file. */
long	getcharacter(void);
int	handle_file(const char *name);
int	handle_elf(const char *name, int fd);
int	handle_binary(const char *name, int fd);
int	find_strings(const char *name, off_t offset, off_t size);
void	show_version(void);
void	usage(void);
100
101 /*
102 * strings(1) extracts text(contiguous printable characters)
103 * from elf and binary files.
104 */
105 int
main(int argc,char ** argv)106 main(int argc, char **argv)
107 {
108 int ch, rc;
109
110 rc = RETURN_OK;
111 min_len = 0;
112 encoding_size = 1;
113 if (elf_version(EV_CURRENT) == EV_NONE)
114 errx(EXIT_FAILURE, "ELF library initialization failed: %s",
115 elf_errmsg(-1));
116
117 while ((ch = getopt_long(argc, argv, "1234567890ae:fhn:ot:Vv",
118 strings_longopts, NULL)) != -1)
119 switch((char)ch) {
120 case 'a':
121 entire_file = 1;
122 break;
123 case 'e':
124 if (*optarg == 's') {
125 encoding = ENCODING_7BIT;
126 } else if (*optarg == 'S') {
127 encoding = ENCODING_8BIT;
128 } else if (*optarg == 'b') {
129 encoding = ENCODING_16BIT_BIG;
130 encoding_size = 2;
131 } else if (*optarg == 'B') {
132 encoding = ENCODING_32BIT_BIG;
133 encoding_size = 4;
134 } else if (*optarg == 'l') {
135 encoding = ENCODING_16BIT_LITTLE;
136 encoding_size = 2;
137 } else if (*optarg == 'L') {
138 encoding = ENCODING_32BIT_LITTLE;
139 encoding_size = 4;
140 } else
141 usage();
142 /* NOTREACHED */
143 break;
144 case 'f':
145 show_filename = 1;
146 break;
147 case 'n':
148 min_len = strtoimax(optarg, (char**)NULL, 10);
149 if (min_len <= 0)
150 errx(EX_USAGE, "option -n should specify a "
151 "positive decimal integer.");
152 break;
153 case 'o':
154 show_loc = 1;
155 radix = RADIX_OCTAL;
156 break;
157 case 't':
158 show_loc = 1;
159 if (*optarg == 'd')
160 radix = RADIX_DECIMAL;
161 else if (*optarg == 'o')
162 radix = RADIX_OCTAL;
163 else if (*optarg == 'x')
164 radix = RADIX_HEX;
165 else
166 usage();
167 /* NOTREACHED */
168 break;
169 case 'v':
170 case 'V':
171 show_version();
172 /* NOTREACHED */
173 case '0':
174 case '1':
175 case '2':
176 case '3':
177 case '4':
178 case '5':
179 case '6':
180 case '7':
181 case '8':
182 case '9':
183 min_len *= 10;
184 min_len += ch - '0';
185 break;
186 case 'h':
187 case '?':
188 default:
189 usage();
190 /* NOTREACHED */
191 }
192 argc -= optind;
193 argv += optind;
194
195 if (!min_len)
196 min_len = 4;
197 if (!*argv)
198 rc = handle_file("{standard input}");
199 else while (*argv) {
200 rc = handle_file(*argv);
201 argv++;
202 }
203 return (rc);
204 }
205
206 int
handle_file(const char * name)207 handle_file(const char *name)
208 {
209 int fd, rt;
210
211 if (name == NULL)
212 return (RETURN_NOINPUT);
213 if (strcmp("{standard input}", name) != 0) {
214 if (freopen(name, "rb", stdin) == NULL) {
215 warnx("'%s': %s", name, strerror(errno));
216 return (RETURN_NOINPUT);
217 }
218 } else {
219 return (find_strings(name, (off_t)0, (off_t)0));
220 }
221
222 fd = fileno(stdin);
223 if (fd < 0)
224 return (RETURN_NOINPUT);
225 rt = handle_elf(name, fd);
226 return (rt);
227 }
228
229 /*
230 * Files not understood by handle_elf, will be passed off here and will
231 * treated as a binary file. This would include text file, core dumps ...
232 */
233 int
handle_binary(const char * name,int fd)234 handle_binary(const char *name, int fd)
235 {
236 struct stat buf;
237
238 memset(&buf, 0, sizeof(struct stat));
239 (void) lseek(fd, (off_t)0, SEEK_SET);
240 if (!fstat(fd, &buf))
241 return (find_strings(name, (off_t)0, buf.st_size));
242 return (RETURN_SOFTWARE);
243 }
244
245 /*
246 * Will analyse a file to see if it ELF, other files including ar(1),
247 * core dumps are passed off and treated as flat binary files. Unlike
248 * GNU size in FreeBSD this routine will not treat ELF object from
249 * different archs as flat binary files(has to overridden using -a).
250 */
251 int
handle_elf(const char * name,int fd)252 handle_elf(const char *name, int fd)
253 {
254 GElf_Ehdr elfhdr;
255 GElf_Shdr shdr;
256 Elf *elf;
257 Elf_Scn *scn;
258 int rc;
259
260 rc = RETURN_OK;
261 /* If entire file is choosen, treat it as a binary file */
262 if (entire_file)
263 return (handle_binary(name, fd));
264
265 (void) lseek(fd, (off_t)0, SEEK_SET);
266 elf = elf_begin(fd, ELF_C_READ, NULL);
267 if (elf_kind(elf) != ELF_K_ELF) {
268 (void) elf_end(elf);
269 return (handle_binary(name, fd));
270 }
271
272 if (gelf_getehdr(elf, &elfhdr) == NULL) {
273 (void) elf_end(elf);
274 warnx("%s: ELF file could not be processed", name);
275 return (RETURN_SOFTWARE);
276 }
277
278 if (elfhdr.e_shnum == 0 && elfhdr.e_type == ET_CORE) {
279 (void) elf_end(elf);
280 return (handle_binary(name, fd));
281 } else {
282 scn = NULL;
283 while ((scn = elf_nextscn(elf, scn)) != NULL) {
284 if (gelf_getshdr(scn, &shdr) == NULL)
285 continue;
286 if (shdr.sh_type != SHT_NOBITS &&
287 (shdr.sh_flags & SHF_ALLOC) != 0) {
288 rc = find_strings(name, shdr.sh_offset,
289 shdr.sh_size);
290 }
291 }
292 }
293 (void) elf_end(elf);
294 return (rc);
295 }
296
297 /*
298 * Retrieves a character from input stream based on the encoding
299 * type requested.
300 */
301 long
getcharacter(void)302 getcharacter(void)
303 {
304 long rt;
305 int i;
306 char buf[4], c;
307
308 rt = EOF;
309 for(i = 0; i < encoding_size; i++) {
310 c = getc(stdin);
311 if (feof(stdin))
312 return (EOF);
313 buf[i] = c;
314 }
315
316 switch(encoding) {
317 case ENCODING_7BIT:
318 case ENCODING_8BIT:
319 rt = buf[0];
320 break;
321 case ENCODING_16BIT_BIG:
322 rt = (buf[0] << 8) | buf[1];
323 break;
324 case ENCODING_16BIT_LITTLE:
325 rt = buf[0] | (buf[1] << 8);
326 break;
327 case ENCODING_32BIT_BIG:
328 rt = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
329 ((long) buf[2] << 8) | buf[3];
330 break;
331 case ENCODING_32BIT_LITTLE:
332 rt = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
333 ((long) buf[3] << 24);
334 break;
335 }
336 return (rt);
337 }
338
339 /*
340 * Input stream stdin is read until the end of file is reached or until
341 * the section size is reached in case of ELF files. Contiguous
342 * characters of >= min_size(default 4) will be displayed.
343 */
344 int
find_strings(const char * name,off_t offset,off_t size)345 find_strings(const char *name, off_t offset, off_t size)
346 {
347 off_t cur_off, start_off;
348 char *obuf;
349 long c;
350 int i;
351
352 if ((obuf = (char*)calloc(1, min_len + 1)) == NULL) {
353 (void) fprintf(stderr, "Unable to allocate memory: %s\n",
354 strerror(errno));
355 return (RETURN_SOFTWARE);
356 }
357
358 (void) fseeko(stdin, offset, SEEK_SET);
359 cur_off = offset;
360 start_off = 0;
361 while(1) {
362 if ((offset + size) && (cur_off >= offset + size))
363 break;
364 start_off = cur_off;
365 memset(obuf, 0, min_len+1);
366 for(i = 0; i < min_len; i++) {
367 c = getcharacter();
368 if (c == EOF && feof(stdin))
369 goto _exit1;
370 if (PRINTABLE(c)) {
371 obuf[i] = c;
372 obuf[i+1] = 0;
373 cur_off += encoding_size;
374 } else {
375 if (encoding == ENCODING_8BIT &&
376 (uint8_t)c > 127) {
377 obuf[i] = c;
378 obuf[i+1] = 0;
379 cur_off += encoding_size;
380 continue;
381 }
382 cur_off += encoding_size;
383 break;
384 }
385 }
386
387 if (i >= min_len && ((cur_off <= offset + size) ||
388 !(offset + size))) {
389 if (show_filename)
390 printf ("%s: ", name);
391 if (show_loc) {
392 switch(radix) {
393 case RADIX_DECIMAL:
394 (void) printf("%7ju ",
395 (uintmax_t)start_off);
396 break;
397 case RADIX_HEX:
398 (void) printf("%7jx ",
399 (uintmax_t)start_off);
400 break;
401 case RADIX_OCTAL:
402 (void) printf("%7jo ",
403 (uintmax_t)start_off);
404 break;
405 }
406 }
407 printf("%s", obuf);
408
409 while(1) {
410 if ((offset + size) &&
411 (cur_off >= offset + size))
412 break;
413 c = getcharacter();
414 cur_off += encoding_size;
415 if (encoding == ENCODING_8BIT &&
416 (uint8_t)c > 127) {
417 putchar(c);
418 continue;
419 }
420 if (!PRINTABLE(c) || c == EOF)
421 break;
422 putchar(c);
423 }
424 putchar('\n');
425 }
426 }
427 _exit1:
428 free(obuf);
429 return (RETURN_OK);
430 }
431
/*
 * printf(3)-style format of the usage text.  The single %s conversion
 * is filled in with the program name by usage().
 */
#define USAGE_MESSAGE "\
Usage: %s [options] [file...]\n\
Print contiguous sequences of printable characters.\n\n\
Options:\n\
-a | --all Scan the entire file for strings.\n\
-e ENC | --encoding=ENC Select the character encoding to use.\n\
-f | --print-file-name Print the file name before each string.\n\
-h | --help Print a help message and exit.\n\
-n N | --bytes=N | -N Print sequences with 'N' or more characters.\n\
-o Print offsets in octal.\n\
-t R | --radix=R Print offsets using the radix named by 'R'.\n\
-v | --version Print a version identifier and exit.\n"
444
445 void
usage(void)446 usage(void)
447 {
448 (void) fprintf(stderr, USAGE_MESSAGE, ELFTC_GETPROGNAME());
449 exit(EXIT_FAILURE);
450 }
451
/*
 * Write "<program name> (<toolchain version>)" to stdout and terminate
 * the program with EXIT_SUCCESS.  Does not return.
 */
void
show_version(void)
{
	printf("%s (%s)\n", ELFTC_GETPROGNAME(), elftc_version());
	exit(EXIT_SUCCESS);
}
458