1 /* strings -- print the strings of printable characters in files
2    Copyright (C) 1993-2020 Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17    02110-1301, USA.  */
18 
19 /* Usage: strings [options] file...
20 
21    Options:
22    --all
23    -a
24    -		Scan each file in its entirety.
25 
26    --data
27    -d		Scan only the initialized data section(s) of object files.
28 
29    --print-file-name
30    -f		Print the name of the file before each string.
31 
32    --bytes=min-len
33    -n min-len
34    -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
35 		that are followed by a NUL or a newline.  Default is 4.
36 
37    --radix={o,x,d}
38    -t {o,x,d}	Print the offset within the file before each string,
39 		in octal/hex/decimal.
40 
41   --include-all-whitespace
  -w		By default tab and space are the only whitespace included in graphic
43 		char sequences.  This option considers all of isspace() valid.
44 
45    -o		Like -to.  (Some other implementations have -o like -to,
46 		others like -td.  We chose one arbitrarily.)
47 
48    --encoding={s,S,b,l,B,L}
49    -e {s,S,b,l,B,L}
50 		Select character encoding: 7-bit-character, 8-bit-character,
51 		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
52 		littleendian 32-bit.
53 
54    --target=BFDNAME
55    -T {bfdname}
56 		Specify a non-default object file format.
57 
58   --output-separator=sep_string
59   -s sep_string	String used to separate parsed strings in output.
60 		Default is newline.
61 
62    --help
63    -h		Print the usage message on the standard output.
64 
65    --version
66    -V
67    -v		Print the program version number.
68 
69    Written by Richard Stallman <rms@gnu.ai.mit.edu>
70    and David MacKenzie <djm@gnu.ai.mit.edu>.  */
71 
72 #include "sysdep.h"
73 #include "bfd.h"
74 #include "getopt.h"
75 #include "libiberty.h"
76 #include "safe-ctype.h"
77 #include "bucomm.h"
78 
/* Nonzero when C may appear inside a printed string.  C must lie in
   the range 0..255 and be one of: a tab, a character accepted by
   ISPRINT, a value above 127 while the encoding is 'S', or a character
   accepted by ISSPACE while include_all_whitespace is set.
   C is evaluated more than once, so it must have no side effects.  */
#define STRING_ISGRAPHIC(c) \
  ((c) >= 0 \
   && (c) <= 255 \
   && ((c) == '\t' \
       || ISPRINT (c) \
       || (encoding == 'S' && (c) > 127) \
       || (include_all_whitespace && ISSPACE (c))))
85 
86 #ifndef errno
87 extern int errno;
88 #endif
89 
90 /* The BFD section flags that identify an initialized data section.  */
91 #define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
92 
93 /* Radix for printing addresses (must be 8, 10 or 16).  */
94 static int address_radix;
95 
96 /* Minimum length of sequence of graphic chars to trigger output.  */
97 static int string_min;
98 
99 /* Whether or not we include all whitespace as a graphic char.   */
100 static bfd_boolean include_all_whitespace;
101 
102 /* TRUE means print address within file for each string.  */
103 static bfd_boolean print_addresses;
104 
105 /* TRUE means print filename for each string.  */
106 static bfd_boolean print_filenames;
107 
108 /* TRUE means for object files scan only the data section.  */
109 static bfd_boolean datasection_only;
110 
111 /* The BFD object file format.  */
112 static char *target;
113 
114 /* The character encoding format.  */
115 static char encoding;
116 static int encoding_bytes;
117 
118 /* Output string used to separate parsed strings  */
119 static char *output_separator;
120 
121 static struct option long_options[] =
122 {
123   {"all", no_argument, NULL, 'a'},
124   {"data", no_argument, NULL, 'd'},
125   {"print-file-name", no_argument, NULL, 'f'},
126   {"bytes", required_argument, NULL, 'n'},
127   {"radix", required_argument, NULL, 't'},
128   {"include-all-whitespace", no_argument, NULL, 'w'},
129   {"encoding", required_argument, NULL, 'e'},
130   {"target", required_argument, NULL, 'T'},
131   {"output-separator", required_argument, NULL, 's'},
132   {"help", no_argument, NULL, 'h'},
133   {"version", no_argument, NULL, 'v'},
134   {NULL, 0, NULL, 0}
135 };
136 
137 static bfd_boolean strings_file (char *);
138 static void print_strings (const char *, FILE *, file_ptr, int, int, char *);
139 static void usage (FILE *, int) ATTRIBUTE_NORETURN;
140 
141 int main (int, char **);
142 
143 int
144 main (int argc, char **argv)
145 {
146   int optc;
147   int exit_status = 0;
148   bfd_boolean files_given = FALSE;
149   char *s;
150   int numeric_opt = 0;
151 
152 #if defined (HAVE_SETLOCALE)
153   setlocale (LC_ALL, "");
154 #endif
155   bindtextdomain (PACKAGE, LOCALEDIR);
156   textdomain (PACKAGE);
157 
158   program_name = argv[0];
159   xmalloc_set_program_name (program_name);
160   bfd_set_error_program_name (program_name);
161 
162   expandargv (&argc, &argv);
163 
164   string_min = 4;
165   include_all_whitespace = FALSE;
166   print_addresses = FALSE;
167   print_filenames = FALSE;
168   if (DEFAULT_STRINGS_ALL)
169     datasection_only = FALSE;
170   else
171     datasection_only = TRUE;
172   target = NULL;
173   encoding = 's';
174   output_separator = NULL;
175 
176   while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:Vv0123456789",
177 			      long_options, (int *) 0)) != EOF)
178     {
179       switch (optc)
180 	{
181 	case 'a':
182 	  datasection_only = FALSE;
183 	  break;
184 
185 	case 'd':
186 	  datasection_only = TRUE;
187 	  break;
188 
189 	case 'f':
190 	  print_filenames = TRUE;
191 	  break;
192 
193 	case 'H':
194 	case 'h':
195 	  usage (stdout, 0);
196 
197 	case 'n':
198 	  string_min = (int) strtoul (optarg, &s, 0);
199 	  if (s != NULL && *s != 0)
200 	    fatal (_("invalid integer argument %s"), optarg);
201 	  break;
202 
203 	case 'w':
204 	  include_all_whitespace = TRUE;
205 	  break;
206 
207 	case 'o':
208 	  print_addresses = TRUE;
209 	  address_radix = 8;
210 	  break;
211 
212 	case 't':
213 	  print_addresses = TRUE;
214 	  if (optarg[1] != '\0')
215 	    usage (stderr, 1);
216 	  switch (optarg[0])
217 	    {
218 	    case 'o':
219 	      address_radix = 8;
220 	      break;
221 
222 	    case 'd':
223 	      address_radix = 10;
224 	      break;
225 
226 	    case 'x':
227 	      address_radix = 16;
228 	      break;
229 
230 	    default:
231 	      usage (stderr, 1);
232 	    }
233 	  break;
234 
235 	case 'T':
236 	  target = optarg;
237 	  break;
238 
239 	case 'e':
240 	  if (optarg[1] != '\0')
241 	    usage (stderr, 1);
242 	  encoding = optarg[0];
243 	  break;
244 
245 	case 's':
246 	  output_separator = optarg;
247           break;
248 
249 	case 'V':
250 	case 'v':
251 	  print_version ("strings");
252 	  break;
253 
254 	case '?':
255 	  usage (stderr, 1);
256 
257 	default:
258 	  numeric_opt = optind;
259 	  break;
260 	}
261     }
262 
263   if (numeric_opt != 0)
264     {
265       string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
266       if (s != NULL && *s != 0)
267 	fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
268     }
269   if (string_min < 1)
270     fatal (_("invalid minimum string length %d"), string_min);
271 
272   switch (encoding)
273     {
274     case 'S':
275     case 's':
276       encoding_bytes = 1;
277       break;
278     case 'b':
279     case 'l':
280       encoding_bytes = 2;
281       break;
282     case 'B':
283     case 'L':
284       encoding_bytes = 4;
285       break;
286     default:
287       usage (stderr, 1);
288     }
289 
290   if (bfd_init () != BFD_INIT_MAGIC)
291     fatal (_("fatal error: libbfd ABI mismatch"));
292   set_default_bfd_target ();
293 
294   if (optind >= argc)
295     {
296       datasection_only = FALSE;
297       SET_BINARY (fileno (stdin));
298       print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
299       files_given = TRUE;
300     }
301   else
302     {
303       for (; optind < argc; ++optind)
304 	{
305 	  if (strcmp (argv[optind], "-") == 0)
306 	    datasection_only = FALSE;
307 	  else
308 	    {
309 	      files_given = TRUE;
310 	      exit_status |= !strings_file (argv[optind]);
311 	    }
312 	}
313     }
314 
315   if (!files_given)
316     usage (stderr, 1);
317 
318   return (exit_status);
319 }
320 
321 /* Scan section SECT of the file ABFD, whose printable name is
322    FILENAME.  If it contains initialized data set GOT_A_SECTION and
323    print the strings in it.  */
324 
325 static void
326 strings_a_section (bfd *abfd, asection *sect, const char *filename,
327 		   bfd_boolean *got_a_section)
328 {
329   bfd_size_type sectsize;
330   bfd_byte *mem;
331 
332   if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
333     return;
334 
335   sectsize = bfd_section_size (sect);
336   if (sectsize == 0)
337     return;
338 
339   if (!bfd_malloc_and_get_section (abfd, sect, &mem))
340     {
341       non_fatal (_("%s: Reading section %s failed: %s"),
342 		 filename, sect->name, bfd_errmsg (bfd_get_error ()));
343       return;
344     }
345 
346   *got_a_section = TRUE;
347   print_strings (filename, NULL, sect->filepos, 0, sectsize, (char *) mem);
348   free (mem);
349 }
350 
351 /* Scan all of the sections in FILE, and print the strings
352    in the initialized data section(s).
353 
354    Return TRUE if successful,
355    FALSE if not (such as if FILE is not an object file).  */
356 
357 static bfd_boolean
358 strings_object_file (const char *file)
359 {
360   bfd *abfd;
361   asection *s;
362   bfd_boolean got_a_section;
363 
364   abfd = bfd_openr (file, target);
365 
366   if (abfd == NULL)
367     /* Treat the file as a non-object file.  */
368     return FALSE;
369 
370   /* This call is mainly for its side effect of reading in the sections.
371      We follow the traditional behavior of `strings' in that we don't
372      complain if we don't recognize a file to be an object file.  */
373   if (!bfd_check_format (abfd, bfd_object))
374     {
375       bfd_close (abfd);
376       return FALSE;
377     }
378 
379   got_a_section = FALSE;
380   for (s = abfd->sections; s != NULL; s = s->next)
381     strings_a_section (abfd, s, file, &got_a_section);
382 
383   if (!bfd_close (abfd))
384     {
385       bfd_nonfatal (file);
386       return FALSE;
387     }
388 
389   return got_a_section;
390 }
391 
392 /* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
393 
394 static bfd_boolean
395 strings_file (char *file)
396 {
397   struct stat st;
398 
399   /* get_file_size does not support non-S_ISREG files.  */
400 
401   if (stat (file, &st) < 0)
402     {
403       if (errno == ENOENT)
404 	non_fatal (_("'%s': No such file"), file);
405       else
406 	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
407 		   file, strerror (errno));
408       return FALSE;
409     }
410   else if (S_ISDIR (st.st_mode))
411     {
412       non_fatal (_("Warning: '%s' is a directory"), file);
413       return FALSE;
414     }
415 
416   /* If we weren't told to scan the whole file,
417      try to open it as an object file and only look at
418      initialized data sections.  If that fails, fall back to the
419      whole file.  */
420   if (!datasection_only || !strings_object_file (file))
421     {
422       FILE *stream;
423 
424       stream = fopen (file, FOPEN_RB);
425       if (stream == NULL)
426 	{
427 	  fprintf (stderr, "%s: ", program_name);
428 	  perror (file);
429 	  return FALSE;
430 	}
431 
432       print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0);
433 
434       if (fclose (stream) == EOF)
435 	{
436 	  fprintf (stderr, "%s: ", program_name);
437 	  perror (file);
438 	  return FALSE;
439 	}
440     }
441 
442   return TRUE;
443 }
444 
445 /* Read the next character, return EOF if none available.
446    Assume that STREAM is positioned so that the next byte read
447    is at address ADDRESS in the file.
448 
449    If STREAM is NULL, do not read from it.
450    The caller can supply a buffer of characters
451    to be processed before the data in STREAM.
452    MAGIC is the address of the buffer and
453    MAGICCOUNT is how many characters are in it.  */
454 
455 static long
456 get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
457 {
458   int c, i;
459   long r = 0;
460 
461   for (i = 0; i < encoding_bytes; i++)
462     {
463       if (*magiccount)
464 	{
465 	  (*magiccount)--;
466 	  c = *(*magic)++;
467 	}
468       else
469 	{
470 	  if (stream == NULL)
471 	    return EOF;
472 
473 	  /* Only use getc_unlocked if we found a declaration for it.
474 	     Otherwise, libc is not thread safe by default, and we
475 	     should not use it.  */
476 
477 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
478 	  c = getc_unlocked (stream);
479 #else
480 	  c = getc (stream);
481 #endif
482 	  if (c == EOF)
483 	    return EOF;
484 	}
485 
486       (*address)++;
487       r = (r << 8) | (c & 0xff);
488     }
489 
490   switch (encoding)
491     {
492     default:
493       break;
494     case 'l':
495       r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8);
496       break;
497     case 'L':
498       r = (((r & 0xff) << 24) | ((r & 0xff00) << 8)
499 	   | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24));
500       break;
501     }
502 
503   return r;
504 }
505 
506 /* Throw away one byte of a (possibly) multi-byte char C, updating
507    address and buffer to suit.  */
508 
509 static void
510 unget_part_char (long c, file_ptr *address, int *magiccount, char **magic)
511 {
512   static char tmp[4];
513 
514   if (encoding_bytes > 1)
515     {
516       *address -= encoding_bytes - 1;
517 
518       if (*magiccount == 0)
519 	{
520 	  /* If no magic buffer exists, use temp buffer.  */
521 	  switch (encoding)
522 	    {
523 	    default:
524 	      break;
525 	    case 'b':
526 	      tmp[0] = c & 0xff;
527 	      *magiccount = 1;
528 	      break;
529 	    case 'l':
530 	      tmp[0] = (c >> 8) & 0xff;
531 	      *magiccount = 1;
532 	      break;
533 	    case 'B':
534 	      tmp[0] = (c >> 16) & 0xff;
535 	      tmp[1] = (c >> 8) & 0xff;
536 	      tmp[2] = c & 0xff;
537 	      *magiccount = 3;
538 	      break;
539 	    case 'L':
540 	      tmp[0] = (c >> 8) & 0xff;
541 	      tmp[1] = (c >> 16) & 0xff;
542 	      tmp[2] = (c >> 24) & 0xff;
543 	      *magiccount = 3;
544 	      break;
545 	    }
546 	  *magic = tmp;
547 	}
548       else
549 	{
550 	  /* If magic buffer exists, rewind.  */
551 	  *magic -= encoding_bytes - 1;
552 	  *magiccount += encoding_bytes - 1;
553 	}
554     }
555 }
556 
557 /* Find the strings in file FILENAME, read from STREAM.
558    Assume that STREAM is positioned so that the next byte read
559    is at address ADDRESS in the file.
560    Stop reading at address STOP_POINT in the file, if nonzero.
561 
562    If STREAM is NULL, do not read from it.
563    The caller can supply a buffer of characters
564    to be processed before the data in STREAM.
565    MAGIC is the address of the buffer and
566    MAGICCOUNT is how many characters are in it.
567    Those characters come at address ADDRESS and the data in STREAM follow.  */
568 
569 static void
570 print_strings (const char *filename, FILE *stream, file_ptr address,
571 	       int stop_point, int magiccount, char *magic)
572 {
573   char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
574 
575   while (1)
576     {
577       file_ptr start;
578       int i;
579       long c;
580 
581       /* See if the next `string_min' chars are all graphic chars.  */
582     tryline:
583       if (stop_point && address >= stop_point)
584 	break;
585       start = address;
586       for (i = 0; i < string_min; i++)
587 	{
588 	  c = get_char (stream, &address, &magiccount, &magic);
589 	  if (c == EOF)
590 	    {
591 	      free (buf);
592 	      return;
593 	    }
594 
595 	  if (! STRING_ISGRAPHIC (c))
596 	    {
597 	      /* Found a non-graphic.  Try again starting with next byte.  */
598 	      unget_part_char (c, &address, &magiccount, &magic);
599 	      goto tryline;
600 	    }
601 	  buf[i] = c;
602 	}
603 
604       /* We found a run of `string_min' graphic characters.  Print up
605 	 to the next non-graphic character.  */
606 
607       if (print_filenames)
608 	printf ("%s: ", filename);
609       if (print_addresses)
610 	switch (address_radix)
611 	  {
612 	  case 8:
613 #ifdef HAVE_LONG_LONG
614 	    if (sizeof (start) > sizeof (long))
615 	      {
616 # ifndef __MSVCRT__
617 		printf ("%7llo ", (unsigned long long) start);
618 # else
619 		printf ("%7I64o ", (unsigned long long) start);
620 # endif
621 	      }
622 	    else
623 #elif !BFD_HOST_64BIT_LONG
624 	      if (start != (unsigned long) start)
625 		printf ("++%7lo ", (unsigned long) start);
626 	      else
627 #endif
628 		printf ("%7lo ", (unsigned long) start);
629 	    break;
630 
631 	  case 10:
632 #ifdef HAVE_LONG_LONG
633 	    if (sizeof (start) > sizeof (long))
634 	      {
635 # ifndef __MSVCRT__
636 		printf ("%7llu ", (unsigned long long) start);
637 # else
638 		printf ("%7I64d ", (unsigned long long) start);
639 # endif
640 	      }
641 	    else
642 #elif !BFD_HOST_64BIT_LONG
643 	      if (start != (unsigned long) start)
644 		printf ("++%7lu ", (unsigned long) start);
645 	      else
646 #endif
647 		printf ("%7ld ", (long) start);
648 	    break;
649 
650 	  case 16:
651 #ifdef HAVE_LONG_LONG
652 	    if (sizeof (start) > sizeof (long))
653 	      {
654 # ifndef __MSVCRT__
655 		printf ("%7llx ", (unsigned long long) start);
656 # else
657 		printf ("%7I64x ", (unsigned long long) start);
658 # endif
659 	      }
660 	    else
661 #elif !BFD_HOST_64BIT_LONG
662 	      if (start != (unsigned long) start)
663 		printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
664 			(unsigned long) (start & 0xffffffff));
665 	      else
666 #endif
667 		printf ("%7lx ", (unsigned long) start);
668 	    break;
669 	  }
670 
671       buf[i] = '\0';
672       fputs (buf, stdout);
673 
674       while (1)
675 	{
676 	  c = get_char (stream, &address, &magiccount, &magic);
677 	  if (c == EOF)
678 	    break;
679 	  if (! STRING_ISGRAPHIC (c))
680 	    {
681 	      unget_part_char (c, &address, &magiccount, &magic);
682 	      break;
683 	    }
684 	  putchar (c);
685 	}
686 
687       if (output_separator)
688 	fputs (output_separator, stdout);
689       else
690 	putchar ('\n');
691     }
692   free (buf);
693 }
694 
695 static void
696 usage (FILE *stream, int status)
697 {
698   fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
699   fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
700   fprintf (stream, _(" The options are:\n"));
701 
702   if (DEFAULT_STRINGS_ALL)
703     fprintf (stream, _("\
704   -a - --all                Scan the entire file, not just the data section [default]\n\
705   -d --data                 Only scan the data sections in the file\n"));
706   else
707     fprintf (stream, _("\
708   -a - --all                Scan the entire file, not just the data section\n\
709   -d --data                 Only scan the data sections in the file [default]\n"));
710 
711   fprintf (stream, _("\
712   -f --print-file-name      Print the name of the file before each string\n\
713   -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
714   -<number>                   least [number] characters (default 4).\n\
715   -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
716   -w --include-all-whitespace Include all whitespace as valid string characters\n\
717   -o                        An alias for --radix=o\n\
718   -T --target=<BFDNAME>     Specify the binary file format\n\
719   -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
720                             s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
721   -s --output-separator=<string> String used to separate strings in output.\n\
722   @<file>                   Read options from <file>\n\
723   -h --help                 Display this information\n\
724   -v -V --version           Print the program's version number\n"));
725   list_supported_targets (program_name, stream);
726   if (REPORT_BUGS_TO[0] && status == 0)
727     fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
728   exit (status);
729 }
730