1 /* strings -- print the strings of printable characters in files
2    Copyright 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3    2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; either version 2, or (at your option)
8    any later version.
9 
10    This program is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13    GNU General Public License for more details.
14 
15    You should have received a copy of the GNU General Public License
16    along with this program; if not, write to the Free Software
17    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
18    02110-1301, USA.  */
19 
20 /* Usage: strings [options] file...
21 
22    Options:
23    --all
24    -a
   -		Scan the whole file, rather than only the initialized data
		sections of object files.
26 
27    --print-file-name
28    -f		Print the name of the file before each string.
29 
30    --bytes=min-len
31    -n min-len
   -min-len	Print graphic char sequences that are MIN-LEN or more
		characters long; a sequence ends at the first non-graphic
		character or at the end of the input.  Default is 4.
34 
35    --radix={o,x,d}
36    -t {o,x,d}	Print the offset within the file before each string,
37 		in octal/hex/decimal.
38 
39    -o		Like -to.  (Some other implementations have -o like -to,
40 		others like -td.  We chose one arbitrarily.)
41 
42    --encoding={s,S,b,l,B,L}
43    -e {s,S,b,l,B,L}
44 		Select character encoding: 7-bit-character, 8-bit-character,
45 		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
46 		littleendian 32-bit.
47 
48    --target=BFDNAME
49 		Specify a non-default object file format.
50 
51    --help
52    -h		Print the usage message on the standard output.
53 
54    --version
55    -v		Print the program version number.
56 
57    Written by Richard Stallman <rms@gnu.ai.mit.edu>
58    and David MacKenzie <djm@gnu.ai.mit.edu>.  */
59 
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "bfd.h"
#include <stdio.h>
#include "getopt.h"
#include <errno.h>
#include <limits.h>
#include "bucomm.h"
#include "libiberty.h"
#include "safe-ctype.h"
#include <sys/stat.h>
71 
/* Some platforms need to put stdin into binary mode, to read
    binary files.

    SET_BINARY (F) is defined only when HAVE_SETMODE is defined and
    O_BINARY ends up nonzero; callers must therefore test
    `#ifdef SET_BINARY' before using it.  */
#ifdef HAVE_SETMODE
#ifndef O_BINARY
#ifdef _O_BINARY
/* Only the underscore-prefixed spellings exist: map the plain names
   onto them.  */
#define O_BINARY _O_BINARY
#define setmode _setmode
#else
/* No binary flag at all: a zero value disables the block below.  */
#define O_BINARY 0
#endif
#endif
#if O_BINARY
#include <io.h>
/* Switch file descriptor F to binary mode unless isatty reports that
   it is a terminal.  */
#define SET_BINARY(f) do { if (!isatty (f)) setmode (f,O_BINARY); } while (0)
#endif
#endif
88 
/* Nonzero if C counts as part of a printable string: C must lie in
   the range 0..255 and be either a tab, a character accepted by
   ISPRINT, or -- only when the global `encoding' is 'S' -- any value
   above 127.  Note that C is evaluated several times, so it must not
   have side effects.  */
#define STRING_ISGRAPHIC(c) \
      (   (c) >= 0 \
       && (c) <= 255 \
       && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127)))
93 
94 #ifndef errno
95 extern int errno;
96 #endif
97 
/* The BFD section flags that identify an initialized data section.
   strings_a_section requires all three of them to be set.  */
#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)

/* file_off is the type used for offsets within a file and
   file_open (S, M) opens file S with mode M; both select the 64-bit
   variants when HAVE_FOPEN64 is defined.  */
#ifdef HAVE_FOPEN64
typedef off64_t file_off;
#define file_open(s,m) fopen64(s, m)
#else
typedef off_t file_off;
#define file_open(s,m) fopen(s, m)
#endif
/* statbuf is the buffer type filled in by file_stat (F, S); both
   select the 64-bit variants when HAVE_STAT64 is defined.  */
#ifdef HAVE_STAT64
typedef struct stat64 statbuf;
#define file_stat(f,s) stat64(f, s)
#else
typedef struct stat statbuf;
#define file_stat(f,s) stat(f, s)
#endif
115 
/* Radix for printing addresses (must be 8, 10 or 16).  Only set by
   the options that also set print_addresses.  */
static int address_radix;

/* Minimum length of sequence of graphic chars to trigger output.
   While options are being parsed, a negative value means that no
   length has been given yet.  */
static int string_min;

/* TRUE means print address within file for each string.  */
static bfd_boolean print_addresses;

/* TRUE means print filename for each string.  */
static bfd_boolean print_filenames;

/* TRUE means for object files scan only the data section.  */
static bfd_boolean datasection_only;

/* TRUE if we found an initialized data section in the current file.  */
static bfd_boolean got_a_section;

/* The BFD object file format.  NULL unless --target was given.  */
static char *target;

/* The character encoding format: one of the letters s, S, b, l, B
   or L (validated in main).  */
static char encoding;
/* Number of input bytes that make up one character in that encoding;
   main sets it to 1, 2 or 4.  */
static int encoding_bytes;
140 
/* Long options passed to getopt_long by main.  Every entry has a NULL
   flag pointer, so getopt_long returns the character in the last
   field, which main handles in its option switch.  The all-zero entry
   terminates the table.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};
153 
/* Records the size of a named file so that we
   do not repeatedly run bfd_stat() on it.  An instance is handed to
   strings_a_section through bfd_map_over_sections; a FILESIZE of zero
   means that the size has not been looked up yet.  */

typedef struct
{
  const char *  filename;	/* Name to print in front of each string.  */
  bfd_size_type filesize;	/* Cached size of the file, or 0.  */
} filename_and_size_t;
162 
163 static void strings_a_section (bfd *, asection *, void *);
164 static bfd_boolean strings_object_file (const char *);
165 static bfd_boolean strings_file (char *file);
166 static int integer_arg (char *s);
167 static void print_strings (const char *, FILE *, file_off, int, int, char *);
168 static void usage (FILE *, int);
169 static long get_char (FILE *, file_off *, int *, char **);
170 
171 int main (int, char **);
172 
173 int
174 main (int argc, char **argv)
175 {
176   int optc;
177   int exit_status = 0;
178   bfd_boolean files_given = FALSE;
179 
180 #if defined (HAVE_SETLOCALE)
181   setlocale (LC_ALL, "");
182 #endif
183   bindtextdomain (PACKAGE, LOCALEDIR);
184   textdomain (PACKAGE);
185 
186   if (pledge ("stdio rpath", NULL) == -1)
187     fatal (_("Failed to pledge"));
188 
189   program_name = argv[0];
190   xmalloc_set_program_name (program_name);
191 
192   expandargv (&argc, &argv);
193 
194   string_min = -1;
195   print_addresses = FALSE;
196   print_filenames = FALSE;
197   datasection_only = TRUE;
198   target = NULL;
199   encoding = 's';
200 
201   while ((optc = getopt_long (argc, argv, "afhHn:ot:e:Vv0123456789",
202 			      long_options, (int *) 0)) != EOF)
203     {
204       switch (optc)
205 	{
206 	case 'a':
207 	  datasection_only = FALSE;
208 	  break;
209 
210 	case 'f':
211 	  print_filenames = TRUE;
212 	  break;
213 
214 	case 'H':
215 	case 'h':
216 	  usage (stdout, 0);
217 
218 	case 'n':
219 	  string_min = integer_arg (optarg);
220 	  if (string_min < 1)
221 	    fatal (_("invalid number %s"), optarg);
222 	  break;
223 
224 	case 'o':
225 	  print_addresses = TRUE;
226 	  address_radix = 8;
227 	  break;
228 
229 	case 't':
230 	  print_addresses = TRUE;
231 	  if (optarg[1] != '\0')
232 	    usage (stderr, 1);
233 	  switch (optarg[0])
234 	    {
235 	    case 'o':
236 	      address_radix = 8;
237 	      break;
238 
239 	    case 'd':
240 	      address_radix = 10;
241 	      break;
242 
243 	    case 'x':
244 	      address_radix = 16;
245 	      break;
246 
247 	    default:
248 	      usage (stderr, 1);
249 	    }
250 	  break;
251 
252 	case 'T':
253 	  target = optarg;
254 	  break;
255 
256 	case 'e':
257 	  if (optarg[1] != '\0')
258 	    usage (stderr, 1);
259 	  encoding = optarg[0];
260 	  break;
261 
262 	case 'V':
263 	case 'v':
264 	  print_version ("strings");
265 	  break;
266 
267 	case '?':
268 	  usage (stderr, 1);
269 
270 	default:
271 	  if (string_min < 0)
272 	    string_min = optc - '0';
273 	  else
274 	    string_min = string_min * 10 + optc - '0';
275 	  break;
276 	}
277     }
278 
279   if (string_min < 0)
280     string_min = 4;
281 
282   switch (encoding)
283     {
284     case 'S':
285     case 's':
286       encoding_bytes = 1;
287       break;
288     case 'b':
289     case 'l':
290       encoding_bytes = 2;
291       break;
292     case 'B':
293     case 'L':
294       encoding_bytes = 4;
295       break;
296     default:
297       usage (stderr, 1);
298     }
299 
300   bfd_init ();
301   set_default_bfd_target ();
302 
303   if (optind >= argc)
304     {
305       datasection_only = FALSE;
306 #ifdef SET_BINARY
307       SET_BINARY (fileno (stdin));
308 #endif
309       print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
310       files_given = TRUE;
311     }
312   else
313     {
314       for (; optind < argc; ++optind)
315 	{
316 	  if (strcmp (argv[optind], "-") == 0)
317 	    datasection_only = FALSE;
318 	  else
319 	    {
320 	      files_given = TRUE;
321 	      exit_status |= strings_file (argv[optind]) == FALSE;
322 	    }
323 	}
324     }
325 
326   if (!files_given)
327     usage (stderr, 1);
328 
329   return (exit_status);
330 }
331 
332 /* Scan section SECT of the file ABFD, whose printable name is in
333    ARG->filename and whose size might be in ARG->filesize.  If it
334    contains initialized data set `got_a_section' and print the
335    strings in it.
336 
337    FIXME: We ought to be able to return error codes/messages for
338    certain conditions.  */
339 
340 static void
341 strings_a_section (bfd *abfd, asection *sect, void *arg)
342 {
343   filename_and_size_t * filename_and_sizep;
344   bfd_size_type *filesizep;
345   bfd_size_type sectsize;
346   void *mem;
347 
348   if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
349     return;
350 
351   sectsize = bfd_get_section_size (sect);
352 
353   if (sectsize <= 0)
354     return;
355 
356   /* Get the size of the file.  This might have been cached for us.  */
357   filename_and_sizep = (filename_and_size_t *) arg;
358   filesizep = & filename_and_sizep->filesize;
359 
360   if (*filesizep == 0)
361     {
362       struct stat st;
363 
364       if (bfd_stat (abfd, &st))
365 	return;
366 
367       /* Cache the result so that we do not repeatedly stat this file.  */
368       *filesizep = st.st_size;
369     }
370 
371   /* Compare the size of the section against the size of the file.
372      If the section is bigger then the file must be corrupt and
373      we should not try dumping it.  */
374   if (sectsize >= *filesizep)
375     return;
376 
377   mem = xmalloc (sectsize);
378 
379   if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
380     {
381       got_a_section = TRUE;
382 
383       print_strings (filename_and_sizep->filename, NULL, sect->filepos,
384 		     0, sectsize, mem);
385     }
386 
387   free (mem);
388 }
389 
390 /* Scan all of the sections in FILE, and print the strings
391    in the initialized data section(s).
392 
393    Return TRUE if successful,
394    FALSE if not (such as if FILE is not an object file).  */
395 
396 static bfd_boolean
397 strings_object_file (const char *file)
398 {
399   filename_and_size_t filename_and_size;
400   bfd *abfd;
401 
402   abfd = bfd_openr (file, target);
403 
404   if (abfd == NULL)
405     /* Treat the file as a non-object file.  */
406     return FALSE;
407 
408   /* This call is mainly for its side effect of reading in the sections.
409      We follow the traditional behavior of `strings' in that we don't
410      complain if we don't recognize a file to be an object file.  */
411   if (!bfd_check_format (abfd, bfd_object))
412     {
413       bfd_close (abfd);
414       return FALSE;
415     }
416 
417   got_a_section = FALSE;
418   filename_and_size.filename = file;
419   filename_and_size.filesize = 0;
420   bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
421 
422   if (!bfd_close (abfd))
423     {
424       bfd_nonfatal (file);
425       return FALSE;
426     }
427 
428   return got_a_section;
429 }
430 
431 /* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
432 
433 static bfd_boolean
434 strings_file (char *file)
435 {
436   statbuf st;
437 
438   if (file_stat (file, &st) < 0)
439     {
440       if (errno == ENOENT)
441 	non_fatal (_("'%s': No such file"), file);
442       else
443 	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
444 		   file, strerror (errno));
445       return FALSE;
446     }
447 
448   /* If we weren't told to scan the whole file,
449      try to open it as an object file and only look at
450      initialized data sections.  If that fails, fall back to the
451      whole file.  */
452   if (!datasection_only || !strings_object_file (file))
453     {
454       FILE *stream;
455 
456       stream = file_open (file, FOPEN_RB);
457       if (stream == NULL)
458 	{
459 	  fprintf (stderr, "%s: ", program_name);
460 	  perror (file);
461 	  return FALSE;
462 	}
463 
464       print_strings (file, stream, (file_off) 0, 0, 0, (char *) 0);
465 
466       if (fclose (stream) == EOF)
467 	{
468 	  fprintf (stderr, "%s: ", program_name);
469 	  perror (file);
470 	  return FALSE;
471 	}
472     }
473 
474   return TRUE;
475 }
476 
477 /* Read the next character, return EOF if none available.
478    Assume that STREAM is positioned so that the next byte read
479    is at address ADDRESS in the file.
480 
481    If STREAM is NULL, do not read from it.
482    The caller can supply a buffer of characters
483    to be processed before the data in STREAM.
484    MAGIC is the address of the buffer and
485    MAGICCOUNT is how many characters are in it.  */
486 
487 static long
488 get_char (FILE *stream, file_off *address, int *magiccount, char **magic)
489 {
490   int c, i;
491   long r = EOF;
492   unsigned char buf[4];
493 
494   for (i = 0; i < encoding_bytes; i++)
495     {
496       if (*magiccount)
497 	{
498 	  (*magiccount)--;
499 	  c = *(*magic)++;
500 	}
501       else
502 	{
503 	  if (stream == NULL)
504 	    return EOF;
505 
506 	  /* Only use getc_unlocked if we found a declaration for it.
507 	     Otherwise, libc is not thread safe by default, and we
508 	     should not use it.  */
509 
510 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
511 	  c = getc_unlocked (stream);
512 #else
513 	  c = getc (stream);
514 #endif
515 	  if (c == EOF)
516 	    return EOF;
517 	}
518 
519       (*address)++;
520       buf[i] = c;
521     }
522 
523   switch (encoding)
524     {
525     case 'S':
526     case 's':
527       r = buf[0];
528       break;
529     case 'b':
530       r = (buf[0] << 8) | buf[1];
531       break;
532     case 'l':
533       r = buf[0] | (buf[1] << 8);
534       break;
535     case 'B':
536       r = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
537 	((long) buf[2] << 8) | buf[3];
538       break;
539     case 'L':
540       r = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
541 	((long) buf[3] << 24);
542       break;
543     }
544 
545   if (r == EOF)
546     return 0;
547 
548   return r;
549 }
550 
551 /* Find the strings in file FILENAME, read from STREAM.
552    Assume that STREAM is positioned so that the next byte read
553    is at address ADDRESS in the file.
554    Stop reading at address STOP_POINT in the file, if nonzero.
555 
556    If STREAM is NULL, do not read from it.
557    The caller can supply a buffer of characters
558    to be processed before the data in STREAM.
559    MAGIC is the address of the buffer and
560    MAGICCOUNT is how many characters are in it.
561    Those characters come at address ADDRESS and the data in STREAM follow.  */
562 
563 static void
564 print_strings (const char *filename, FILE *stream, file_off address,
565 	       int stop_point, int magiccount, char *magic)
566 {
567   char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
568 
569   while (1)
570     {
571       file_off start;
572       int i;
573       long c;
574 
575       /* See if the next `string_min' chars are all graphic chars.  */
576     tryline:
577       if (stop_point && address >= stop_point)
578 	break;
579       start = address;
580       for (i = 0; i < string_min; i++)
581 	{
582 	  c = get_char (stream, &address, &magiccount, &magic);
583 	  if (c == EOF)
584 	    return;
585 	  if (! STRING_ISGRAPHIC (c))
586 	    /* Found a non-graphic.  Try again starting with next char.  */
587 	    goto tryline;
588 	  buf[i] = c;
589 	}
590 
591       /* We found a run of `string_min' graphic characters.  Print up
592 	 to the next non-graphic character.  */
593 
594       if (print_filenames)
595 	printf ("%s: ", filename);
596       if (print_addresses)
597 	switch (address_radix)
598 	  {
599 	  case 8:
600 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
601 	    if (sizeof (start) > sizeof (long))
602 	      printf ("%7Lo ", (unsigned long long) start);
603 	    else
604 #else
605 # if !BFD_HOST_64BIT_LONG
606 	    if (start != (unsigned long) start)
607 	      printf ("++%7lo ", (unsigned long) start);
608 	    else
609 # endif
610 #endif
611 	      printf ("%7lo ", (unsigned long) start);
612 	    break;
613 
614 	  case 10:
615 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
616 	    if (sizeof (start) > sizeof (long))
617 	      printf ("%7Ld ", (unsigned long long) start);
618 	    else
619 #else
620 # if !BFD_HOST_64BIT_LONG
621 	    if (start != (unsigned long) start)
622 	      printf ("++%7ld ", (unsigned long) start);
623 	    else
624 # endif
625 #endif
626 	      printf ("%7ld ", (long) start);
627 	    break;
628 
629 	  case 16:
630 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
631 	    if (sizeof (start) > sizeof (long))
632 	      printf ("%7Lx ", (unsigned long long) start);
633 	    else
634 #else
635 # if !BFD_HOST_64BIT_LONG
636 	    if (start != (unsigned long) start)
637 	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
638 		      (unsigned long) (start & 0xffffffff));
639 	    else
640 # endif
641 #endif
642 	      printf ("%7lx ", (unsigned long) start);
643 	    break;
644 	  }
645 
646       buf[i] = '\0';
647       fputs (buf, stdout);
648 
649       while (1)
650 	{
651 	  c = get_char (stream, &address, &magiccount, &magic);
652 	  if (c == EOF)
653 	    break;
654 	  if (! STRING_ISGRAPHIC (c))
655 	    break;
656 	  putchar (c);
657 	}
658 
659       putchar ('\n');
660     }
661 }
662 
/* Parse string S as a non-negative integer, using decimal radix by
   default, but allowing octal (leading `0') and hex (leading `0x' or
   `0X') numbers as in C.  A decimal or octal number may be followed
   by `b', which multiplies it by 512, or by `B', which multiplies it
   by 1024; in a hex number those letters are digits.

   Return the value.  A string without any digits yields 0.  Call
   fatal if S contains anything else or if the value does not fit in
   an int.  */

static int
integer_arg (char *s)
{
  const char *p = s;
  int radix = 10;
  int value = 0;

  if (p[0] == '0')
    {
      if (p[1] == 'x' || p[1] == 'X')
	{
	  radix = 16;
	  p += 2;
	}
      else
	/* The leading zero is parsed below as an ordinary digit.  */
	radix = 8;
    }

  for (;; p++)
    {
      int digit;

      if (*p >= '0' && *p <= '9')
	digit = *p - '0';
      else if (*p >= 'a' && *p <= 'f')
	digit = *p - 'a' + 10;
      else if (*p >= 'A' && *p <= 'F')
	digit = *p - 'A' + 10;
      else
	break;

      /* Not a digit of this radix (a letter outside hex, or 8/9 in
	 octal): leave it for the suffix and trailing-text checks.  */
      if (digit >= radix)
	break;

      /* Refuse values that would overflow an int.  */
      if (value > (INT_MAX - digit) / radix)
	fatal (_("invalid integer argument %s"), s);

      value = value * radix + digit;
    }

  if (*p == 'b' || *p == 'B')
    {
      int scale = (*p == 'b') ? 512 : 1024;

      if (value > INT_MAX / scale)
	fatal (_("invalid integer argument %s"), s);

      value *= scale;
      p++;
    }

  /* Anything left over makes the argument invalid.  */
  if (*p != '\0')
    fatal (_("invalid integer argument %s"), s);

  return value;
}
707 
/* Print the usage message to STREAM, call list_supported_targets for
   the same stream and, if STATUS is 0, add the address for bug
   reports.  Then exit with STATUS; this function does not return.

   Callers pass stdout and 0 for --help, and stderr and 1 for a
   malformed command line.  */

static void
usage (FILE *stream, int status)
{
  fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
  fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
  fprintf (stream, _(" The options are:\n\
  -a - --all                Scan the entire file, not just the data section\n\
  -f --print-file-name      Print the name of the file before each string\n\
  -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
  -<number>                 least [number] characters (default 4).\n\
  -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
  -o                        An alias for --radix=o\n\
  -T --target=<BFDNAME>     Specify the binary file format\n\
  -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
                            s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
  @<file>                   Read options from <file>\n\
  -h --help                 Display this information\n\
  -v --version              Print the program's version number\n"));
  list_supported_targets (program_name, stream);
  /* The bug-report address is only shown when help was asked for.  */
  if (status == 0)
    fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
  exit (status);
}
731