1 /*
2 * hd - display files in hexadecimal format
3 *
4 * Gunnar Ritter, Freiburg i. Br., Germany, September 2003.
5 */
6 /*
7 * Copyright (c) 2003 Gunnar Ritter
8 *
9 * This software is provided 'as-is', without any express or implied
10 * warranty. In no event will the authors be held liable for any damages
11 * arising from the use of this software.
12 *
13 * Permission is granted to anyone to use this software for any purpose,
14 * including commercial applications, and to alter it and redistribute
15 * it freely, subject to the following restrictions:
16 *
17 * 1. The origin of this software must not be misrepresented; you must not
18 * claim that you wrote the original software. If you use this software
19 * in a product, an acknowledgment in the product documentation would be
20 * appreciated but is not required.
21 *
22 * 2. Altered source versions must be plainly marked as such, and must not be
23 * misrepresented as being the original software.
24 *
25 * 3. This notice may not be removed or altered from any source distribution.
26 */
27
/*
 * USED decorates the version string below.  Newer GCC releases get the
 * "used" attribute, older ones fall back to "unused", and any other
 * compiler gets no decoration at all.
 */
#if (__GNUC__ >= 3 && __GNUC_MINOR__ >= 4) || (__GNUC__ >= 4)
#define	USED	__attribute__ ((used))
#elif defined (__GNUC__)
#define	USED	__attribute__ ((unused))
#else
#define	USED
#endif
/* SCCS-style identification string; nothing in this file reads it. */
static const char sccsid[] USED = "@(#)hd.sl 1.12 (gritter) 5/29/05";
36
37 #include <sys/types.h>
38 #include <stdio.h>
39 #include <unistd.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <libgen.h>
43 #include <stdarg.h>
44 #include <locale.h>
45 #include <ctype.h>
46 #include <wctype.h>
47 #include <wchar.h>
48 #include <inttypes.h>
49 #include <limits.h>
50 #include "atoll.h"
51 #include "mbtowi.h"
52
/*
 * On glibc, reroute getc() and putchar() to the _IO_*_unlocked macros
 * whenever the C library headers provide them.
 */
#if defined (__GLIBC__) && defined (_IO_getc_unlocked)
#undef	getc
#define	getc(f)		_IO_getc_unlocked(f)
#endif
#if defined (__GLIBC__) && defined (_IO_putc_unlocked)
#undef	putchar
#define	putchar(c)	_IO_putc_unlocked(c, stdout)
#endif
63
/*
 * Output radix selectors.  Each radix occupies its own bit so that
 * several of them can be OR-ed together in one flag variable.
 */
enum base {
	BASE_0 = 0,		/* no radix selected */
	BASE_X = 1 << 0,	/* hexadecimal */
	BASE_D = 1 << 1,	/* decimal */
	BASE_O = 1 << 2		/* octal */
};
70
/*
 * One 16-byte chunk of input, accessible as bytes, as 16-bit words
 * or as 32-bit longs.
 */
union block {
	int8_t	b_c[16];	/* byte view */
	int16_t	b_w[8];		/* 16-bit word view */
	int32_t	b_l[4];		/* 32-bit long view */
};
76
77 static const struct fm {
78 int f_fmt;
79 enum base f_base;
80 char f_width;
81 char f_align[3];
82 const char *f_prf;
83 } ofmt[] = {
84 { 'b', BASE_X, 2, {2,5,11}, "%02x" },
85 { 'b', BASE_D, 3, {3,7,15}, "%3u" },
86 { 'b', BASE_O, 3, {3,7,15}, "%03o" },
87 { 'c', BASE_X, 2, {2,5,11}, "%02x" },
88 { 'c', BASE_D, 3, {3,7,15}, "%3u" },
89 { 'c', BASE_O, 3, {3,7,15}, "%03o" },
90 { 'w', BASE_X, 4, {0,4, 9}, "%04x" },
91 { 'w', BASE_D, 5, {0,5,11}, "%5u" },
92 { 'w', BASE_O, 6, {0,6,13}, "%06o" },
93 { 'l', BASE_X, 8, {0,0, 8}, "%08lx" },
94 { 'l', BASE_D, 10, {0,0,10}, "%10lu" },
95 { 'l', BASE_O, 11, {0,0,11}, "%011lo" },
96 { 0, BASE_0, 0, {0,0, 0}, NULL }
97 };
98
99 static int Aflag; /* print ASCII at right */
100 static enum base aflag; /* address format specifier */
101 static enum base bflag; /* byte format specifier */
102 static enum base cflag; /* print ASCII at center */
103 static enum base lflag; /* long (32 bit) format specifier */
104 static long long nflag; /* number of bytes to process */
105 static long long sflag; /* start offset */
106 static int tflag; /* print text file */
107 static int vflag; /* no '*' for identical lines */
108 static enum base wflag; /* word (16 bit) format specifier */
109 static char align[3];
110 static const char *progname;
111 static int status;
112 static int mb_cur_max;
113
/* Option handling and diagnostics. */
static void	usage(void);
static void	flag(int c);
static void	base(enum base b, enum base *basep);
static long long	count(const char *s);
static void	diag(const char *fmt, ...);

/* Binary dump. */
static void	hd(FILE *fp);
static void	prna(long long n);
static void	prnb(union block *bp, int n);
static void	line(union block *bp, int n, int fmt, enum base base, int cnt);
static const struct fm	*getfmt(int fmt, enum base base);
static void	getalign(void);

/* Text dump. */
static void	prnt(FILE *fp, long long of);
static void	prnc(int c);
static char	*wcget(FILE *fp, wint_t *wc, int *len);
129
130 int
main(int argc,char ** argv)131 main(int argc, char **argv)
132 {
133 FILE *fp;
134 int i, j;
135
136 progname = basename(argv[0]);
137 setlocale(LC_CTYPE, "");
138 mb_cur_max = MB_CUR_MAX;
139 for (i = 1; i < argc && argv[i][0] == '-'; i++) {
140 switch (argv[i][1]) {
141 case 's':
142 if (argv[i][2])
143 sflag = count(&argv[i][2]);
144 else if (++i < argc)
145 sflag = count(argv[i]);
146 else
147 usage();
148 break;
149 case 'n':
150 if (argv[i][2])
151 nflag = count(&argv[i][2]);
152 else if (++i < argc)
153 nflag = count(argv[i]);
154 else
155 usage();
156 break;
157 default:
158 for (j = 1; argv[i][j]; j++)
159 flag(argv[i][j]&0377);
160 flag(0);
161 }
162 }
163 if (tflag && (Aflag|bflag|cflag|lflag|wflag))
164 diag("-t flag overrides other flags");
165 if ((Aflag|bflag|cflag|lflag|wflag) == 0)
166 Aflag = 1;
167 if ((bflag|cflag|lflag|wflag) == 0)
168 bflag = BASE_X;
169 getalign();
170 if (i < argc) {
171 j = i+1 < argc;
172 do {
173 if (access(argv[i], R_OK) < 0) {
174 diag("cannot access %s", argv[i]);
175 continue;
176 }
177 if ((fp = fopen(argv[i], "r")) == NULL) {
178 diag("open of %s failed", argv[i]);
179 continue;
180 }
181 if (j)
182 printf("%s:\n", argv[i]);
183 hd(fp);
184 fclose(fp);
185 if (i+1 < argc)
186 printf("\n");
187 } while (++i < argc);
188 } else
189 hd(stdin);
190 return status;
191 }
192
193 static void
flag(int c)194 flag(int c)
195 {
196 static enum base *basep;
197
198 switch (c) {
199 case '\0':
200 if (basep && basep != &aflag && *basep == BASE_0)
201 *basep |= BASE_O|BASE_D|BASE_X;
202 basep = NULL;
203 break;
204 case 'a':
205 basep = &aflag;
206 break;
207 case 'b':
208 basep = &bflag;
209 break;
210 case 'w':
211 basep = &wflag;
212 break;
213 case 'l':
214 basep = &lflag;
215 break;
216 case 'c':
217 basep = &cflag;
218 break;
219 case 'A':
220 Aflag = 1;
221 break;
222 case 'x':
223 base(BASE_X, basep);
224 break;
225 case 'd':
226 base(BASE_D, basep);
227 break;
228 case 'o':
229 base(BASE_O, basep);
230 break;
231 case 't':
232 tflag = 1;
233 break;
234 case 'v':
235 vflag = 1;
236 break;
237 default:
238 usage();
239 }
240 }
241
242 static void
base(enum base b,enum base * basep)243 base(enum base b, enum base *basep)
244 {
245 if (basep) {
246 if (basep == &aflag)
247 *basep = b;
248 else
249 *basep |= b;
250 } else {
251 if (aflag == BASE_0)
252 aflag |= b;
253 cflag |= b;
254 bflag |= b;
255 wflag |= b;
256 lflag |= b;
257 }
258 }
259
/*
 * Convert the argument of -s or -n to a byte count.
 *
 * The number is hexadecimal after "0x", octal after a leading "0" and
 * decimal otherwise.  It may be followed by an optional '*' and one of
 * the unit letters w (2 bytes), l (4), b (512) or k (1024).
 *
 * Returns the resulting count.  Prints a diagnostic and exits with
 * status 3 if the string has trailing garbage, contains no digits at
 * all, is negative, or if scaling it by the unit would overflow.
 */
static long long
count(const char *s)
{
	long long	c, mult = 1;
	int	bs = 10;
	int	seen = 0;	/* set once at least one digit was found */
	char	*x;

	if (s[0] == '0' && s[1] == 'x') {
		bs = 16;
		s += 2;
		seen = 1;	/* the leading 0 counts as a digit */
	} else if (s[0] == '0') {
		bs = 8;
		s++;
		seen = 1;
	}
	c = strtoll(s, &x, bs);
	if (x != s)
		seen = 1;
	s = x;
	if (*s == '*')
		s++;
	switch (*s) {
	case 'w':
		mult = 2;
		s++;
		break;
	case 'l':
		mult = 4;
		s++;
		break;
	case 'b':
		mult = 512;
		s++;
		break;
	case 'k':
		mult = 1024;
		s++;
		break;
	}
	/*
	 * The range test comes before the multiplication because signed
	 * overflow is undefined; c is known to be non-negative there.
	 */
	if (*s != '\0' || !seen || c < 0 || c > LLONG_MAX / mult) {
		diag("bad count/offset value");
		exit(3);
	}
	return c * mult;
}
302
303 static void
usage(void)304 usage(void)
305 {
306 fprintf(stderr, "usage: %s [-acbwlAxdo] [-t] [-s offset[*][wlbk]] "
307 "[-n count[*][wlbk]] [file] ...\n",
308 progname);
309 exit(2);
310 }
311
312 static void
diag(const char * fmt,...)313 diag(const char *fmt, ...)
314 {
315 va_list ap;
316
317 fprintf(stderr, "%s: ", progname);
318 va_start(ap, fmt);
319 vfprintf(stderr, fmt, ap);
320 va_end(ap);
321 fprintf(stderr, "\n");
322 status |= 1;
323 }
324
325 static void
hd(FILE * fp)326 hd(FILE *fp)
327 {
328 long long of = 0, rd = 0;
329 union block b, ob;
330 size_t n, m, on = 0;
331 int star = 0;
332
333 if (sflag)
334 while (of < sflag) {
335 getc(fp);
336 of++;
337 }
338 if (tflag) {
339 prnt(fp, of);
340 return;
341 }
342 do {
343 if (nflag == 0 || rd + sizeof b.b_c < nflag)
344 m = sizeof b.b_c;
345 else
346 m = nflag - rd;
347 if ((n = fread(b.b_c, 1, m, fp)) > 0) {
348 if (!vflag && n==on && memcmp(b.b_c, ob.b_c, n) == 0) {
349 if (star == 0)
350 printf("*\n");
351 star = 1;
352 } else {
353 star = 0;
354 prna(of);
355 if (n < sizeof b.b_c)
356 memset(&b.b_c[n], 0, sizeof b.b_c - n);
357 prnb(&b, n);
358 }
359 }
360 rd += n;
361 of += n;
362 on = n;
363 ob = b;
364 } while (n == m && (nflag == 0 || rd < nflag));
365 prna(of);
366 putchar('\n');
367 }
368
369 static void
prna(long long n)370 prna(long long n)
371 {
372 switch (aflag) {
373 case BASE_O:
374 printf("%06llo", n);
375 break;
376 case BASE_D:
377 printf("%05llu", n);
378 break;
379 case BASE_0:
380 case BASE_X:
381 printf("%04llx", n);
382 break;
383 }
384 }
385
386 static void
prnb(union block * bp,int n)387 prnb(union block *bp, int n)
388 {
389 int cnt = 0;
390
391 if (cflag&BASE_X)
392 line(bp, n, 'c', BASE_X, cnt++);
393 if (cflag&BASE_D)
394 line(bp, n, 'c', BASE_D, cnt++);
395 if (cflag&BASE_O)
396 line(bp, n, 'c', BASE_O, cnt++);
397 if (bflag&BASE_X)
398 line(bp, n, 'b', BASE_X, cnt++);
399 if (bflag&BASE_D)
400 line(bp, n, 'b', BASE_D, cnt++);
401 if (bflag&BASE_O)
402 line(bp, n, 'b', BASE_O, cnt++);
403 if (wflag&BASE_X)
404 line(bp, n, 'w', BASE_X, cnt++);
405 if (wflag&BASE_D)
406 line(bp, n, 'w', BASE_D, cnt++);
407 if (wflag&BASE_O)
408 line(bp, n, 'w', BASE_O, cnt++);
409 if (lflag&BASE_X)
410 line(bp, n, 'l', BASE_X, cnt++);
411 if (lflag&BASE_D)
412 line(bp, n, 'l', BASE_D, cnt++);
413 if (lflag&BASE_O)
414 line(bp, n, 'l', BASE_O, cnt++);
415 }
416
417 static void
line(union block * bp,int n,int fmt,enum base base,int cnt)418 line(union block *bp, int n, int fmt, enum base base, int cnt)
419 {
420 int c, i, j, k, col = 0;
421 const char *cp;
422 const struct fm *fmp;
423
424 putchar('\t');
425 i = 0;
426 switch (fmt) {
427 case 'l':
428 fmp = getfmt('l', base);
429 for (j = i/4; j < (n>>2); j++, i += 4) {
430 if (col > 0) {
431 putchar(' ');
432 col++;
433 }
434 if (i == 8) {
435 putchar(' ');
436 col++;
437 }
438 for (k = fmp->f_width; k < align[2]; k++) {
439 putchar(' ');
440 col++;
441 }
442 col += printf(fmp->f_prf,(long)(bp->b_l[j]&0xffffffff));
443 }
444 if (i == n)
445 break;
446 /*FALLTHRU*/
447 case 'w':
448 fmp = getfmt('w', base);
449 for (j = i/2; j < (n>>1); j++, i += 2) {
450 if (col > 0) {
451 putchar(' ');
452 col++;
453 }
454 if (i == 8) {
455 putchar(' ');
456 col++;
457 }
458 for (k = fmp->f_width; k < align[1]; k++) {
459 putchar(' ');
460 col++;
461 }
462 col += printf(fmp->f_prf, (int)(bp->b_w[j]&0177777));
463 }
464 if (i == n)
465 break;
466 /*FALLTHRU*/
467 case 'b':
468 fmp = getfmt('b', base);
469 for (j = i; j < n; j++, i++) {
470 if (col > 0) {
471 putchar(' ');
472 col++;
473 }
474 if (i == 8) {
475 putchar(' ');
476 col++;
477 }
478 for (k = fmp->f_width; k < align[0]; k++) {
479 putchar(' ');
480 col++;
481 }
482 col += printf(fmp->f_prf, bp->b_c[j]&0377);
483 }
484 break;
485 case 'c':
486 fmp = getfmt('c', base);
487 for (i = 0; i < n; i++) {
488 if (col > 0) {
489 putchar(' ');
490 col++;
491 }
492 if (i == 8) {
493 putchar(' ');
494 col++;
495 }
496 for (k = fmp->f_width; k < align[0]; k++) {
497 putchar(' ');
498 col++;
499 }
500 c = bp->b_c[i]&0377;
501 cp = NULL;
502 if (c == '\b')
503 cp = "\\b";
504 else if (c == '\t')
505 cp = "\\t";
506 else if (c == '\n')
507 cp = "\\n";
508 else if (c == '\f')
509 cp = "\\f";
510 else if (c == '\r')
511 cp = "\\r";
512 else if (!isprint(c)) {
513 col += printf(fmp->f_prf, c);
514 } else {
515 if (base != BASE_X) {
516 putchar(' ');
517 col++;
518 }
519 col += printf(" %c", c);
520 }
521 if (cp) {
522 if (base != BASE_X) {
523 putchar(' ');
524 col++;
525 }
526 printf(cp);
527 }
528 }
529 break;
530 }
531 if (cnt == 0 && Aflag) {
532 while (col++ < 51)
533 putchar(' ');
534 for (i = 0; i < n; i++) {
535 if ((bp->b_c[i]&0340) == 0 || bp->b_c[i] == 0177 ||
536 !isprint(bp->b_c[i]&0377))
537 putchar('.');
538 else
539 putchar(bp->b_c[i]&0377);
540 }
541 }
542 putchar('\n');
543 }
544
545 static const struct fm *
getfmt(int fmt,enum base base)546 getfmt(int fmt, enum base base)
547 {
548 int i;
549
550 for (i = 0; ofmt[i].f_fmt; i++)
551 if (ofmt[i].f_fmt == fmt && ofmt[i].f_base == base)
552 return &ofmt[i];
553 return NULL;
554 }
555
556 static void
getalign(void)557 getalign(void)
558 {
559 int i, j;
560 enum base *basep;
561
562 for (i = 0; ofmt[i].f_fmt; i++) {
563 switch (ofmt[i].f_fmt) {
564 case 'b':
565 basep = &bflag;
566 break;
567 case 'c':
568 basep = &cflag;
569 break;
570 case 'w':
571 basep = &wflag;
572 break;
573 case 'l':
574 basep = &lflag;
575 break;
576 default:
577 basep = NULL;
578 }
579 if (basep && *basep & ofmt[i].f_base)
580 for (j = 0; j < sizeof align; j++)
581 if (ofmt[i].f_align[j] > align[j])
582 align[j] = ofmt[i].f_align[j];
583 }
584 }
585
586 static void
prnt(FILE * fp,long long of)587 prnt(FILE *fp, long long of)
588 {
589 wint_t wc;
590 char b, *mb;
591 int c, lastc = '\n', n;
592 long long rd = 0;
593
594 while ((nflag == 0 || rd < nflag)) {
595 if (mb_cur_max > 1) {
596 if ((mb = wcget(fp, &wc, &n)) == NULL)
597 break;
598 } else {
599 if ((c = getc(fp)) == EOF)
600 break;
601 b = wc = c;
602 mb = &b;
603 n = 1;
604 }
605 if (lastc == '\n') {
606 prna(of);
607 putchar('\t');
608 }
609 of += n, rd += n;
610 if (n == 1) {
611 c = *mb&0377;
612 lastc = c;
613 if (wc != WEOF && isprint(c) && c != '\\' &&
614 c != '^' && c != '~')
615 putchar(c);
616 else
617 prnc(c);
618 if (lastc == '\n')
619 putchar('\n');
620 } else {
621 lastc = c = EOF;
622 if (wc != WEOF && iswprint(wc))
623 while (n--) {
624 putchar(*mb&0377);
625 mb++;
626 }
627 else
628 while (n--) {
629 prnc(*mb&0377);
630 mb++;
631 }
632 }
633 }
634 if (lastc != '\n')
635 putchar('\n');
636 prna(of);
637 putchar('\n');
638 }
639
/*
 * Print byte c in escaped form.  0177 and 0377 are written as a
 * backslash followed by their octal value.  Otherwise a set high bit
 * is shown as '~' and a control code as '^' followed by the code with
 * 0100 OR-ed in; a resulting '\\', '~' or '^' is preceded by a
 * backslash.
 */
static void
prnc(int c)
{
	switch (c) {
	case 0177:
	case 0377:
		printf("\\%o", c);
		return;
	}
	if ((c & 0200) != 0) {
		putchar('~');
		c &= 0177;
	}
	if (c < 040) {
		putchar('^');
		c |= 0100;
	}
	switch (c) {
	case '\\':
	case '~':
	case '^':
		putchar('\\');
		break;
	}
	putchar(c);
}
659
660 static char *
wcget(FILE * fp,wint_t * wc,int * len)661 wcget(FILE *fp, wint_t *wc, int *len)
662 {
663 static char mbuf[MB_LEN_MAX+1];
664 static char *mcur, *mend;
665 static int incompl;
666 size_t rest;
667 int c, i, n;
668
669 i = 0;
670 rest = mend - mcur;
671 if (rest && mcur > mbuf) {
672 do
673 mbuf[i] = mcur[i];
674 while (i++, --rest);
675 } else if (incompl) {
676 incompl = 0;
677 *wc = WEOF;
678 mend = mcur = NULL;
679 return NULL;
680 }
681 if (i == 0) {
682 c = getc(fp);
683 if (c == EOF) {
684 *wc = WEOF;
685 mend = mcur = NULL;
686 return NULL;
687 }
688 mbuf[i++] = c;
689 }
690 if (mbuf[0] & 0200) {
691 while (mbuf[i-1] != '\n' && i < mb_cur_max &&
692 incompl == 0) {
693 c = getc(fp);
694 if (c != EOF)
695 mbuf[i++] = c;
696 else
697 incompl = 1;
698 }
699 n = mbtowi(wc, mbuf, i);
700 if (n < 0) {
701 *len = 1;
702 *wc = WEOF;
703 } else if (n == 0) {
704 *len = 1;
705 *wc = '\0';
706 } else
707 *len = n;
708 } else {
709 *wc = mbuf[0];
710 *len = n = 1;
711 }
712 mcur = &mbuf[*len];
713 mend = &mcur[i - *len];
714 return mbuf;
715 }
716