xref: /original-bsd/usr.bin/file/file.c (revision 08cd6844)
1 #ifndef lint
2 static	char sccsid[] = "@(#)file.c	4.19 (Berkeley) 04/10/91";
3 #endif
4 /*
5  * file - determine type of file
6  */
7 
8 #include <sys/param.h>
9 #include <sys/stat.h>
10 #include <string.h>
11 #include <stdio.h>
12 #include <ctype.h>
13 #include <a.out.h>
14 
extern int	errno;

/*
 * State shared by type() and the scanning helpers below.
 */
char buf[BUFSIZ];	/* bytes read from the file under examination */
int in;			/* value returned by the read() that filled buf */
int i  = 0;		/* current scan index into buf */
int	ifile;		/* descriptor of the file under examination */

/*
 * Keyword tables handed to lookup(); each list ends with a 0 entry.
 */

/* new troff intermediate lang */
char *troff[] = {
	"x", "T", "res", "init", "font", "202", "V0", "p1",
	0
};

char *fort[] = {
	"function", "subroutine", "common", "dimension", "block",
	"integer", "real", "data", "double",
	0
};

char *asc[] = {
	"chmk", "mov", "tst", "clr", "jmp",
	0
};

char *c[] = {
	"int", "char", "float", "double", "struct", "extern",
	0
};

char *as[] = {
	"globl", "byte", "align", "text", "data", "comm",
	0
};

char *sh[] = {
	"fi", "elif", "esac", "done", "export",
	"readonly", "trap", "PATH", "HOME",
	0
};

char *csh[] = {
	"alias", "breaksw", "endsw", "foreach", "limit", "onintr",
	"repeat", "setenv", "source", "path", "home",
	0
};
38 
39 main(argc, argv)
40 char **argv;
41 {
42 	FILE *fl;
43 	register char *p;
44 	char ap[MAXPATHLEN + 1];
45 
46 	if (argc < 2) {
47 		fprintf(stderr, "usage: %s file ...\n", argv[0]);
48 		exit(3);
49 	}
50 
51 	if (argc>1 && argv[1][0]=='-' && argv[1][1]=='f') {
52 		if ((fl = fopen(argv[2], "r")) == NULL) {
53 			perror(argv[2]);
54 			exit(2);
55 		}
56 		while ((p = fgets(ap, sizeof ap, fl)) != NULL) {
57 			int l = strlen(p);
58 			if (l>0)
59 				p[l-1] = '\0';
60 			type(p);
61 			if (ifile>=0)
62 				close(ifile);
63 		}
64 		exit(1);
65 	}
66 	while(argc > 1) {
67 		ifile = -1;
68 		type(argv[1]);
69 		fflush(stdout);
70 		argc--;
71 		argv++;
72 		if (ifile >= 0)
73 			close(ifile);
74 	}
75 	exit(0);
76 }
77 
78 type(file)
79 char *file;
80 {
81 	int j,nl;
82 	char ch;
83 	struct stat mbuf;
84 	char slink[MAXPATHLEN + 1];
85 
86 	if (lstat(file, &mbuf) < 0) {
87 		fprintf(stderr, "file: %s: %s\n", file, strerror(errno));
88 		return;
89 	}
90 	switch (mbuf.st_mode & S_IFMT) {
91 	case S_IFLNK:
92 		printf("%s:\tsymbolic link", file);
93 		j = readlink(file, slink, sizeof slink - 1);
94 		if (j >= 0) {
95 			slink[j] = '\0';
96 			printf(" to %s", slink);
97 		}
98 		printf("\n");
99 		return;
100 
101 	case S_IFDIR:
102 		printf("%s:\t", file);
103 		if (mbuf.st_mode & S_ISVTX)
104 			printf("append-only ");
105 		printf("directory\n");
106 		return;
107 
108 	case S_IFCHR:
109 	case S_IFBLK:
110 		printf("%s:\t%s special (%d/%d)\n", file,
111 		    (mbuf.st_mode&S_IFMT) == S_IFCHR ? "character" : "block",
112 		     major(mbuf.st_rdev), minor(mbuf.st_rdev));
113 		return;
114 
115 	case S_IFSOCK:
116 		printf("%s:\tsocket\n", file);
117 		return;
118 	}
119 
120 	ifile = open(file, 0);
121 	if(ifile < 0) {
122 		fprintf(stderr, "file: %s: %s\n", file, strerror(errno));
123 		return;
124 	}
125 	printf("%s:\t", file);
126 	in = read(ifile, buf, BUFSIZ);
127 	if(in == 0){
128 		printf("empty\n");
129 		return;
130 	}
131 	switch(*(int *)buf) {
132 
133 	case 0413:
134 		printf("demand paged ");
135 
136 	case 0410:
137 		printf("pure ");
138 		goto exec;
139 
140 	case 0411:
141 		printf("jfr or pdp-11 unix 411 executable\n");
142 		return;
143 
144 	case 0407:
145 exec:
146 		if (mbuf.st_mode & S_ISUID)
147 			printf("set-uid ");
148 		if (mbuf.st_mode & S_ISGID)
149 			printf("set-gid ");
150 		if (mbuf.st_mode & S_ISVTX)
151 			printf("sticky ");
152 		printf("executable");
153 		if(((int *)buf)[4] != 0) {
154 			printf(" not stripped");
155 			if(oldo(buf))
156 				printf(" (old format symbol table)");
157 		}
158 		printf("\n");
159 		return;
160 
161 	case 0177555:
162 		printf("very old archive\n");
163 		return;
164 
165 	case 0177545:
166 		printf("old archive\n");
167 		return;
168 
169 	case 070707:
170 		printf("cpio data\n");
171 		return;
172 	}
173 
174 	if (buf[0] == '#' && buf[1] == '!' && shellscript(buf+2, &mbuf))
175 		return;
176 	if (buf[0] == '\037' && buf[1] == '\235') {
177 		if (buf[2]&0x80)
178 			printf("block ");
179 		printf("compressed %d bit code data\n", buf[2]&0x1f);
180 		return;
181 	}
182 	if(strncmp(buf, "!<arch>\n__.SYMDEF", 17) == 0 ) {
183 		printf("archive random library\n");
184 		return;
185 	}
186 	if (strncmp(buf, "!<arch>\n", 8)==0) {
187 		printf("archive\n");
188 		return;
189 	}
190 	if (mbuf.st_size % 512 == 0) {	/* it may be a PRESS file */
191 		lseek(ifile, -512L, 2);	/* last block */
192 		if (read(ifile, buf, BUFSIZ) > 0 && *(short *)buf == 12138) {
193 			printf("PRESS file\n");
194 			return;
195 		}
196 	}
197 	i = 0;
198 	if(ccom() == 0)goto notc;
199 	while(buf[i] == '#'){
200 		j = i;
201 		while(buf[i++] != '\n'){
202 			if(i - j > 255){
203 				printf("data\n");
204 				return;
205 			}
206 			if(i >= in)goto notc;
207 		}
208 		if(ccom() == 0)goto notc;
209 	}
210 check:
211 	if(lookup(c) == 1){
212 		while((ch = buf[i++]) != ';' && ch != '{')if(i >= in)goto notc;
213 		printf("c program text");
214 		goto outa;
215 	}
216 	nl = 0;
217 	while(buf[i] != '('){
218 		if(buf[i] <= 0)
219 			goto notas;
220 		if(buf[i] == ';'){
221 			i++;
222 			goto check;
223 		}
224 		if(buf[i++] == '\n')
225 			if(nl++ > 6)goto notc;
226 		if(i >= in)goto notc;
227 	}
228 	while(buf[i] != ')'){
229 		if(buf[i++] == '\n')
230 			if(nl++ > 6)goto notc;
231 		if(i >= in)goto notc;
232 	}
233 	while(buf[i] != '{'){
234 		if(buf[i++] == '\n')
235 			if(nl++ > 6)goto notc;
236 		if(i >= in)goto notc;
237 	}
238 	printf("c program text");
239 	goto outa;
240 notc:
241 	i = 0;
242 	while(buf[i] == 'c' || buf[i] == '#'){
243 		while(buf[i++] != '\n')if(i >= in)goto notfort;
244 	}
245 	if(lookup(fort) == 1){
246 		printf("fortran program text");
247 		goto outa;
248 	}
249 notfort:
250 	i=0;
251 	if(ascom() == 0)goto notas;
252 	j = i-1;
253 	if(buf[i] == '.'){
254 		i++;
255 		if(lookup(as) == 1){
256 			printf("assembler program text");
257 			goto outa;
258 		}
259 		else if(buf[j] == '\n' && isalpha(buf[j+2])){
260 			printf("roff, nroff, or eqn input text");
261 			goto outa;
262 		}
263 	}
264 	while(lookup(asc) == 0){
265 		if(ascom() == 0)goto notas;
266 		while(buf[i] != '\n' && buf[i++] != ':')
267 			if(i >= in)goto notas;
268 		while(buf[i] == '\n' || buf[i] == ' ' || buf[i] == '\t')if(i++ >= in)goto notas;
269 		j = i-1;
270 		if(buf[i] == '.'){
271 			i++;
272 			if(lookup(as) == 1){
273 				printf("assembler program text");
274 				goto outa;
275 			}
276 			else if(buf[j] == '\n' && isalpha(buf[j+2])){
277 				printf("roff, nroff, or eqn input text");
278 				goto outa;
279 			}
280 		}
281 	}
282 	printf("assembler program text");
283 	goto outa;
284 notas:
285 	for(i=0; i < in; i++)if(buf[i]&0200){
286 		if (buf[0]=='\100' && buf[1]=='\357')
287 			printf("troff (CAT) output\n");
288 		else
289 			printf("data\n");
290 		return;
291 	}
292 	if (mbuf.st_mode&((S_IEXEC)|(S_IEXEC>>3)|(S_IEXEC>>6))) {
293 		if (mbuf.st_mode & S_ISUID)
294 			printf("set-uid ");
295 		if (mbuf.st_mode & S_ISGID)
296 			printf("set-gid ");
297 		if (mbuf.st_mode & S_ISVTX)
298 			printf("sticky ");
299 		if (shell(buf, in, sh))
300 			printf("shell script");
301 		else if (shell(buf, in, csh))
302 			printf("c-shell script");
303 		else
304 			printf("commands text");
305 	} else if (troffint(buf, in))
306 		printf("troff intermediate output text");
307 	else if (shell(buf, in, sh))
308 		printf("shell commands");
309 	else if (shell(buf, in, csh))
310 		printf("c-shell commands");
311 	else if (english(buf, in))
312 		printf("English text");
313 	else
314 		printf("ascii text");
315 outa:
316 	while(i < in)
317 		if((buf[i++]&0377) > 127){
318 			printf(" with garbage\n");
319 			return;
320 		}
321 	/* if next few lines in then read whole file looking for nulls ...
322 		while((in = read(ifile,buf,BUFSIZ)) > 0)
323 			for(i = 0; i < in; i++)
324 				if((buf[i]&0377) > 127){
325 					printf(" with garbage\n");
326 					return;
327 				}
328 		/*.... */
329 	printf("\n");
330 }
331 
332 oldo(cp)
333 char *cp;
334 {
335 	struct exec ex;
336 	struct stat stb;
337 
338 	ex = *(struct exec *)cp;
339 	if (fstat(ifile, &stb) < 0)
340 		return(0);
341 	if (N_STROFF(ex)+sizeof(off_t) > stb.st_size)
342 		return (1);
343 	return (0);
344 }
345 
346 
347 
348 troffint(bp, n)
349 char *bp;
350 int n;
351 {
352 	int k;
353 
354 	i = 0;
355 	for (k = 0; k < 6; k++) {
356 		if (lookup(troff) == 0)
357 			return(0);
358 		if (lookup(troff) == 0)
359 			return(0);
360 		while (i < n && buf[i] != '\n')
361 			i++;
362 		if (i++ >= n)
363 			return(0);
364 	}
365 	return(1);
366 }
367 lookup(tab)
368 char *tab[];
369 {
370 	char r;
371 	int k,j,l;
372 	while(buf[i] == ' ' || buf[i] == '\t' || buf[i] == '\n')i++;
373 	for(j=0; tab[j] != 0; j++){
374 		l=0;
375 		for(k=i; ((r=tab[j][l++]) == buf[k] && r != '\0');k++);
376 		if(r == '\0')
377 			if(buf[k] == ' ' || buf[k] == '\n' || buf[k] == '\t'
378 			    || buf[k] == '{' || buf[k] == '/'){
379 				i=k;
380 				return(1);
381 			}
382 	}
383 	return(0);
384 }
385 ccom(){
386 	char cc;
387 	while((cc = buf[i]) == ' ' || cc == '\t' || cc == '\n')if(i++ >= in)return(0);
388 	if(buf[i] == '/' && buf[i+1] == '*'){
389 		i += 2;
390 		while(buf[i] != '*' || buf[i+1] != '/'){
391 			if(buf[i] == '\\')i += 2;
392 			else i++;
393 			if(i >= in)return(0);
394 		}
395 		if((i += 2) >= in)return(0);
396 	}
397 	if(buf[i] == '\n')if(ccom() == 0)return(0);
398 	return(1);
399 }
400 ascom(){
401 	while(buf[i] == '/'){
402 		i++;
403 		while(buf[i++] != '\n')if(i >= in)return(0);
404 		while(buf[i] == '\n')if(i++ >= in)return(0);
405 	}
406 	return(1);
407 }
408 
/*
 * english - letter-frequency guess at whether the n bytes at bp are
 * English prose.
 *
 * Returns 0 for fewer than 50 bytes.  Otherwise counts 7-bit
 * characters (upper case letters are counted as lower case) and
 * punctuation, and returns 0 when
 *   - more than a fifth of the punctuation marks . , ) % ; : ? are
 *     not followed by a blank or newline,
 *   - semicolons number more than half the e's, or
 *   - '>', '<' and '/' together outnumber the e's (shell file test).
 * It then returns 1 only if vowels are at least a fifth of the
 * non-blank bytes and the letters e t a i o n are at least ten times
 * as frequent as v j k q x z.
 */
int
english(bp, n)
char *bp;
int n;
{
# define NASC 128
	int ct[NASC], j, ch, vow, freq, rare;
	int badpun = 0, punct = 0;

	if (n < 50)
		return(0);	/* no point in statistics on squibs */
	for (j = 0; j < NASC; j++)
		ct[j] = 0;
	for (j = 0; j < n; j++) {
		ch = (unsigned char)bp[j];
		if (ch < NASC) {
			/*
			 * Fold only upper case letters.  Or-ing 040 into
			 * every byte would also count NUL as a blank and
			 * other control characters as ';', '<', '>', '/'.
			 */
			if (ch >= 'A' && ch <= 'Z')
				ch |= 040;
			ct[ch]++;
		}
		switch (bp[j]) {
		case '.':
		case ',':
		case ')':
		case '%':
		case ';':
		case ':':
		case '?':
			punct++;
			if (j < n-1 &&
			    bp[j+1] != ' ' &&
			    bp[j+1] != '\n')
				badpun++;
			break;
		default:
			break;
		}
	}
	if (badpun*5 > punct)
		return(0);
	vow = ct['a'] + ct['e'] + ct['i'] + ct['o'] + ct['u'];
	freq = ct['e'] + ct['t'] + ct['a'] + ct['i'] + ct['o'] + ct['n'];
	rare = ct['v'] + ct['j'] + ct['k'] + ct['q'] + ct['x'] + ct['z'];
	if (2*ct[';'] > ct['e'])
		return(0);
	if ((ct['>']+ct['<']+ct['/']) > ct['e'])
		return(0);	/* shell file test */
	return (vow*5 >= n-ct[' '] && freq >= 10*rare);
}
447 
448 shellscript(buf, sb)
449 	char buf[];
450 	struct stat *sb;
451 {
452 	register char *tp;
453 	char *cp, *xp, *index();
454 
455 	cp = index(buf, '\n');
456 	if (cp == 0 || cp - buf > in)
457 		return (0);
458 	for (tp = buf; tp != cp && isspace(*tp); tp++)
459 		if (!isascii(*tp))
460 			return (0);
461 	for (xp = tp; tp != cp && !isspace(*tp); tp++)
462 		if (!isascii(*tp))
463 			return (0);
464 	if (tp == xp)
465 		return (0);
466 	if (sb->st_mode & S_ISUID)
467 		printf("set-uid ");
468 	if (sb->st_mode & S_ISGID)
469 		printf("set-gid ");
470 	if (strncmp(xp, "/bin/sh", tp-xp) == 0)
471 		xp = "shell";
472 	else if (strncmp(xp, "/bin/csh", tp-xp) == 0)
473 		xp = "c-shell";
474 	else
475 		*tp = '\0';
476 	printf("executable %s script\n", xp);
477 	return (1);
478 }
479 
480 shell(bp, n, tab)
481 	char *bp;
482 	int n;
483 	char *tab[];
484 {
485 
486 	i = 0;
487 	do {
488 		if (buf[i] == '#' || buf[i] == ':')
489 			while (i < n && buf[i] != '\n')
490 				i++;
491 		if (++i >= n)
492 			break;
493 		if (lookup(tab) == 1)
494 			return (1);
495 	} while (i < n);
496 	return (0);
497 }
498